7 years ago · ce1d126566
--- a/codec/0doc.go
+++ b/codec/0doc.go
@@ -225,35 +225,46 @@ with some caveats. See Encode documentation.
 
				 */
			
 
				 package codec
			
 
				 
			
 
				-// TODO:
			
 
				-//   - When mid-stack inlining is enabled, do the following:
			
 
				-//     - if 41<=inlineExtraCallCost<=56, make ioEncWriter.{writen1,writen2,writestr,writeb,atEndOfEncode} go:noinline
			
 
				-//     - if <=40, do nothing
			
 
				-//
			
 
				-// PUNTED:
			
 
				-//   - To make Handle comparable, make extHandle in BasicHandle a non-embedded pointer,
			
 
				-//     and use overlay methods on *BasicHandle to call through to extHandle after initializing
			
 
				-//     the "xh *extHandle" to point to a real slice.
			
 
				-//
			
 
				-// BEFORE EACH RELEASE:
			
 
				-//   - Look through and fix padding for each type, to eliminate false sharing
			
 
				-//     - critical shared objects that are read many times
			
 
				-//       TypeInfos
			
 
				-//     - pooled objects:
			
 
				-//       decNaked, decNakedContainers, codecFner, typeInfoLoadArray, 
			
 
				-//     - small objects allocated independently, that we read/use much across threads:
			
 
				-//       codecFn, typeInfo
			
 
				-//     - Objects allocated independently and used a lot
			
 
				-//       Decoder, Encoder,
			
 
				-//       xxxHandle, xxxEncDriver, xxxDecDriver (xxx = json, msgpack, cbor, binc, simple)
			
 
				-//     - In all above, arrange values modified together to be close to each other.
			
 
				-//
			
 
				-//     For all of these, either ensure that they occupy full cache lines,
			
 
				-//     or ensure that the things just past the cache line boundary are hardly read/written
			
 
				-//     e.g. JsonHandle.RawBytesExt - which is copied into json(En|De)cDriver at init
			
 
				-//
			
 
				-//     Occupying full cache lines means they occupy 8*N words (where N is an integer).
			
 
				-//     Check this out by running: ./run.sh -z
			
 
				-//     - look at those tagged ****, meaning they are not occupying full cache lines
			
 
				-//     - look at those tagged <<<<, meaning they are larger than 32 words (something to watch)
			
 
				-//   - Run "golint -min_confidence 0.81"
			
 
				+/*
			
 
				+MID-STACK INLINING:
			
 
				+  - The code currently works optimally with fully enabled mid-stack inlining.
			
 
				+    This way, when enabled, we are already performant.
			
 
				+  - To see how well inlining is working, run the script below:
			
 
				+
			
 
				+    myblanklines 20
			
 
				+    zf=7; go build -gcflags "-m=2 -l=4" > $TMPDIR/a$zf.txt 2>&1
			
 
				+    for i in decReaderSwitch bytesDecReader bufioDecReader ioDecReader \
			
 
				+        encWriterSwitch bytesEncAppender bufioEncWriter ioEncWriter
			
 
				+    do echo ; grep -E "cannot inline \(\*${i}\)." $TMPDIR/a$zf.txt; done
			
 
				+
			
 
				+  - When mid-stack inlining is enabled, consider doing the following:
			
 
				+    - if 41<=inlineExtraCallCost<=56, make (buf)ioEncWriter.{writen1,writen2,writestr,writeb,atEndOfEncode} go:noinline
			
 
				+    - if <=40, do nothing (no go:noinline)
			
 
				+
			
 
				+PUNTED:
			
 
				+  - To make Handle comparable, make extHandle in BasicHandle a non-embedded pointer,
			
 
				+    and use overlay methods on *BasicHandle to call through to extHandle after initializing
			
 
				+    the "xh *extHandle" to point to a real slice.
			
 
				+
			
 
				+BEFORE EACH RELEASE:
			
 
				+  - Look through and fix padding for each type, to eliminate false sharing
			
 
				+    - critical shared objects that are read many times
			
 
				+      TypeInfos
			
 
				+    - pooled objects:
			
 
				+      decNaked, decNakedContainers, codecFner, typeInfoLoadArray, 
			
 
				+    - small objects allocated independently, that we read/use much across threads:
			
 
				+      codecFn, typeInfo
			
 
				+    - Objects allocated independently and used a lot
			
 
				+      Decoder, Encoder,
			
 
				+      xxxHandle, xxxEncDriver, xxxDecDriver (xxx = json, msgpack, cbor, binc, simple)
			
 
				+    - In all above, arrange values modified together to be close to each other.
			
 
				+    For all of these, either ensure that they occupy full cache lines,
			
 
				+    or ensure that the things just past the cache line boundary are hardly read/written
			
 
				+    e.g. JsonHandle.RawBytesExt - which is copied into json(En|De)cDriver at init
			
 
				+
			
 
				+    Occupying full cache lines means they occupy 8*N words (where N is an integer).
			
 
				+    Check this out by running: ./run.sh -z
			
 
				+    - look at those tagged ****, meaning they are not occupying full cache lines
			
 
				+    - look at those tagged <<<<, meaning they are larger than 32 words (something to watch)
			
 
				+  - Run "golint -min_confidence 0.81"
			
 
				+*/
			
--- a/codec/codec_test.go
+++ b/codec/codec_test.go
@@ -2330,13 +2330,15 @@ func doTestMissingFields(t *testing.T, name string, h Handle) {
 
				 	// encode missingFielderT2, decode into missingFielderT1, encode it out again, decode into new missingFielderT2, compare
			
 
				 	v1 := missingFielderT2{S: "true seven eight", B: true, F: 777.0, I: -888}
			
 
				 	b1 := testMarshalErr(v1, h, t, name+"-missing-enc-2")
			
 
				-	// xdebugf("b1: %s", b1)
			
 
				+	// xdebugf("marshal into b1: %s", b1)
			
 
				 	var v2 missingFielderT1
			
 
				 	testUnmarshalErr(&v2, b1, h, t, name+"-missing-dec-1")
			
 
				-	// xdebugf("unmarshal worked")
			
 
				+	// xdebugf("unmarshal into v2: %v", v2)
			
 
				 	b2 := testMarshalErr(&v2, h, t, name+"-missing-enc-1")
			
 
				+	// xdebugf("marshal into b2: %s", b2)
			
 
				 	var v3 missingFielderT2
			
 
				 	testUnmarshalErr(&v3, b2, h, t, name+"-missing-dec-2")
			
 
				+	// xdebugf("unmarshal into v3: %v", v3)
			
 
				 	testDeepEqualErr(v1, v3, t, name+"-missing-cmp-2")
			
 
				 }
			
 
				 
			
--- a/codec/decode.go
+++ b/codec/decode.go
@@ -23,8 +23,8 @@ const (
 
				 const (
			
 
				 	decDefMaxDepth         = 1024 // maximum depth
			
 
				 	decDefSliceCap         = 8
			
 
				-	decDefChanCap          = 64 // should be large, as cap cannot be expanded
			
 
				-	decScratchByteArrayLen = cacheLineSize - (8 * 1)
			
 
				+	decDefChanCap          = 64            // should be large, as cap cannot be expanded
			
 
				+	decScratchByteArrayLen = cacheLineSize // - (8 * 1)
			
 
				 )
			
 
				 
			
 
				 var (
			
@@ -367,9 +367,12 @@ func (z *bufioDecReader) UnreadByte() (err error) {
 
				 }
			
 
				 
			
 
				 func (z *bufioDecReader) readx(n int) (bs []byte) {
			
 
				-	if n <= 0 || z.err != nil {
			
 
				+	if n <= 0 {
			
 
				 		return
			
 
				 	}
			
 
				+	if z.err != nil {
			
 
				+		panic(z.err)
			
 
				+	}
			
 
				 	if z.c+n <= len(z.buf) {
			
 
				 		bs = z.buf[z.c : z.c+n]
			
 
				 		z.n += n
			
@@ -435,17 +438,14 @@ func (z *bufioDecReader) skip(accept *bitset256) (token byte) {
 
				 	if z.trb {
			
 
				 		z.tr = append(z.tr, z.buf[z.c:]...)
			
 
				 	}
			
 
				-	if z.err != nil {
			
 
				-		return 0
			
 
				-	}
			
 
				 	var n2 int
			
 
				 	for {
			
 
				+		if z.err != nil {
			
 
				+			panic(z.err)
			
 
				+		}
			
 
				 		z.c = 0
			
 
				 		z.buf = z.buf[0:cap(z.buf)]
			
 
				 		n2, z.err = z.r.Read(z.buf)
			
 
				-		if n2 > 0 && z.err != nil {
			
 
				-			z.err = nil
			
 
				-		}
			
 
				 		z.buf = z.buf[:n2]
			
 
				 		for i := 0; i < n2; i++ {
			
 
				 			if token = z.buf[i]; !accept.isset(token) {
			
@@ -454,9 +454,6 @@ func (z *bufioDecReader) skip(accept *bitset256) (token byte) {
 
				 			}
			
 
				 		}
			
 
				 		z.n += n2
			
 
				-		if z.err != nil {
			
 
				-			return 0
			
 
				-		}
			
 
				 		if z.trb {
			
 
				 			z.tr = append(z.tr, z.buf[:n2]...)
			
 
				 		}
			
@@ -489,17 +486,17 @@ func (z *bufioDecReader) readTo(in []byte, accept *bitset256) (out []byte) {
 
				 	if z.trb {
			
 
				 		z.tr = append(z.tr, z.buf[z.c:]...)
			
 
				 	}
			
 
				-	if z.err != nil {
			
 
				-		return
			
 
				-	}
			
 
				 	var n2 int
			
 
				 	for {
			
 
				+		if z.err != nil {
			
 
				+			if z.err == io.EOF {
			
 
				+				return // readTo should read until it matches or end is reached
			
 
				+			}
			
 
				+			panic(z.err)
			
 
				+		}
			
 
				 		z.c = 0
			
 
				 		z.buf = z.buf[0:cap(z.buf)]
			
 
				 		n2, z.err = z.r.Read(z.buf)
			
 
				-		if n2 > 0 && z.err != nil {
			
 
				-			z.err = nil
			
 
				-		}
			
 
				 		z.buf = z.buf[:n2]
			
 
				 		for i := 0; i < n2; i++ {
			
 
				 			if !accept.isset(z.buf[i]) {
			
@@ -508,9 +505,6 @@ func (z *bufioDecReader) readTo(in []byte, accept *bitset256) (out []byte) {
 
				 		}
			
 
				 		out = append(out, z.buf[:n2]...)
			
 
				 		z.n += n2
			
 
				-		if z.err != nil {
			
 
				-			return
			
 
				-		}
			
 
				 		if z.trb {
			
 
				 			z.tr = append(z.tr, z.buf[:n2]...)
			
 
				 		}
			
@@ -544,17 +538,14 @@ func (z *bufioDecReader) readUntil(in []byte, stop byte) (out []byte) {
 
				 	if z.trb {
			
 
				 		z.tr = append(z.tr, z.buf[z.c:]...)
			
 
				 	}
			
 
				-	if z.err != nil {
			
 
				-		return
			
 
				-	}
			
 
				 	var n2 int
			
 
				 	for {
			
 
				+		if z.err != nil {
			
 
				+			panic(z.err)
			
 
				+		}
			
 
				 		z.c = 0
			
 
				 		z.buf = z.buf[0:cap(z.buf)]
			
 
				 		n2, z.err = z.r.Read(z.buf)
			
 
				-		if n2 > 0 && z.err != nil {
			
 
				-			z.err = nil
			
 
				-		}
			
 
				 		z.buf = z.buf[:n2]
			
 
				 		for i := 0; i < n2; i++ {
			
 
				 			if z.buf[i] == stop {
			
@@ -563,9 +554,6 @@ func (z *bufioDecReader) readUntil(in []byte, stop byte) (out []byte) {
 
				 		}
			
 
				 		out = append(out, z.buf[:n2]...)
			
 
				 		z.n += n2
			
 
				-		if z.err != nil {
			
 
				-			return
			
 
				-		}
			
 
				 		if z.trb {
			
 
				 			z.tr = append(z.tr, z.buf[:n2]...)
			
 
				 		}
			
@@ -695,6 +683,7 @@ func (z *ioDecReader) UnreadByte() (err error) {
 
				 	return
			
 
				 }
			
 
				 
			
 
				+// //go:noinline
			
 
				 func (z *ioDecReader) readx(n int) (bs []byte) {
			
 
				 	if n <= 0 {
			
 
				 		return
			
@@ -714,6 +703,7 @@ func (z *ioDecReader) readx(n int) (bs []byte) {
 
				 	return
			
 
				 }
			
 
				 
			
 
				+// //go:noinline
			
 
				 func (z *ioDecReader) readb(bs []byte) {
			
 
				 	if len(bs) == 0 {
			
 
				 		return
			
@@ -727,6 +717,7 @@ func (z *ioDecReader) readb(bs []byte) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// //go:noinline
			
 
				 func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
			
 
				 	b, err := z.br.ReadByte()
			
 
				 	if err == nil {
			
@@ -742,6 +733,7 @@ func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
 
				 	return
			
 
				 }
			
 
				 
			
 
				+// //go:noinline
			
 
				 func (z *ioDecReader) readn1() (b uint8) {
			
 
				 	b, err := z.br.ReadByte()
			
 
				 	if err == nil {
			
@@ -754,6 +746,7 @@ func (z *ioDecReader) readn1() (b uint8) {
 
				 	panic(err)
			
 
				 }
			
 
				 
			
 
				+// //go:noinline
			
 
				 func (z *ioDecReader) skip(accept *bitset256) (token byte) {
			
 
				 	var eof bool
			
 
				 	// for {
			
@@ -850,13 +843,13 @@ var errBytesDecReaderCannotUnread = errors.New("cannot unread last byte read")
 
				 // bytesDecReader keeps a byte slice (b), a cursor into it (c) and a
			
 
				 // track-start offset (t). The count of unread bytes is len(b)-c, so the
			
 
				 // separate "available" counter (a) is no longer stored.
			
 
				 type bytesDecReader struct {
			
 
				 	b []byte // data
			
 
				 	c int    // cursor
			
 
				-	a int    // available
			
 
				 	t int    // track start
			
 
				+	// a int    // available
			
 
				 }
			
 
				 
			
 
				 // reset points the reader at in and rewinds both the cursor (c) and the
			
 
				 // track-start offset (t) to 0.
			
 
				 func (z *bytesDecReader) reset(in []byte) {
			
 
				 	z.b = in
			
 
				-	z.a = len(in)
			
 
				+	// z.a = len(in)
			
 
				 	z.c = 0
			
 
				 	z.t = 0
			
 
				 }
			
@@ -870,7 +863,7 @@ func (z *bytesDecReader) unreadn1() {
 
				 		panic(errBytesDecReaderCannotUnread)
			
 
				 	}
			
 
				 	z.c--
			
 
				-	z.a++
			
 
				+	// z.a++
			
 
				 	return
			
 
				 }
			
 
				 
			
@@ -896,14 +889,14 @@ func (z *bytesDecReader) readx(n int) (bs []byte) {
 
				 	if n <= 0 {
			
 
				 		return
			
 
				 	}
			
 
				-	if z.a == 0 {
			
 
				+	if z.c == len(z.b) {
			
 
				 		panic(io.EOF)
			
 
				 	}
			
 
				-	if n > z.a {
			
 
				+	if n > len(z.b)-z.c {
			
 
				 		panic(io.ErrUnexpectedEOF)
			
 
				 	}
			
 
				 
			
 
				-	z.a -= n
			
 
				+	// z.a -= n
			
 
				 	z.c += n
			
 
				 	return z.b[z.c-n : z.c]
			
 
				 }
			
@@ -913,12 +906,12 @@ func (z *bytesDecReader) readb(bs []byte) {
 
				 }
			
 
				 
			
 
				 // readn1 returns the byte at the cursor and advances the cursor by one.
			
 
				 // It panics with io.EOF when the cursor is already at the end of z.b.
			
 
				 func (z *bytesDecReader) readn1() (v uint8) {
			
 
				-	if z.a == 0 {
			
 
				+	if z.c == len(z.b) {
			
 
				 		panic(io.EOF)
			
 
				 	}
			
 
				 	v = z.b[z.c]
			
 
				 	z.c++
			
 
				-	z.a--
			
 
				+	// z.a--
			
 
				 	return
			
 
				 }
			
 
				 
			
@@ -935,10 +928,12 @@ func (z *bytesDecReader) readn1() (v uint8) {
 
				 
			
 
				 // // go:noinline
			
 
				 func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
			
 
				-	if z.a == 0 {
			
 
				-		return
			
 
				-	}
			
 
				+	i := z.c
			
 
				 	blen := len(z.b)
			
 
				+	if z.c == blen {
			
 
				+		goto END
			
 
				+		// panic(io.EOF)
			
 
				+	}
			
 
				 	// Replace loop with goto construct, so that this can be inlined
			
 
				 	// for i := z.c; i < blen; i++ {
			
 
				 	// 	if !accept.isset(z.b[i]) {
			
@@ -949,29 +944,32 @@ func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
 
				 	// 		return
			
 
				 	// 	}
			
 
				 	// }
			
 
				-	i := z.c
			
 
				+
			
 
				+	// i := z.c
			
 
				 LOOP:
			
 
				 	if i < blen {
			
 
				-		if accept.isset(z.b[i]) {
			
 
				-			i++
			
 
				-			goto LOOP
			
 
				-		}
			
 
				 		token = z.b[i]
			
 
				 		i++
			
 
				-		z.a -= (i - z.c)
			
 
				+		if accept.isset(token) {
			
 
				+			goto LOOP
			
 
				+		}
			
 
				+		// z.a -= (i - z.c)
			
 
				 		z.c = i
			
 
				 		return
			
 
				 	}
			
 
				-	z.a, z.c = 0, blen
			
 
				-	return
			
 
				+END:
			
 
				+	panic(io.EOF)
			
 
				+	// // z.a = 0
			
 
				+	// z.c = blen
			
 
				+	// return
			
 
				 }
			
 
				 
			
 
				 // // go:noinline
			
 
				 func (z *bytesDecReader) readTo(_ []byte, accept *bitset256) (out []byte) {
			
 
				-	if z.a == 0 {
			
 
				-		return
			
 
				-	}
			
 
				 	blen := len(z.b)
			
 
				+	if z.c == blen {
			
 
				+		panic(io.EOF)
			
 
				+	}
			
 
				 
			
 
				 	// Replace loop with goto construct, so that this can be inlined
			
 
				 	// for i := z.c; i < blen; i++ {
			
@@ -1005,6 +1003,7 @@ func (z *bytesDecReader) readTo(_ []byte, accept *bitset256) (out []byte) {
 
				 	// 	return
			
 
				 
			
 
				 	i := z.c
			
 
				+	// c := i
			
 
				 LOOP:
			
 
				 	if i < blen {
			
 
				 		if accept.isset(z.b[i]) {
			
@@ -1013,17 +1012,19 @@ LOOP:
 
				 		}
			
 
				 	}
			
 
				 	out = z.b[z.c:i]
			
 
				-	z.a -= (i - z.c)
			
 
				+	// z.a -= (i - z.c)
			
 
				 	z.c = i
			
 
				-	return
			
 
				+	return // z.b[c:i]
			
 
				+	// z.c, i = i, z.c
			
 
				+	// return z.b[i:z.c]
			
 
				 }
			
 
				 
			
 
				 // // go:noinline
			
 
				 func (z *bytesDecReader) readUntil(_ []byte, stop byte) (out []byte) {
			
 
				-	if z.a == 0 {
			
 
				+	blen := len(z.b)
			
 
				+	if z.c == blen {
			
 
				 		panic(io.EOF)
			
 
				 	}
			
 
				-	blen := len(z.b)
			
 
				 	// Replace loop with goto construct, so that this can be inlined
			
 
				 	// for i := z.c; i < blen; i++ {
			
 
				 	// 	if z.b[i] == stop {
			
@@ -1040,14 +1041,15 @@ LOOP:
 
				 		if z.b[i] == stop {
			
 
				 			i++
			
 
				 			out = z.b[z.c:i]
			
 
				-			z.a -= (i - z.c)
			
 
				+			// z.a -= (i - z.c)
			
 
				 			z.c = i
			
 
				 			return
			
 
				 		}
			
 
				 		i++
			
 
				 		goto LOOP
			
 
				 	}
			
 
				-	z.a, z.c = 0, blen
			
 
				+	// z.a = 0
			
 
				+	// z.c = blen
			
 
				 	panic(io.EOF)
			
 
				 }
			
 
				 
			
@@ -1362,8 +1364,15 @@ func (d *Decoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 
				 					d.decodeValue(sfn.field(si), nil, true)
			
 
				 				}
			
 
				 			} else if mf != nil {
			
 
				+				// store rvkencname in new []byte, as it previously shares Decoder.b, which is used in decode
			
 
				+				name2 := rvkencname
			
 
				+				rvkencname = make([]byte, len(rvkencname))
			
 
				+				copy(rvkencname, name2)
			
 
				+
			
 
				 				var f interface{}
			
 
				+				// xdebugf("kStruct: mf != nil: before decode: rvkencname: %s", rvkencname)
			
 
				 				d.decode(&f)
			
 
				+				// xdebugf("kStruct: mf != nil: after decode: rvkencname: %s", rvkencname)
			
 
				 				if !mf.CodecMissingField(rvkencname, f) && d.h.ErrorIfNoField {
			
 
				 					d.errorf("no matching struct field found when decoding stream map with key: %s ",
			
 
				 						stringView(rvkencname))
			
@@ -1924,15 +1933,6 @@ type rtid2rv struct {
 
				 
			
 
				 // --------------
			
 
				 
			
 
				-type decReaderType uint8
			
 
				-
			
 
				-const (
			
 
				-	decReaderTypeBytes decReaderType = iota // make this 0, so a comparison is cheap
			
 
				-	decReaderTypeIo
			
 
				-	decReaderTypeBufio
			
 
				-	decReaderTypeUnset = 255
			
 
				-)
			
 
				-
			
 
				 type decReaderSwitch struct {
			
 
				 	rb bytesDecReader
			
 
				 	// ---- cpu cache line boundary?
			
@@ -1946,18 +1946,19 @@ type decReaderSwitch struct {
 
				 	jsms bool // is json handle, and MapKeyAsString
			
 
				 	esep bool // has elem separators
			
 
				 
			
 
				-	bytes bool
			
 
				-	typ   decReaderType
			
 
				-	// bytes bool // is bytes reader
			
 
				-	// bufio bool // is this a bufioDecReader?
			
 
				+	// typ   entryType
			
 
				+	bytes bool // is bytes reader
			
 
				+	bufio bool // is this a bufioDecReader?
			
 
				 }
			
 
				 
			
 
				-// these first 3 functions all always inlined, as they just check int fields, etc.
			
 
				+// numread, track and stopTrack are always inlined, as they just check int fields, etc.
			
 
				+
			
 
				+/*
			
 
				 func (z *decReaderSwitch) numread() int {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.numread()
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.numread()
			
 
				 	default:
			
 
				 		return z.bi.numread()
			
@@ -1965,9 +1966,9 @@ func (z *decReaderSwitch) numread() int {
 
				 }
			
 
				 func (z *decReaderSwitch) track() {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		z.rb.track()
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		z.ri.track()
			
 
				 	default:
			
 
				 		z.bi.track()
			
@@ -1975,9 +1976,9 @@ func (z *decReaderSwitch) track() {
 
				 }
			
 
				 func (z *decReaderSwitch) stopTrack() []byte {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.stopTrack()
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.stopTrack()
			
 
				 	default:
			
 
				 		return z.bi.stopTrack()
			
@@ -1986,9 +1987,9 @@ func (z *decReaderSwitch) stopTrack() []byte {
 
				 
			
 
				 func (z *decReaderSwitch) unreadn1() {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		z.rb.unreadn1()
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		z.ri.unreadn1()
			
 
				 	default:
			
 
				 		z.bi.unreadn1()
			
@@ -1996,9 +1997,9 @@ func (z *decReaderSwitch) unreadn1() {
 
				 }
			
 
				 func (z *decReaderSwitch) readx(n int) []byte {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.readx(n)
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.readx(n)
			
 
				 	default:
			
 
				 		return z.bi.readx(n)
			
@@ -2006,9 +2007,9 @@ func (z *decReaderSwitch) readx(n int) []byte {
 
				 }
			
 
				 func (z *decReaderSwitch) readb(s []byte) {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		z.rb.readb(s)
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		z.ri.readb(s)
			
 
				 	default:
			
 
				 		z.bi.readb(s)
			
@@ -2016,9 +2017,9 @@ func (z *decReaderSwitch) readb(s []byte) {
 
				 }
			
 
				 func (z *decReaderSwitch) readn1() uint8 {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.readn1()
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.readn1()
			
 
				 	default:
			
 
				 		return z.bi.readn1()
			
@@ -2026,9 +2027,9 @@ func (z *decReaderSwitch) readn1() uint8 {
 
				 }
			
 
				 func (z *decReaderSwitch) skip(accept *bitset256) (token byte) {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.skip(accept)
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.skip(accept)
			
 
				 	default:
			
 
				 		return z.bi.skip(accept)
			
@@ -2036,9 +2037,9 @@ func (z *decReaderSwitch) skip(accept *bitset256) (token byte) {
 
				 }
			
 
				 func (z *decReaderSwitch) readTo(in []byte, accept *bitset256) (out []byte) {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.readTo(in, accept)
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.readTo(in, accept)
			
 
				 	default:
			
 
				 		return z.bi.readTo(in, accept)
			
@@ -2046,16 +2047,16 @@ func (z *decReaderSwitch) readTo(in []byte, accept *bitset256) (out []byte) {
 
				 }
			
 
				 func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
			
 
				 	switch z.typ {
			
 
				-	case decReaderTypeBytes:
			
 
				+	case entryTypeBytes:
			
 
				 		return z.rb.readUntil(in, stop)
			
 
				-	case decReaderTypeIo:
			
 
				+	case entryTypeIo:
			
 
				 		return z.ri.readUntil(in, stop)
			
 
				 	default:
			
 
				 		return z.bi.readUntil(in, stop)
			
 
				 	}
			
 
				 }
			
 
				 
			
 
				-/*
			
 
				+*/
			
 
				 
			
 
				 // the if/else-if/else block is expensive to inline.
			
 
				 // Each node of this construct costs a lot and dominates the budget.
			
@@ -2066,6 +2067,34 @@ func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
 
				 // This allows for the inlining of the common path when z.bytes=true.
			
 
				 // Go 1.12+ supports inlining methods with up to 1 inlined function (or 2 if no other constructs).
			
 
				 
			
 
				+func (z *decReaderSwitch) numread() int {
			
 
				+	if z.bytes {
			
 
				+		return z.rb.numread()
			
 
				+	} else if z.bufio {
			
 
				+		return z.bi.numread()
			
 
				+	} else {
			
 
				+		return z.ri.numread()
			
 
				+	}
			
 
				+}
			
 
				+func (z *decReaderSwitch) track() {
			
 
				+	if z.bytes {
			
 
				+		z.rb.track()
			
 
				+	} else if z.bufio {
			
 
				+		z.bi.track()
			
 
				+	} else {
			
 
				+		z.ri.track()
			
 
				+	}
			
 
				+}
			
 
				+func (z *decReaderSwitch) stopTrack() []byte {
			
 
				+	if z.bytes {
			
 
				+		return z.rb.stopTrack()
			
 
				+	} else if z.bufio {
			
 
				+		return z.bi.stopTrack()
			
 
				+	} else {
			
 
				+		return z.ri.stopTrack()
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 func (z *decReaderSwitch) unreadn1() {
			
 
				 	if z.bytes {
			
 
				 		z.rb.unreadn1()
			
@@ -2074,7 +2103,7 @@ func (z *decReaderSwitch) unreadn1() {
 
				 	}
			
 
				 }
			
 
				 
			
 
				-//go:noinline
			
 
				+// //go:noinline
			
 
				 func (z *decReaderSwitch) unreadn1IO() {
			
 
				 	if z.bufio {
			
 
				 		z.bi.unreadn1()
			
@@ -2089,7 +2118,7 @@ func (z *decReaderSwitch) readx(n int) []byte {
 
				 	return z.readxIO(n)
			
 
				 }
			
 
				 
			
 
				-//go:noinline
			
 
				+// //go:noinline
			
 
				 func (z *decReaderSwitch) readxIO(n int) []byte {
			
 
				 	if z.bufio {
			
 
				 		return z.bi.readx(n)
			
@@ -2119,7 +2148,7 @@ func (z *decReaderSwitch) readn1() uint8 {
 
				 	return z.readn1IO()
			
 
				 }
			
 
				 
			
 
				-//go:noinline
			
 
				+// //go:noinline
			
 
				 func (z *decReaderSwitch) readn1IO() uint8 {
			
 
				 	if z.bufio {
			
 
				 		return z.bi.readn1()
			
@@ -2133,7 +2162,7 @@ func (z *decReaderSwitch) skip(accept *bitset256) (token byte) {
 
				 	return z.skipIO(accept)
			
 
				 }
			
 
				 
			
 
				-//go:noinline
			
 
				+// //go:noinline
			
 
				 func (z *decReaderSwitch) skipIO(accept *bitset256) (token byte) {
			
 
				 	if z.bufio {
			
 
				 		return z.bi.skip(accept)
			
@@ -2161,7 +2190,7 @@ func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
 
				 	return z.readUntilIO(in, stop)
			
 
				 }
			
 
				 
			
 
				-//go:noinline
			
 
				+// //go:noinline
			
 
				 func (z *decReaderSwitch) readUntilIO(in []byte, stop byte) (out []byte) {
			
 
				 	if z.bufio {
			
 
				 		return z.bi.readUntil(in, stop)
			
@@ -2169,8 +2198,6 @@ func (z *decReaderSwitch) readUntilIO(in []byte, stop byte) (out []byte) {
 
				 	return z.ri.readUntil(in, stop)
			
 
				 }
			
 
				 
			
 
				-*/
			
 
				-
			
 
				 // A Decoder reads and decodes an object from an input stream in the codec format.
			
 
				 type Decoder struct {
			
 
				 	panicHdl
			
@@ -2196,6 +2223,7 @@ type Decoder struct {
 
				 	// ---- cpu cache line boundary?
			
 
				 	decReaderSwitch
			
 
				 
			
 
				+	// ---- cpu cache line boundary?
			
 
				 	codecFnPooler
			
 
				 	// cr containerStateRecv
			
 
				 	err error
			
@@ -2204,10 +2232,11 @@ type Decoder struct {
 
				 	maxdepth int16
			
 
				 	_        [4]uint8 // padding
			
 
				 
			
 
				+	is map[string]string // used for interning strings
			
 
				+
			
 
				 	// ---- cpu cache line boundary?
			
 
				 	b [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxEncDrivers
			
 
				 
			
 
				-	is map[string]string // used for interning strings
			
 
				 	// padding - false sharing help // modify 232 if Decoder struct changes.
			
 
				 	// _ [cacheLineSize - 232%cacheLineSize]byte
			
 
				 }
			
@@ -2282,7 +2311,7 @@ func (d *Decoder) Reset(r io.Reader) {
 
				 		return
			
 
				 	}
			
 
				 	d.bytes = false
			
 
				-	d.typ = decReaderTypeUnset
			
 
				+	// d.typ = entryTypeUnset
			
 
				 	if d.h.ReaderBufferSize > 0 {
			
 
				 		if d.bi == nil {
			
 
				 			d.bi = new(bufioDecReader)
			
@@ -2294,7 +2323,8 @@ func (d *Decoder) Reset(r io.Reader) {
 
				 		}
			
 
				 		d.bi.reset(r)
			
 
				 		// d.r = d.bi
			
 
				-		d.typ = decReaderTypeBufio
			
 
				+		// d.typ = entryTypeBufio
			
 
				+		d.bufio = true
			
 
				 	} else {
			
 
				 		// d.ri.x = &d.b
			
 
				 		// d.s = d.sa[:0]
			
@@ -2303,7 +2333,8 @@ func (d *Decoder) Reset(r io.Reader) {
 
				 		}
			
 
				 		d.ri.reset(r)
			
 
				 		// d.r = d.ri
			
 
				-		d.typ = decReaderTypeIo
			
 
				+		// d.typ = entryTypeIo
			
 
				+		d.bufio = false
			
 
				 	}
			
 
				 	d.resetCommon()
			
 
				 }
			
@@ -2315,7 +2346,8 @@ func (d *Decoder) ResetBytes(in []byte) {
 
				 		return
			
 
				 	}
			
 
				 	d.bytes = true
			
 
				-	d.typ = decReaderTypeBytes
			
 
				+	d.bufio = false
			
 
				+	// d.typ = entryTypeBytes
			
 
				 	d.rb.reset(in)
			
 
				 	// d.r = &d.rb
			
 
				 	d.resetCommon()
			
@@ -2402,9 +2434,12 @@ func (d *Decoder) naked() *decNaked {
 
				 // Note: we allow nil values in the stream anywhere except for map keys.
			
 
				 // A nil value in the encoded stream where a map key is expected is treated as an error.
			
 
				 func (d *Decoder) Decode(v interface{}) (err error) {
			
 
				+	// A closure was tried here, since the runtime optimizes a defer that takes no params.
			
 
				+	// That seemed to cause weird issues (like circular reference found, unexpected panic, etc),
			
 
				+	// so the method is deferred directly. See also https://github.com/golang/go/issues/14939#issuecomment-417836139
			
 
				+	// defer func() { d.deferred(&err) }()
			
 
				+	// { x, y := d, &err; defer func() { x.deferred(y) }() }
			
 
				 	defer d.deferred(&err) // err is the named result; NOTE(review): presumably deferred stores a recovered panic in it - confirm
			
 
				-	// defer func() { d.deferred(&err) }() // use closure, as runtime optimizes defer with no params
			
 
				-	// { x := d; y := &err; defer func() { x.deferred(y) }() } // https://github.com/golang/go/issues/14939#issuecomment-417836139
			
 
				 	d.MustDecode(v)
			
 
				 	return
			
 
				 }
			
--- a/codec/encode.go
+++ b/codec/encode.go
@@ -4,7 +4,6 @@
 
				 package codec
			
 
				 
			
 
				 import (
			
 
				-	"bufio"
			
 
				 	"encoding"
			
 
				 	"errors"
			
 
				 	"fmt"
			
@@ -66,10 +65,6 @@ type encDriver interface {
 
				 	atEndOfEncode()
			
 
				 }
			
 
				 
			
 
				-type ioEncStringWriter interface {
			
 
				-	WriteString(s string) (n int, err error)
			
 
				-}
			
 
				-
			
 
				 type encDriverAsis interface {
			
 
				 	EncodeAsis(v []byte)
			
 
				 }
			
@@ -186,6 +181,12 @@ type EncodeOptions struct {
 
				 
			
 
				 // ---------------------------------------------
			
 
				 
			
 
				+/*
			
 
				+
			
 
				+type ioEncStringWriter interface {
			
 
				+	WriteString(s string) (n int, err error)
			
 
				+}
			
 
				+
			
 
				 // ioEncWriter implements encWriter and can write to an io.Writer implementation
			
 
				 type ioEncWriter struct {
			
 
				 	w  io.Writer
			
@@ -196,6 +197,19 @@ type ioEncWriter struct {
 
				 	b  [8]byte
			
 
				 }
			
 
				 
			
 
				+func (z *ioEncWriter) reset(w io.Writer) {
			
 
				+	z.w = w
			
 
				+	var ok bool
			
 
				+	if z.bw, ok = w.(io.ByteWriter); !ok {
			
 
				+		z.bw = z
			
 
				+	}
			
 
				+	if z.sw, ok = w.(ioEncStringWriter); !ok {
			
 
				+		z.sw = z
			
 
				+	}
			
 
				+	z.fw, _ = w.(ioFlusher)
			
 
				+	z.ww = w
			
 
				+}
			
 
				+
			
 
				 func (z *ioEncWriter) WriteByte(b byte) (err error) {
			
 
				 	z.b[0] = b
			
 
				 	_, err = z.w.Write(z.b[:1])
			
@@ -250,6 +264,96 @@ func (z *ioEncWriter) atEndOfEncode() {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+*/
			
 
				+
			
 
				+// ---------------------------------------------
			
 
				+
			
 
				// bufioEncWriter is a small buffered writer over an io.Writer.
				//
				// Bytes are staged in buf[:n] and handed to w by flush, either when the
				// buffer cannot take the next write or at the end of an encode
				// (atEndOfEncode). Write failures are reported by panicking with the error.
				type bufioEncWriter struct {
					buf []byte    // staging buffer; len(buf) is the usable size
					w   io.Writer // destination for flushed bytes
					n   int       // number of pending bytes, held in buf[:n]
					// _   [2]uint64 // padding
					// a int
					// b   [4]byte
					// err
				}

				// reset prepares z to write to w, discarding anything still buffered.
				//
				// bufsize is the requested buffer length. A value <= 0 selects the default
				// of 256 bytes. A value of 1 is raised to 2: writen2 stores two bytes right
				// after a flush, so a shorter buffer would make it index out of range.
				// An existing buffer is reused whenever its capacity is large enough.
				func (z *bufioEncWriter) reset(w io.Writer, bufsize int) {
					const (
						defaultSize = 256 // used when the caller does not ask for a size
						minSize     = 2   // largest fixed-size write (writen2)
					)
					z.w = w
					z.n = 0
					if bufsize <= 0 {
						bufsize = defaultSize
					} else if bufsize < minSize {
						bufsize = minSize
					}
					if cap(z.buf) < bufsize {
						z.buf = make([]byte, bufsize)
					} else {
						z.buf = z.buf[:bufsize]
					}
				}

				// flush writes the pending bytes buf[:n] to w.
				//
				// On a short write or a write error it moves the unwritten bytes to the
				// front of buf, leaves n equal to their count, and panics with the error
				// (io.ErrShortWrite when the writer reported none).
				//
				//go:noinline
				func (z *bufioEncWriter) flush() {
					n, err := z.w.Write(z.buf[:z.n])
					z.n -= n
					if z.n > 0 && err == nil {
						err = io.ErrShortWrite
					}
					if err != nil {
						if n > 0 && z.n > 0 {
							// keep the unwritten tail at the front of the buffer
							copy(z.buf, z.buf[n:z.n+n])
						}
						panic(err)
					}
				}

				// writeb buffers s, flushing as often as needed when s does not fit in
				// the space that is left.
				func (z *bufioEncWriter) writeb(s []byte) {
					// goto stands in for a for loop here, as in the original code.
				LOOP:
					a := len(z.buf) - z.n // free space
					if len(s) > a {
						z.n += copy(z.buf[z.n:], s[:a])
						s = s[a:]
						z.flush()
						goto LOOP
					}
					z.n += copy(z.buf[z.n:], s)
				}

				// writestr buffers s exactly as writeb does, without first converting
				// the string to a byte slice.
				func (z *bufioEncWriter) writestr(s string) {
					// z.writeb(bytesView(s)) // inlined below
				LOOP:
					a := len(z.buf) - z.n // free space
					if len(s) > a {
						z.n += copy(z.buf[z.n:], s[:a])
						s = s[a:]
						z.flush()
						goto LOOP
					}
					z.n += copy(z.buf[z.n:], s)
				}

				// writen1 buffers a single byte, flushing first when the buffer is full.
				func (z *bufioEncWriter) writen1(b1 byte) {
					if 1 > len(z.buf)-z.n {
						z.flush()
					}
					z.buf[z.n] = b1
					z.n++
				}

				// writen2 buffers two bytes, flushing first when fewer than two slots are
				// free. It relies on len(buf) >= 2, which reset guarantees.
				func (z *bufioEncWriter) writen2(b1, b2 byte) {
					if 2 > len(z.buf)-z.n {
						z.flush()
					}
					// the higher index is written first, as in the original code
					z.buf[z.n+1] = b2
					z.buf[z.n] = b1
					z.n += 2
				}

				// atEndOfEncode flushes whatever is still buffered.
				func (z *bufioEncWriter) atEndOfEncode() {
					if z.n > 0 {
						z.flush()
					}
				}
			
 
				+
			
 
				 // ---------------------------------------------
			
 
				 
			
 
				 // bytesEncAppender implements encWriter and can write to a byte slice.
			
@@ -967,17 +1071,111 @@ func (e *Encoder) kMapCanonical(rtkey reflect.Type, rv reflect.Value, mks []refl
 
				 // // --------------------------------------------------
			
 
				 
			
 
				 type encWriterSwitch struct {
			
 
				-	wi   *ioEncWriter
			
 
				-	wb   bytesEncAppender
			
 
				-	wx   bool      // if bytes, wx=true
			
 
				-	esep bool      // whether it has elem separators
			
 
				-	isas bool      // whether e.as != nil
			
 
				-	js   bool      // here, so that no need to piggy back on *codecFner for this
			
 
				-	be   bool      // here, so that no need to piggy back on *codecFner for this
			
 
				-	_    [3]byte   // padding
			
 
				-	_    [2]uint64 // padding
			
 
				+	// wi   *ioEncWriter
			
 
				+	wf bufioEncWriter // buffered writer over an io.Writer; the write methods use it when wx is false
			
 
				+	wb bytesEncAppender // byte-slice appender; the write methods use it when wx is true
			
 
				+	// typ  entryType
			
 
				+	wx   bool    // true when encoding to bytes (wb); false when writing through wf
			
 
				+	esep bool    // whether it has elem separators
			
 
				+	isas bool    // whether e.as != nil
			
 
				+	js   bool    // captured here, so that no need to piggy back on *codecFner for this
			
 
				+	be   bool    // captured here, so that no need to piggy back on *codecFner for this
			
 
				+	_    [2]byte // padding
			
 
				+	// _    [2]uint64 // padding
			
 
				+	// _    uint64    // padding
			
 
				 }
			
 
				 
			
 
				+func (z *encWriterSwitch) writeb(s []byte) { // writeb forwards s to the active writer
			
 
				+	if z.wx { // bytes mode: append to wb
			
 
				+		z.wb.writeb(s)
			
 
				+	} else { // otherwise go through the buffered writer wf
			
 
				+		z.wf.writeb(s)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writestr(s string) { // writestr forwards s to the active writer
			
 
				+	if z.wx { // bytes mode: append to wb
			
 
				+		z.wb.writestr(s)
			
 
				+	} else { // otherwise go through the buffered writer wf
			
 
				+		z.wf.writestr(s)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writen1(b1 byte) { // writen1 forwards one byte to the active writer
			
 
				+	if z.wx { // bytes mode: append to wb
			
 
				+		z.wb.writen1(b1)
			
 
				+	} else { // otherwise go through the buffered writer wf
			
 
				+		z.wf.writen1(b1)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writen2(b1, b2 byte) { // writen2 forwards two bytes to the active writer
			
 
				+	if z.wx { // bytes mode: append to wb
			
 
				+		z.wb.writen2(b1, b2)
			
 
				+	} else { // otherwise go through the buffered writer wf
			
 
				+		z.wf.writen2(b1, b2)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) atEndOfEncode() { // atEndOfEncode notifies the active writer that encoding is done
			
 
				+	if z.wx { // bytes mode: delegate to wb
			
 
				+		z.wb.atEndOfEncode()
			
 
				+	} else { // otherwise delegate to wf, which flushes any buffered bytes
			
 
				+		z.wf.atEndOfEncode()
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+
			
 
				+// ------------------------------------------
			
 
				+func (z *encWriterSwitch) writeb(s []byte) {
			
 
				+	switch z.typ {
			
 
				+	case entryTypeBytes:
			
 
				+		z.wb.writeb(s)
			
 
				+	case entryTypeIo:
			
 
				+		z.wi.writeb(s)
			
 
				+	default:
			
 
				+		z.wf.writeb(s)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writestr(s string) {
			
 
				+	switch z.typ {
			
 
				+	case entryTypeBytes:
			
 
				+		z.wb.writestr(s)
			
 
				+	case entryTypeIo:
			
 
				+		z.wi.writestr(s)
			
 
				+	default:
			
 
				+		z.wf.writestr(s)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writen1(b1 byte) {
			
 
				+	switch z.typ {
			
 
				+	case entryTypeBytes:
			
 
				+		z.wb.writen1(b1)
			
 
				+	case entryTypeIo:
			
 
				+		z.wi.writen1(b1)
			
 
				+	default:
			
 
				+		z.wf.writen1(b1)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) writen2(b1, b2 byte) {
			
 
				+	switch z.typ {
			
 
				+	case entryTypeBytes:
			
 
				+		z.wb.writen2(b1, b2)
			
 
				+	case entryTypeIo:
			
 
				+		z.wi.writen2(b1, b2)
			
 
				+	default:
			
 
				+		z.wf.writen2(b1, b2)
			
 
				+	}
			
 
				+}
			
 
				+func (z *encWriterSwitch) atEndOfEncode() {
			
 
				+	switch z.typ {
			
 
				+	case entryTypeBytes:
			
 
				+		z.wb.atEndOfEncode()
			
 
				+	case entryTypeIo:
			
 
				+		z.wi.atEndOfEncode()
			
 
				+	default:
			
 
				+		z.wf.atEndOfEncode()
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// ------------------------------------------
			
 
				 func (z *encWriterSwitch) writeb(s []byte) {
			
 
				 	if z.wx {
			
 
				 		z.wb.writeb(s)
			
@@ -1014,6 +1212,8 @@ func (z *encWriterSwitch) atEndOfEncode() {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+*/
			
 
				+
			
 
				 // An Encoder writes an object to an output stream in the codec format.
			
 
				 type Encoder struct {
			
 
				 	panicHdl
			
@@ -1031,10 +1231,9 @@ type Encoder struct {
 
				 
			
 
				 	h *BasicHandle
			
 
				 
			
 
				-	// ---- cpu cache line boundary?
			
 
				+	// ---- cpu cache line boundary? + 3
			
 
				 	encWriterSwitch
			
 
				 
			
 
				-	// ---- cpu cache line boundary?
			
 
				 	codecFnPooler
			
 
				 	ci set
			
 
				 
			
@@ -1043,7 +1242,7 @@ type Encoder struct {
 
				 	// ---- cpu cache line boundary?
			
 
				 	// b [scratchByteArrayLen]byte
			
 
				 	// _ [cacheLineSize - scratchByteArrayLen]byte // padding
			
 
				-	b [cacheLineSize + 8]byte // used for encoding a chan or (non-addressable) array of bytes
			
 
				+	b [cacheLineSize - (8 * 2)]byte // used for encoding a chan or (non-addressable) array of bytes
			
 
				 }
			
 
				 
			
 
				 // NewEncoder returns an Encoder for encoding into an io.Writer.
			
@@ -1096,28 +1295,30 @@ func (e *Encoder) Reset(w io.Writer) {
 
				 	if w == nil {
			
 
				 		return
			
 
				 	}
			
 
				-	if e.wi == nil {
			
 
				-		e.wi = new(ioEncWriter)
			
 
				-	}
			
 
				-	var ok bool
			
 
				+	// var ok bool
			
 
				 	e.wx = false
			
 
				-	e.wi.w = w
			
 
				-	if e.h.WriterBufferSize > 0 {
			
 
				-		bw := bufio.NewWriterSize(w, e.h.WriterBufferSize)
			
 
				-		e.wi.bw = bw
			
 
				-		e.wi.sw = bw
			
 
				-		e.wi.fw = bw
			
 
				-		e.wi.ww = bw
			
 
				-	} else {
			
 
				-		if e.wi.bw, ok = w.(io.ByteWriter); !ok {
			
 
				-			e.wi.bw = e.wi
			
 
				-		}
			
 
				-		if e.wi.sw, ok = w.(ioEncStringWriter); !ok {
			
 
				-			e.wi.sw = e.wi
			
 
				-		}
			
 
				-		e.wi.fw, _ = w.(ioFlusher)
			
 
				-		e.wi.ww = w
			
 
				-	}
			
 
				+	// e.typ = entryTypeUnset
			
 
				+	// if e.h.WriterBufferSize > 0 {
			
 
				+	// 	// bw := bufio.NewWriterSize(w, e.h.WriterBufferSize)
			
 
				+	// 	// e.wi.bw = bw
			
 
				+	// 	// e.wi.sw = bw
			
 
				+	// 	// e.wi.fw = bw
			
 
				+	// 	// e.wi.ww = bw
			
 
				+	// 	if e.wf == nil {
			
 
				+	// 		e.wf = new(bufioEncWriter)
			
 
				+	// 	}
			
 
				+	// 	e.wf.reset(w, e.h.WriterBufferSize)
			
 
				+	// 	e.typ = entryTypeBufio
			
 
				+	// } else {
			
 
				+	// 	if e.wi == nil {
			
 
				+	// 		e.wi = new(ioEncWriter)
			
 
				+	// 	}
			
 
				+	// 	e.wi.reset(w)
			
 
				+	// 	e.typ = entryTypeIo
			
 
				+	// }
			
 
				+	e.wf.reset(w, e.h.WriterBufferSize)
			
 
				+	// e.typ = entryTypeBufio
			
 
				+
			
 
				 	// e.w = e.wi
			
 
				 	e.resetCommon()
			
 
				 }
			
@@ -1135,6 +1336,7 @@ func (e *Encoder) ResetBytes(out *[]byte) {
 
				 		in = make([]byte, defEncByteBufSize)
			
 
				 	}
			
 
				 	e.wx = true
			
 
				+	// e.typ = entryTypeBytes
			
 
				 	e.wb.reset(in, out)
			
 
				 	// e.w = &e.wb
			
 
				 	e.resetCommon()
			
@@ -1224,9 +1426,12 @@ func (e *Encoder) ResetBytes(out *[]byte) {
 
				 // Some formats support symbols (e.g. binc) and will properly encode the string
			
 
				 // only once in the stream, and use a tag to refer to it thereafter.
			
 
				 func (e *Encoder) Encode(v interface{}) (err error) {
			
 
				+	// Tried using a closure here, as the runtime optimizes a defer that takes no params.
			
 
				+	// That seemed to cause weird issues (e.g. circular reference found, unexpected panic, etc).
			
 
				+	// Also, see https://github.com/golang/go/issues/14939#issuecomment-417836139
			
 
				+	// defer func() { e.deferred(&err) }() }
			
 
				+	// { x, y := e, &err; defer func() { x.deferred(y) }() }
			
 
				 	defer e.deferred(&err)
			
 
				-	// defer func() { e.deferred(&err) }() } // use closure, as runtime optimizes defer with no params
			
 
				-	// { x := e; y := &err; defer func() { x.deferred(y) }() } // https://github.com/golang/go/issues/14939#issuecomment-417836139
			
 
				 	e.MustEncode(v)
			
 
				 	return
			
 
				 }
			
--- a/codec/helper.go
+++ b/codec/helper.go
@@ -163,6 +163,15 @@ type clsErr struct {
 
				 	errClosed error // error on closing
			
 
				 }
			
 
				 
			
 
				+// type entryType uint8
			
 
				+
			
 
				+// const (
			
 
				+// 	entryTypeBytes entryType = iota // make this 0, so a comparison is cheap
			
 
				+// 	entryTypeIo
			
 
				+// 	entryTypeBufio
			
 
				+// 	entryTypeUnset = 255
			
 
				+// )
			
 
				+
			
 
				 type charEncoding uint8
			
 
				 
			
 
				 const (
			
@@ -2342,8 +2351,9 @@ func (x *bitset32) set(pos byte) {
 
				 type pooler struct {
			
 
				 	dn                                          sync.Pool // for decNaked
			
 
				 	cfn                                         sync.Pool // for codecFner
			
 
				-	tiload                                      sync.Pool
			
 
				+	tiload                                      sync.Pool // for typeInfoLoadArray (type info loading)
			
 
				 	strRv8, strRv16, strRv32, strRv64, strRv128 sync.Pool // for stringRV
			
 
				+	buf64, buf128, buf256, buf512, buf1024      sync.Pool // for fixed-size byte arrays: [64]byte up to [1024]byte
			
 
				 }
			
 
				 
			
 
				 func (p *pooler) init() {
			
@@ -2352,8 +2362,17 @@ func (p *pooler) init() {
 
				 	p.strRv32.New = func() interface{} { return new([32]sfiRv) }
			
 
				 	p.strRv64.New = func() interface{} { return new([64]sfiRv) }
			
 
				 	p.strRv128.New = func() interface{} { return new([128]sfiRv) }
			
 
				+
			
 
				+	p.buf64.New = func() interface{} { return new([64]byte) }
			
 
				+	p.buf128.New = func() interface{} { return new([128]byte) }
			
 
				+	p.buf256.New = func() interface{} { return new([256]byte) }
			
 
				+	p.buf512.New = func() interface{} { return new([512]byte) }
			
 
				+	p.buf1024.New = func() interface{} { return new([1024]byte) }
			
 
				+
			
 
				 	p.dn.New = func() interface{} { x := new(decNaked); x.init(); return x }
			
 
				+
			
 
				 	p.tiload.New = func() interface{} { return new(typeInfoLoadArray) }
			
 
				+
			
 
				 	p.cfn.New = func() interface{} { return new(codecFner) }
			
 
				 }
			
 
				 
			
@@ -2372,6 +2391,23 @@ func (p *pooler) sfiRv64() (sp *sync.Pool, v interface{}) {
 
				 func (p *pooler) sfiRv128() (sp *sync.Pool, v interface{}) {
			
 
				 	return &p.strRv128, p.strRv128.Get()
			
 
				 }
			
 
				+
			
 
				+func (p *pooler) bytes64() (sp *sync.Pool, v interface{}) { // bytes64 returns the buf64 pool and a value obtained from it
			
 
				+	return &p.buf64, p.buf64.Get()
			
 
				+}
			
 
				+func (p *pooler) bytes128() (sp *sync.Pool, v interface{}) { // bytes128 returns the buf128 pool and a value obtained from it
			
 
				+	return &p.buf128, p.buf128.Get()
			
 
				+}
			
 
				+func (p *pooler) bytes256() (sp *sync.Pool, v interface{}) { // bytes256 returns the buf256 pool and a value obtained from it
			
 
				+	return &p.buf256, p.buf256.Get()
			
 
				+}
			
 
				+func (p *pooler) bytes512() (sp *sync.Pool, v interface{}) { // bytes512 returns the buf512 pool and a value obtained from it
			
 
				+	return &p.buf512, p.buf512.Get()
			
 
				+}
			
 
				+func (p *pooler) bytes1024() (sp *sync.Pool, v interface{}) { // bytes1024 returns the buf1024 pool and a value obtained from it
			
 
				+	return &p.buf1024, p.buf1024.Get()
			
 
				+}
			
 
				+
			
 
				 func (p *pooler) decNaked() (sp *sync.Pool, v interface{}) {
			
 
				 	return &p.dn, p.dn.Get()
			
 
				 }
			
@@ -2406,6 +2442,8 @@ func (p *pooler) tiLoad() (sp *sync.Pool, v interface{}) {
 
				 // 	p.tiload.Put(v)
			
 
				 // }
			
 
				 
			
 
				+// ----------------------------------------------------
			
 
				+
			
 
				 type panicHdl struct{}
			
 
				 
			
 
				 func (panicHdl) errorv(err error) {
			
@@ -2429,6 +2467,8 @@ func (panicHdl) errorf(format string, params ...interface{}) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// ----------------------------------------------------
			
 
				+
			
 
				 type errDecorator interface {
			
 
				 	wrapErr(in interface{}, out *error)
			
 
				 }
			
@@ -2437,6 +2477,8 @@ type errDecoratorDef struct{}
 
				 
			
 
				 func (errDecoratorDef) wrapErr(v interface{}, e *error) { *e = fmt.Errorf("%v", v) }
			
 
				 
			
 
				+// ----------------------------------------------------
			
 
				+
			
 
				 type must struct{}
			
 
				 
			
 
				 func (must) String(s string, err error) string {
			
--- a/codec/values_flex_test.go
+++ b/codec/values_flex_test.go
@@ -75,6 +75,7 @@ type missingFielderT1 struct {
 
				 }
			
 
				 
			
 
				 func (t *missingFielderT1) CodecMissingField(field []byte, value interface{}) bool {
			
 
				+	// xdebugf(">> calling CodecMissingField with field: %s, value: %v", field, value)
			
 
				 	switch string(field) {
			
 
				 	case "F":
			
 
				 		t.f = value.(float64)
			
--- a/codec/z_all_test.go
+++ b/codec/z_all_test.go
@@ -68,6 +68,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 
				 	testUseMust = true
			
 
				 	testInternStr = true
			
 
				 	testUseIoEncDec = 0
			
 
				+	// xdebugf("setting StructToArray=true")
			
 
				 	testStructToArray = true
			
 
				 	testCheckCircRef = true
			
 
				 	testUseReset = true
			
@@ -76,6 +77,8 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 
				 	testReinit()
			
 
				 	t.Run("optionsTrue", f)
			
 
				 
			
 
				+	// xdebugf("setting StructToArray=false")
			
 
				+	testStructToArray = false
			
 
				 	testDepth = 6
			
 
				 	testReinit()
			
 
				 	t.Run("optionsTrue-deepstruct", f)