
codec: refactor json appendStringAsBytes and use pools for all []byte alloc

json: refactor appendStringAsBytes

    Previously, appendStringAsBytes would update the jsonDecDriver.bs field.

    Now, it returns an appropriate []byte result, which may be
    the output of readUntil.

    This way, there isn't any unnecessary copying.
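
For illustration, here is a minimal sketch of the return-a-slice pattern (simplified names, not the actual codec types): readUntil hands back a slice that aliases the reader's own buffer, so the caller never copies into a separate scratch slice.

package main

import (
	"bytes"
	"fmt"
)

// sliceReader is a toy reader over an in-memory buffer.
type sliceReader struct {
	buf []byte
	c   int // cursor
}

// readUntil reads up to and including the stop byte.
// The returned slice aliases r.buf, so no copy is made.
func (r *sliceReader) readUntil(stop byte) []byte {
	i := bytes.IndexByte(r.buf[r.c:], stop)
	if i < 0 {
		panic("stop byte not found")
	}
	out := r.buf[r.c : r.c+i+1]
	r.c += i + 1
	return out
}

func main() {
	r := &sliceReader{buf: []byte(`"hello" rest`), c: 1} // cursor past the opening quote
	s := r.readUntil('"')
	fmt.Printf("%s\n", s[:len(s)-1]) // hello
}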

readTo/readUntil no longer take a []byte parameter

Use pools for all byte array allocation, via bytesBufPooler(Plus)
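
Likewise, a hedged sketch of the pooled-buffer idea (pooledBuf and bufPool are invented for this example; the real types are bytesBufPooler and bytesBufPoolerPlus in codec/helper.go): a small wrapper keeps its working []byte, grows it via append, and returns the backing array to a sync.Pool when released.

package main

import (
	"fmt"
	"sync"
)

// bufPool hands out reusable byte slices with a fixed starting capacity.
var bufPool = sync.Pool{
	New: func() interface{} {
		b := make([]byte, 0, 1024)
		return &b
	},
}

// pooledBuf is a toy analogue of a pooled, append-able scratch buffer.
type pooledBuf struct {
	p   *[]byte
	buf []byte
}

func (z *pooledBuf) get() {
	z.p = bufPool.Get().(*[]byte)
	z.buf = (*z.p)[:0]
}

func (z *pooledBuf) appends(b []byte) {
	z.buf = append(z.buf, b...)
}

// end returns the backing slice to the pool; z.buf must not be used afterwards.
func (z *pooledBuf) end() {
	if z.p != nil {
		*z.p = z.buf[:0]
		bufPool.Put(z.p)
		z.p, z.buf = nil, nil
	}
}

func main() {
	var z pooledBuf
	z.get()
	z.appends([]byte("pooled "))
	z.appends([]byte("bytes"))
	fmt.Println(string(z.buf))
	z.end()
}
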
Ugorji Nwoke, 6 years ago
commit 258d29cb75

+ 29 - 16
codec/codec_test.go

@@ -1130,6 +1130,9 @@ func testCodecChan(t *testing.T, h Handle) {
 		}
 		if err := deepEqual(sl1, sl2); err != nil {
 			t.Logf("FAIL: Not Match: %v; len: %v, %v", err, len(sl1), len(sl2))
+			if testVerbose {
+				t.Logf("sl1: %#v, sl2: %#v", sl1, sl2)
+			}
 			t.FailNow()
 		}
 	}
@@ -1160,6 +1163,9 @@ func testCodecChan(t *testing.T, h Handle) {
 		}
 		if err := deepEqual(sl1, sl2); err != nil {
 			t.Logf("FAIL: Not Match: %v; len: %v, %v", err, len(sl1), len(sl2))
+			if testVerbose {
+				t.Logf("sl1: %#v, sl2: %#v", sl1, sl2)
+			}
 			t.FailNow()
 		}
 	}
@@ -3012,14 +3018,17 @@ after the new line
 	}
 }
 
-func TestBufioDecReader(t *testing.T) {
+func doTestBufioDecReader(t *testing.T, bufsize int) {
 	testOnce.Do(testInitAll)
+	bufsizehalf := (bufsize + 1) / 2
+	// TODO: add testing when the buffer size is smaller than the string length.
+
 	// try to read 85 bytes in chunks of 7 at a time.
 	var s = strings.Repeat("01234'56789      ", 5)
 	// fmt.Printf("s: %s\n", s)
 	var r = strings.NewReader(s)
-	var br = &bufioDecReader{buf: make([]byte, 0, 13)}
-	br.r = r
+	var br bufioDecReader
+	br.reset(r, bufsize)
 	b, err := ioutil.ReadAll(br.r)
 	if err != nil {
 		panic(err)
@@ -3035,8 +3044,7 @@ func TestBufioDecReader(t *testing.T) {
 	// readUntil: see: 56789
 	var out []byte
 	var token byte
-	br = &bufioDecReader{buf: make([]byte, 0, 7)}
-	br.r = strings.NewReader(s)
+	br.reset(strings.NewReader(s), bufsizehalf)
 	// println()
 	for _, v2 := range [...]string{
 		`01234'`,
@@ -3044,33 +3052,31 @@ func TestBufioDecReader(t *testing.T) {
 		`56789      01234'`,
 		`56789      01234'`,
 	} {
-		out = br.readUntil(nil, '\'')
+		out = br.readUntil('\'')
 		testDeepEqualErr(string(out), v2, t, "-")
 		// fmt.Printf("readUntil: out: `%s`\n", out)
 	}
-	br = &bufioDecReader{buf: make([]byte, 0, 7)}
-	br.r = strings.NewReader(s)
+	br.reset(strings.NewReader(s), bufsizehalf)
 	// println()
 	for range [4]struct{}{} {
-		out = br.readTo(nil, &jsonNumSet)
+		out = br.readTo(&jsonNumSet)
 		testDeepEqualErr(string(out), `01234`, t, "-")
 		// fmt.Printf("readTo: out: `%s`\n", out)
-		out = br.readUntil(nil, '\'')
+		out = br.readUntil('\'')
 		testDeepEqualErr(string(out), "'", t, "-")
 		// fmt.Printf("readUntil: out: `%s`\n", out)
-		out = br.readTo(nil, &jsonNumSet)
+		out = br.readTo(&jsonNumSet)
 		testDeepEqualErr(string(out), `56789`, t, "-")
 		// fmt.Printf("readTo: out: `%s`\n", out)
-		out = br.readUntil(nil, '0')
+		out = br.readUntil('0')
 		testDeepEqualErr(string(out), `      0`, t, "-")
 		// fmt.Printf("readUntil: out: `%s`\n", out)
 		br.unreadn1()
 	}
-	br = &bufioDecReader{buf: make([]byte, 0, 7)}
-	br.r = strings.NewReader(s)
+	br.reset(strings.NewReader(s), bufsizehalf)
 	// println()
 	for range [4]struct{}{} {
-		out = br.readUntil(nil, ' ')
+		out = br.readUntil(' ')
 		testDeepEqualErr(string(out), `01234'56789 `, t, "-")
 		// fmt.Printf("readUntil: out: |%s|\n", out)
 		token = br.skip(&jsonCharWhitespaceSet)
@@ -3081,6 +3087,13 @@ func TestBufioDecReader(t *testing.T) {
 	// println()
 }
 
+func TestBufioDecReader(t *testing.T) {
+	doTestBufioDecReader(t, 13)
+	doTestBufioDecReader(t, 3)
+	doTestBufioDecReader(t, 5)
+	doTestBufioDecReader(t, 127)
+}
+
 func TestAtomic(t *testing.T) {
 	testOnce.Do(testInitAll)
 	// load, store, load, confirm
@@ -3408,7 +3421,7 @@ func TestJsonStdEncIntf(t *testing.T) {
 	doTestStdEncIntf(t, testJsonH)
 }
 
-func TestJsonMammoth(t *testing.T) {
+func TestJsonMammothA(t *testing.T) {
 	testMammoth(t, testJsonH)
 }
 

+ 36 - 27
codec/decode.go

@@ -36,9 +36,11 @@ const (
 	// when a 'nil' was encountered in the stream.
 	decContainerLenNil = math.MinInt32
 
-	// decFailNonEmptyIntf configures whether we error when decoding naked into a non-empty interface.
+	// decFailNonEmptyIntf configures whether we error
+	// when decoding naked into a non-empty interface.
 	//
-	// Typically, we cannot decode non-nil stream value into nil interface with methods (e.g. io.Reader).
+	// Typically, we cannot decode non-nil stream value into
+	// nil interface with methods (e.g. io.Reader).
 	// However, in some scenarios, this should be allowed:
 	//   - MapType
 	//   - SliceType
@@ -782,9 +784,9 @@ func (d *Decoder) kSlice(f *codecFnInfo, rv reflect.Value) {
 
 	var fn *codecFn
 
-	var rvCanset = rv.CanSet()
-	var rvChanged bool
 	var rv0 = rv
+	var rvChanged bool
+	var rvCanset = rv.CanSet()
 	var rv9 reflect.Value
 
 	rvlen := rvGetSliceLen(rv)
@@ -1362,9 +1364,15 @@ type Decoder struct {
 	// ---- writable fields during execution --- *try* to keep in sep cache line
 	maxdepth int16
 	depth    int16
-	c        containerState
-	_        [3]byte                      // padding
-	b        [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxDecDrivers
+
+	// Extensions can call Decode() within a current Decode() call.
+	// We need to know when the top level Decode() call returns,
+	// so we can decide whether to Release() or not.
+	calls uint16 // what depth in mustDecode are we in now.
+
+	c containerState
+	_ [1]byte                      // padding
+	b [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxDecDrivers
 
 	// padding - false sharing help // modify 232 if Decoder struct changes.
 	// _ [cacheLineSize - 232%cacheLineSize]byte
@@ -1427,6 +1435,7 @@ func (d *Decoder) resetCommon() {
 	d.d.reset()
 	d.err = nil
 	d.depth = 0
+	d.calls = 0
 	d.maxdepth = d.h.MaxDepth
 	if d.maxdepth <= 0 {
 		d.maxdepth = decDefMaxDepth
@@ -1595,22 +1604,22 @@ func (d *Decoder) mustDecode(v interface{}) {
 	// xdebug2f(".... mustDecode: v: %#v", v)
 	// TODO: Top-level: ensure that v is a pointer and not nil.
 
-	if d.bi == nil {
-		// if d.d.TryDecodeAsNil() {
-		// 	setZero(v)
-		// } else {
-		// 	d.decode(v)
-		// }
-		d.decode(v)
-		d.d.atEndOfDecode()
-		// release
-		if !d.h.ExplicitRelease {
-			if d.jdec != nil {
-				d.jdec.release()
-			}
-		}
-		return
-	}
+	// if d.bi == nil {
+	// 	// if d.d.TryDecodeAsNil() {
+	// 	// 	setZero(v)
+	// 	// } else {
+	// 	// 	d.decode(v)
+	// 	// }
+	// 	d.decode(v)
+	// 	d.d.atEndOfDecode()
+	// 	// release
+	// 	if !d.h.ExplicitRelease {
+	// 		if d.jdec != nil {
+	// 			d.jdec.release()
+	// 		}
+	// 	}
+	// 	return
+	// }
 
 	// if d.d.TryDecodeAsNil() {
 	// 	setZero(v)
@@ -1619,14 +1628,14 @@ func (d *Decoder) mustDecode(v interface{}) {
 	// 	d.decode(v)
 	// 	d.bi.calls--
 	// }
-	d.bi.calls++
+	d.calls++
 	d.decode(v)
-	d.bi.calls--
-	if d.bi.calls == 0 {
+	d.calls--
+	if d.calls == 0 {
 		d.d.atEndOfDecode()
 		// release
 		if !d.h.ExplicitRelease {
-			d.bi.release()
+			d.decReaderSwitch.release()
 			if d.jdec != nil {
 				d.jdec.release()
 			}

+ 0 - 1
codec/gen-dec-array.go.tmpl

@@ -59,7 +59,6 @@ if {{var "l"}} == 0 {
         {{if isChan}}{{ $x := printf "%[1]vvcx%[2]v" .TempVar .Rand }}var {{$x}} {{ .Typ }}
 		{{ decLineVar $x -}}
 		{{var "v"}} <- {{ $x }}
-        // println(">>>> sending ", {{ $x }}, " into ", {{var "v"}}) // TODO: remove this
         {{else}}{{/* // if indefinite, etc, then expand the slice if necessary */ -}}
 		var {{var "db"}} bool
 		if {{var "j"}} >= len({{var "v"}}) {

+ 0 - 1
codec/gen.generated.go

@@ -125,7 +125,6 @@ if {{var "l"}} == 0 {
         {{if isChan}}{{ $x := printf "%[1]vvcx%[2]v" .TempVar .Rand }}var {{$x}} {{ .Typ }}
 		{{ decLineVar $x -}}
 		{{var "v"}} <- {{ $x }}
-        // println(">>>> sending ", {{ $x }}, " into ", {{var "v"}}) // TODO: remove this
         {{else}}{{/* // if indefinite, etc, then expand the slice if necessary */ -}}
 		var {{var "db"}} bool
 		if {{var "j"}} >= len({{var "v"}}) {

+ 171 - 41
codec/helper.go

@@ -1373,11 +1373,11 @@ type structFieldInfo struct {
 	// _ [1]byte // padding
 }
 
-func (si *structFieldInfo) setToZeroValue(v reflect.Value) {
-	if v, valid := si.field(v, false); valid {
-		v.Set(reflect.Zero(v.Type()))
-	}
-}
+// func (si *structFieldInfo) setToZeroValue(v reflect.Value) {
+// 	if v, valid := si.field(v, false); valid {
+// 		v.Set(reflect.Zero(v.Type()))
+// 	}
+// }
 
 // rv returns the field of the struct.
 // If anonymous, it returns an Invalid
@@ -2742,6 +2742,8 @@ func (must) Float(s float64, err error) float64 {
 
 // -------------------
 
+const bytesBufPoolerMaxSize = 32 * 1024
+
 type bytesBufPooler struct {
 	pool    *sync.Pool
 	poolbuf interface{}
@@ -2754,44 +2756,106 @@ func (z *bytesBufPooler) end() {
 	}
 }
 
+func (z *bytesBufPooler) capacity() (c int) {
+	switch z.pool {
+	case nil:
+	case &pool.buf256:
+		c = 256
+	case &pool.buf1k:
+		c = 1024
+	case &pool.buf2k:
+		c = 2 * 1024
+	case &pool.buf4k:
+		c = 4 * 1024
+	case &pool.buf8k:
+		c = 8 * 1024
+	case &pool.buf16k:
+		c = 16 * 1024
+	case &pool.buf32k:
+		c = 32 * 1024
+	}
+	return
+}
+
+// func (z *bytesBufPooler) ensureCap(newcap int, bs []byte) (bs2 []byte) {
+// 	if z.pool == nil {
+// 		bs2 = z.get(newcap)[:len(bs)]
+// 		copy(bs2, bs)
+// 		return
+// 	}
+// 	var bp2 bytesBufPooler
+// 	bs2 = bp2.get(newcap)[:len(bs)]
+// 	copy(bs2, bs)
+// 	z.end()
+// 	*z = bp2
+// 	return
+// }
+
+// func (z *bytesBufPooler) buf() (buf []byte) {
+// 	switch z.pool {
+// 	case nil:
+// 	case &pool.buf256:
+// 		buf = z.poolbuf.(*[256]byte)[:]
+// 	case &pool.buf1k:
+// 		buf = z.poolbuf.(*[1 * 1024]byte)[:]
+// 	case &pool.buf2k:
+// 		buf = z.poolbuf.(*[2 * 1024]byte)[:]
+// 	case &pool.buf4k:
+// 		buf = z.poolbuf.(*[4 * 1024]byte)[:]
+// 	case &pool.buf8k:
+// 		buf = z.poolbuf.(*[8 * 1024]byte)[:]
+// 	case &pool.buf16k:
+// 		buf = z.poolbuf.(*[16 * 1024]byte)[:]
+// 	case &pool.buf32k:
+// 		buf = z.poolbuf.(*[32 * 1024]byte)[:]
+// 	}
+// 	return
+// }
+
 func (z *bytesBufPooler) get(bufsize int) (buf []byte) {
-	if z.pool != nil {
-		switch z.pool {
-		case &pool.buf256:
-			if bufsize <= 256 {
-				buf = z.poolbuf.(*[256]byte)[:bufsize]
-			}
-		case &pool.buf1k:
-			if bufsize <= 1*1024 {
-				buf = z.poolbuf.(*[1 * 1024]byte)[:bufsize]
-			}
-		case &pool.buf2k:
-			if bufsize <= 2*1024 {
-				buf = z.poolbuf.(*[2 * 1024]byte)[:bufsize]
-			}
-		case &pool.buf4k:
-			if bufsize <= 4*1024 {
-				buf = z.poolbuf.(*[4 * 1024]byte)[:bufsize]
-			}
-		case &pool.buf8k:
-			if bufsize <= 8*1024 {
-				buf = z.poolbuf.(*[8 * 1024]byte)[:bufsize]
-			}
-		case &pool.buf16k:
-			if bufsize <= 16*1024 {
-				buf = z.poolbuf.(*[16 * 1024]byte)[:bufsize]
-			}
-		case &pool.buf32k:
-			if bufsize <= 32*1024 {
-				buf = z.poolbuf.(*[32 * 1024]byte)[:bufsize]
-			}
+	if bufsize > bytesBufPoolerMaxSize {
+		z.end()
+		return make([]byte, bufsize)
+	}
+
+	switch z.pool {
+	case nil:
+		goto NEW
+	case &pool.buf256:
+		if bufsize <= 256 {
+			buf = z.poolbuf.(*[256]byte)[:bufsize]
 		}
-		if buf != nil {
-			return
+	case &pool.buf1k:
+		if bufsize <= 1*1024 {
+			buf = z.poolbuf.(*[1 * 1024]byte)[:bufsize]
+		}
+	case &pool.buf2k:
+		if bufsize <= 2*1024 {
+			buf = z.poolbuf.(*[2 * 1024]byte)[:bufsize]
+		}
+	case &pool.buf4k:
+		if bufsize <= 4*1024 {
+			buf = z.poolbuf.(*[4 * 1024]byte)[:bufsize]
+		}
+	case &pool.buf8k:
+		if bufsize <= 8*1024 {
+			buf = z.poolbuf.(*[8 * 1024]byte)[:bufsize]
+		}
+	case &pool.buf16k:
+		if bufsize <= 16*1024 {
+			buf = z.poolbuf.(*[16 * 1024]byte)[:bufsize]
+		}
+	case &pool.buf32k:
+		if bufsize <= 32*1024 {
+			buf = z.poolbuf.(*[32 * 1024]byte)[:bufsize]
 		}
-		z.pool.Put(z.poolbuf)
-		z.pool, z.poolbuf = nil, nil
 	}
+	if buf != nil {
+		return
+	}
+	z.end()
+
+NEW:
 
 	// // Try to use binary search.
 	// // This is not optimal, as most folks select 1k or 2k buffers
@@ -2845,9 +2909,9 @@ func (z *bytesBufPooler) get(bufsize int) (buf []byte) {
 	} else if bufsize <= 16*1024 {
 		z.pool, z.poolbuf = &pool.buf16k, pool.buf16k.Get() // pool.bytes16k()
 		buf = z.poolbuf.(*[16 * 1024]byte)[:bufsize]
-	} else { // if bufsize <= 32*1024 {
+	} else if bufsize <= 32*1024 {
 		z.pool, z.poolbuf = &pool.buf32k, pool.buf32k.Get() // pool.bytes32k()
-		buf = z.poolbuf.(*[32 * 1024]byte)[:32*1024]
+		buf = z.poolbuf.(*[32 * 1024]byte)[:bufsize]
 		// } else {
 		// 	z.pool, z.poolbuf = &pool.buf64k, pool.buf64k.Get() // pool.bytes64k()
 		// 	buf = z.poolbuf.(*[64 * 1024]byte)[:]
@@ -2857,6 +2921,72 @@ func (z *bytesBufPooler) get(bufsize int) (buf []byte) {
 
 // ----------------
 
+type bytesBufPoolerPlus struct {
+	bytesBufPooler
+	buf []byte
+}
+
+func (z *bytesBufPoolerPlus) ensureExtraCap(num int) {
+	if cap(z.buf) < len(z.buf)+num {
+		z.ensureCap(len(z.buf) + num)
+	}
+}
+
+func (z *bytesBufPoolerPlus) ensureCap(newcap int) {
+	if cap(z.buf) >= newcap {
+		return
+	}
+	var bs2 []byte
+	if z.pool == nil {
+		bs2 = z.bytesBufPooler.get(newcap)[:len(z.buf)]
+		if z.buf == nil {
+			z.buf = bs2
+		} else {
+			copy(bs2, z.buf)
+			z.buf = bs2
+		}
+		return
+	}
+	var bp2 bytesBufPooler
+	if newcap > bytesBufPoolerMaxSize {
+		bs2 = make([]byte, newcap)
+	} else {
+		bs2 = bp2.get(newcap)
+	}
+	bs2 = bs2[:len(z.buf)]
+	copy(bs2, z.buf)
+	z.end()
+	z.buf = bs2
+	z.bytesBufPooler = bp2
+}
+
+func (z *bytesBufPoolerPlus) get(length int) {
+	z.buf = z.bytesBufPooler.get(length)
+}
+
+func (z *bytesBufPoolerPlus) append(b byte) {
+	z.ensureExtraCap(1)
+	z.buf = append(z.buf, b)
+}
+
+func (z *bytesBufPoolerPlus) appends(b []byte) {
+	z.ensureExtraCap(len(b))
+	z.buf = append(z.buf, b...)
+}
+
+func (z *bytesBufPoolerPlus) end() {
+	z.bytesBufPooler.end()
+	z.buf = nil
+}
+
+func (z *bytesBufPoolerPlus) resetBuf() {
+	if z.buf != nil {
+		z.buf = z.buf[:0]
+	}
+}
+
+// ----------------
+
 type sfiRvPooler struct {
 	pool  *sync.Pool
 	poolv interface{}

+ 1 - 1
codec/helper_unsafe.go

@@ -103,7 +103,7 @@ func isNil(v interface{}) (rv reflect.Value, isnil bool) {
 	tk := rv.Kind()
 	isnil = (tk == reflect.Interface || tk == reflect.Slice) && *(*unsafe.Pointer)(ui.word) == nil
 	return
-	// fmt.Printf(">>>> definitely nil: isnil: %v, TYPE: \t%T, word: %v, *word: %v, type: %v, nil: %v\n",
+	// fmt.Printf(">>>> isNil: isnil: %v, TYPE: %T, word: %v, *word: %v, type: %v, nil: %v\n",
 	// 	v == nil, v, word, *((*unsafe.Pointer)(word)), ui.typ, nil)
 }
 

+ 137 - 112
codec/json.go

@@ -606,16 +606,17 @@ type jsonDecDriver struct {
 	// b [jsonScratchArrayLen]byte // scratch 1, used for parsing strings or numbers or time.Time
 	// ---- cpu cache line boundary?
 	// ---- writable fields during execution --- *try* to keep in sep cache line
-	bs []byte // scratch - for parsing strings, bytes
+	// bs []byte // scratch - for parsing strings, bytes
+
+	bp bytesBufPoolerPlus
 	se interfaceExtWrapper
 
-	bp bytesBufPooler
+	// _ [4]uint64 // padding
 
 	// ---- cpu cache line boundary?
 
-	b2 [cacheLineSize + 32]byte // scratch 2, used only for readUntil, decNumBytes
+	// b2 [cacheLineSize + 32]byte // scratch 2, used only for readUntil, decNumBytes
 
-	// _ [3]uint64 // padding
 	// n jsonNum
 }
 
@@ -806,11 +807,13 @@ func (d *jsonDecDriver) DecodeBool() (v bool) {
 
 func (d *jsonDecDriver) DecodeTime() (t time.Time) {
 	// read string, and pass the string into json.unmarshal
-	d.appendStringAsBytes()
-	if d.fnil {
+	d.advance()
+	if d.tok == 'n' {
+		d.readLit4Null()
 		return
 	}
-	t, err := time.Parse(time.RFC3339, stringView(d.bs))
+	bs := d.readString()
+	t, err := time.Parse(time.RFC3339, stringView(bs))
 	if err != nil {
 		d.d.errorv(err)
 	}
@@ -842,14 +845,15 @@ func (d *jsonDecDriver) ContainerType() (vt valueType) {
 func (d *jsonDecDriver) decNumBytes() (bs []byte) {
 	d.advance()
 	if d.tok == '"' {
-		bs = d.r.readUntil(d.b2[:0], '"')
+		bs = d.r.readUntil('"')
 		bs = bs[:len(bs)-1]
 	} else if d.tok == 'n' {
 		d.readLit4Null()
 	} else {
 		d.r.unreadn1()
-		bs = d.r.readTo(d.b2[:0], &jsonNumSet)
+		bs = d.r.readTo(&jsonNumSet)
 	}
+	// xdebugf("decNumBytes: %s", bs)
 	d.tok = 0
 	return
 }
@@ -972,6 +976,27 @@ func (d *jsonDecDriver) DecodeExt(rv interface{}, xtag uint64, ext Ext) {
 	}
 }
 
+func (d *jsonDecDriver) decBytesFromArray(bs []byte) []byte {
+	if bs == nil {
+		bs = []byte{}
+	} else {
+		bs = bs[:0]
+	}
+	d.tok = 0
+	bs = append(bs, uint8(d.DecodeUint64()))
+	d.tok = d.r.skip(&jsonCharWhitespaceSet)
+	for d.tok != ']' {
+		if d.tok != ',' {
+			d.d.errorf("read array element - expect char '%c' but got char '%c'", ',', d.tok)
+		}
+		d.tok = 0
+		bs = append(bs, uint8(chkOvf.UintV(d.DecodeUint64(), 8)))
+		d.tok = d.r.skip(&jsonCharWhitespaceSet)
+	}
+	d.tok = 0
+	return bs
+}
+
 func (d *jsonDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	// if decoding into raw bytes, and the RawBytesExt is configured, use it to decode.
 	if d.se.InterfaceExt != nil {
@@ -986,29 +1011,12 @@ func (d *jsonDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		if zerocopy && len(bs) == 0 {
 			bs = d.d.b[:]
 		}
-		if bs == nil {
-			bs = []byte{}
-		} else {
-			bs = bs[:0]
-		}
-		d.tok = 0
-		bs = append(bs, uint8(d.DecodeUint64()))
-		d.tok = d.r.skip(&jsonCharWhitespaceSet)
-		for d.tok != ']' {
-			if d.tok != ',' {
-				d.d.errorf("read array element - expect char '%c' but got char '%c'", ',', d.tok)
-			}
-			d.tok = 0
-			bs = append(bs, uint8(chkOvf.UintV(d.DecodeUint64(), 8)))
-			d.tok = d.r.skip(&jsonCharWhitespaceSet)
-		}
-		d.tok = 0
-		return bs
+		return d.decBytesFromArray(bs)
 	}
 
 	// base64 encodes []byte{} as "", and we encode nil []byte as null.
 	// Consequently, base64 should decode null as a nil []byte, and "" as an empty []byte{}.
-	// appendStringAsBytes returns a zero-len slice for both, so as not to reset d.bs.
+	// appendStringAsBytes returns a zero-len slice for both, so as not to reset d.bp.buf.
 	// However, it sets a fnil field to true, so we can check if a null was found.
 
 	// d.appendStringAsBytes()
@@ -1021,32 +1029,21 @@ func (d *jsonDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		return nil
 	}
 
-	if d.tok != '"' {
-		d.d.errorf("json bytes MUST be a base64 encoded string")
-		return
-	}
-
-	bs1 := d.r.readUntil(d.b2[:0], '"')
-	bs1 = bs1[:len(bs1)-1]
-	d.tok = 0
-
-	if len(bs1) == 0 {
-		return []byte{}
-	}
-
+	bs1 := d.readString()
 	slen := base64.StdEncoding.DecodedLen(len(bs1))
-	// TODO: what if slen == 0?
-	if slen <= cap(bs) {
+	if slen == 0 {
+		bsOut = []byte{}
+	} else if slen <= cap(bs) {
 		bsOut = bs[:slen]
 	} else if zerocopy {
-		// if d.bs == nil {
-		// 	d.bs = d.bp.get(slen)
+		// if d.bp.buf == nil {
+		// 	d.bp.buf = d.bp.get(slen)
 		// }
-		if slen <= cap(d.bs) {
-			bsOut = d.bs[:slen]
+		if slen <= cap(d.bp.buf) {
+			bsOut = d.bp.buf[:slen]
 		} else {
-			d.bs = d.bp.get(slen)
-			bsOut = d.bs
+			d.bp.get(slen)
+			bsOut = d.bp.buf
 			// bsOut = make([]byte, slen) // TODO: should i check pool? how to return it back?
 		}
 	} else {
@@ -1065,97 +1062,111 @@ func (d *jsonDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 
 // func (d *jsonDecDriver) DecodeString() (s string) {
 // 	d.appendStringAsBytes()
-// 	return d.bsToString()
+// 	return d.sliceToString()
 // }
 
 func (d *jsonDecDriver) DecodeStringAsBytes() (s []byte) {
 	// defer func() { xdebug2f("DecodeStringAsBytes: %s", s) }()
-	d.appendStringAsBytes()
-	if d.fnil {
-		return nil
-	}
-	return d.bs
-}
-
-func (d *jsonDecDriver) appendStringAsBytes() {
 	d.advance()
-	if d.bs == nil {
-		d.bs = d.bp.get(256)
-	}
-	// xdebug2f("appendStringAsBytes: found: '%c'", d.tok)
 	if d.tok != '"' {
 		// d.d.errorf("expect char '%c' but got char '%c'", '"', d.tok)
 		// handle non-string scalar: null, true, false or a number
 		switch d.tok {
 		case 'n':
 			d.readLit4Null()
-			d.bs = d.bs[:0]
+			return []byte{}
 		case 'f':
 			d.readLit4False()
-			d.bs = d.bs[:5]
-			copy(d.bs, "false")
+			return jsonLiteralFalse
 		case 't':
 			d.readLit4True()
-			d.bs = d.bs[:4]
-			copy(d.bs, "true")
-		default:
-			// try to parse a valid number
-			bs := d.decNumBytes()
-			if len(bs) <= cap(d.bs) {
-				d.bs = d.bs[:len(bs)]
-			} else {
-				d.bs = make([]byte, len(bs))
-			}
-			copy(d.bs, bs)
+			return jsonLiteralTrue
 		}
+		// try to parse a valid number
+		return d.decNumBytes()
+	}
+	s = d.appendStringAsBytes()
+	if d.fnil {
+		return nil
+	}
+	return
+}
+
+func (d *jsonDecDriver) readString() (bs []byte) {
+	if d.tok != '"' {
+		d.d.errorf("expecting string starting with '\"'; got '%c'", d.tok)
 		return
 	}
 
+	bs = d.r.readUntil('"')
+	bs = bs[:len(bs)-1]
 	d.tok = 0
-	r := d.r
-	var cs = r.readUntil(d.b2[:0], '"')
-	var cslen = uint(len(cs))
-	var c uint8
-	v := d.bs[:0]
+	return
+}
+
+func (d *jsonDecDriver) appendStringAsBytes() (bs []byte) {
+	// xdebug2f("appendStringAsBytes: found: '%c'", d.tok)
+
+	if d.bp.buf != nil {
+		d.bp.buf = d.bp.buf[:0]
+	}
+	d.tok = 0
+
+	// xdebug2f("start")
+	var cs = d.r.readUntil('"')
+	// xdebugf("appendStringAsBytes: len: %d, cs: %s", len(cs), cs)
+
 	// append on each byte seen can be expensive, so we just
 	// keep track of where we last read a contiguous set of
 	// non-special bytes (using cursor variable),
 	// and when we see a special byte
 	// e.g. end-of-slice, " or \,
 	// we will append the full range into the v slice before proceeding
+	var cslen = uint(len(cs))
+	var c uint8
 	var i, cursor uint
 	for {
 		if i == cslen {
-			v = append(v, cs[cursor:]...)
-			cs = r.readUntil(d.b2[:0], '"')
+			d.bp.appends(cs[cursor:])
+			// d.bp.ensureExtraCap(int(cslen - cursor))
+			// d.bp.buf = append(d.bp.buf, cs[cursor:]...)
+			cs = d.r.readUntil('"')
+			// xdebugf("appendStringAsBytes: len: %d, cs: %s", len(cs), cs)
 			cslen = uint(len(cs))
 			i, cursor = 0, 0
 		}
 		c = cs[i]
 		if c == '"' {
-			v = append(v, cs[cursor:i]...)
+			if len(d.bp.buf) > 0 {
+				d.bp.appends(cs[cursor:i])
+				// d.bp.ensureExtraCap(int(i - cursor))
+				// d.bp.buf = append(d.bp.buf, cs[cursor:i]...)
+			}
 			break
 		}
 		if c != '\\' {
 			i++
 			continue
 		}
-		v = append(v, cs[cursor:i]...)
+		d.bp.appends(cs[cursor:i])
+		// d.bp.ensureExtraCap(int(i - cursor))
+		// d.bp.buf = append(d.bp.buf, cs[cursor:i]...)
+		d.bp.ensureExtraCap(4) // NOTE: 1 is sufficient, but say 4 for now
 		i++
 		c = cs[i]
 		switch c {
 		case '"', '\\', '/', '\'':
-			v = append(v, c)
+			d.bp.buf = append(d.bp.buf, c)
 		case 'b':
-			v = append(v, '\b')
+			d.bp.buf = append(d.bp.buf, '\b')
 		case 'f':
-			v = append(v, '\f')
+			d.bp.buf = append(d.bp.buf, '\f')
 		case 'n':
-			v = append(v, '\n')
+			d.bp.buf = append(d.bp.buf, '\n')
 		case 'r':
-			v = append(v, '\r')
+			d.bp.buf = append(d.bp.buf, '\r')
 		case 't':
-			v = append(v, '\t')
+			d.bp.buf = append(d.bp.buf, '\t')
 		case 'u':
 			var r rune
 			var rr uint32
@@ -1209,14 +1220,30 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 			}
 		encode_rune:
 			w2 := utf8.EncodeRune(d.bstr[:], r)
-			v = append(v, d.bstr[:w2]...)
+			d.bp.appends(d.bstr[:w2])
+			// d.bp.buf = append(d.bp.buf, d.bstr[:w2]...)
 		default:
 			d.d.errorf("unsupported escaped value: %c", c)
 		}
 		i++
 		cursor = i
 	}
-	d.bs = v
+	if len(d.bp.buf) == 0 {
+		// return cs[:len(cs)-1]
+		// returning cs was failing for bufio, as it seems bufio needs the buffer for other things.
+		// only return cs if bytesDecReader
+		cs = cs[:len(cs)-1]
+		if d.d.bytes {
+			return cs
+		}
+		d.bp.ensureExtraCap(len(cs))
+		d.bp.buf = d.bp.buf[:len(cs)]
+		copy(d.bp.buf, cs)
+		// xdebugf("cs: '%s', d.bp.buf: '%s'", cs, d.bp.buf)
+		return d.bp.buf
+	}
+	// xdebug2f("returning d.bp.buf: %s", d.bp.buf)
+	return d.bp.buf
 }
 
 func (d *jsonDecDriver) nakedNum(z *decNaked, bs []byte) (err error) {
@@ -1268,12 +1295,12 @@ F:
 	return
 }
 
-func (d *jsonDecDriver) bsToString() string {
+func (d *jsonDecDriver) sliceToString(bs []byte) string {
 	// if x := d.s.sc; x != nil && x.so && x.st == '}' { // map key
 	if d.d.is != nil && (jsonAlwaysReturnInternString || d.d.c == containerMapKey) {
-		return d.d.string(d.bs)
+		return d.d.string(bs)
 	}
-	return string(d.bs)
+	return string(bs)
 }
 
 func (d *jsonDecDriver) DecodeNaked() {
@@ -1281,6 +1308,7 @@ func (d *jsonDecDriver) DecodeNaked() {
 	// var decodeFurther bool
 
 	d.advance()
+	var bs []byte
 	switch d.tok {
 	case 'n':
 		d.readLit4Null()
@@ -1299,29 +1327,29 @@ func (d *jsonDecDriver) DecodeNaked() {
 		z.v = valueTypeArray // don't consume. kInterfaceNaked will call ReadArrayStart
 	case '"':
 		// if a string, and MapKeyAsString, then try to decode it as a nil, bool or number first
-		d.appendStringAsBytes()
-		if len(d.bs) > 0 && d.d.c == containerMapKey && d.h.MapKeyAsString {
-			if bytes.Equal(d.bs, jsonLiteralNull) {
+		bs = d.appendStringAsBytes()
+		if len(bs) > 0 && d.d.c == containerMapKey && d.h.MapKeyAsString {
+			if bytes.Equal(bs, jsonLiteralNull) {
 				z.v = valueTypeNil
-			} else if bytes.Equal(d.bs, jsonLiteralTrue) {
+			} else if bytes.Equal(bs, jsonLiteralTrue) {
 				z.v = valueTypeBool
 				z.b = true
-			} else if bytes.Equal(d.bs, jsonLiteralFalse) {
+			} else if bytes.Equal(bs, jsonLiteralFalse) {
 				z.v = valueTypeBool
 				z.b = false
 			} else {
 				// check if a number: float, int or uint
-				if err := d.nakedNum(z, d.bs); err != nil {
+				if err := d.nakedNum(z, bs); err != nil {
 					z.v = valueTypeString
-					z.s = d.bsToString()
+					z.s = d.sliceToString(bs)
 				}
 			}
 		} else {
 			z.v = valueTypeString
-			z.s = d.bsToString()
+			z.s = d.sliceToString(bs)
 		}
 	default: // number
-		bs := d.decNumBytes()
+		bs = d.decNumBytes()
 		if len(bs) == 0 {
 			d.d.errorf("decode number from empty string")
 			return
@@ -1446,7 +1474,6 @@ func (h *JsonHandle) newEncDriver(e *Encoder) (ee encDriver) {
 		hd = &v.jsonEncDriver
 	}
 	hd.e, hd.h = e, h
-	// hd.bs = hd.b[:0]
 	ee.reset()
 	return
 }
@@ -1454,7 +1481,6 @@ func (h *JsonHandle) newEncDriver(e *Encoder) (ee encDriver) {
 func (h *JsonHandle) newDecDriver(d *Decoder) decDriver {
 	// d := jsonDecDriver{r: r.(*bytesDecReader), h: h}
 	hd := jsonDecDriver{d: d, h: h}
-	// hd.bs = hd.b[:0]
 	hd.reset()
 	return &hd
 }
@@ -1478,19 +1504,18 @@ func (e *jsonEncDriver) reset() {
 func (d *jsonDecDriver) reset() {
 	d.r = d.d.r()
 	d.se.InterfaceExt = d.h.RawBytesExt
-	if d.bs != nil {
-		d.bs = d.bs[:0]
+	if d.bp.buf != nil {
+		d.bp.buf = d.bp.buf[:0]
 	}
 	d.tok = 0
 	d.fnil = false
-	// d.n.reset()
 }
 
 func (d *jsonDecDriver) atEndOfDecode() {}
 
 func (d *jsonDecDriver) release() {
-	if d.bs != nil {
-		d.bs = nil
+	l := d.bp.capacity()
+	if l > 0 {
 		d.bp.end()
 	}
 }

+ 220 - 193
codec/reader.go

@@ -26,9 +26,9 @@ type decReader interface {
 	// skip will skip any byte that matches, and return the first non-matching byte
 	skip(accept *bitset256) (token byte)
 	// readTo will read any byte that matches, stopping once no-longer matching.
-	readTo(in []byte, accept *bitset256) (out []byte)
+	readTo(accept *bitset256) (out []byte)
 	// readUntil will read, only stopping once it matches the 'stop' byte.
-	readUntil(in []byte, stop byte) (out []byte)
+	readUntil(stop byte) (out []byte)
 }
 
 */
@@ -45,6 +45,24 @@ const (
 	unreadByteCanUnread
 )
 
+// func appendPool(bs []byte, b byte, bufp *bytesBufPooler) []byte {
+// 	if cap(bs)-len(bs) < 1 {
+// 		bs = bufp.ensureCap(len(bs)+1, bs)
+// 	}
+// 	bs = append(bs, b)
+// 	return bs
+// }
+
+// func appendPoolMany(bs []byte, b []byte, bufp *bytesBufPooler) []byte {
+// 	if cap(bs)-len(bs) < 1 {
+// 		bs = bufp.ensureCap(len(bs)+1, bs)
+// 	}
+// 	bs = append(bs, b...)
+// 	return bs
+// }
+
+// --------------------
+
 type ioDecReaderCommon struct {
 	r io.Reader // the reader passed in
 
@@ -56,7 +74,8 @@ type ioDecReaderCommon struct {
 	_   bool
 	b   [4]byte // tiny buffer for reading single bytes
 
-	tr []byte // tracking bytes read
+	tr   bytesBufPoolerPlus // buffer for tracking bytes
+	bufr bytesBufPoolerPlus // buffer for readTo/readUntil
 }
 
 func (z *ioDecReaderCommon) reset(r io.Reader) {
@@ -64,9 +83,7 @@ func (z *ioDecReaderCommon) reset(r io.Reader) {
 	z.ls = unreadByteUndefined
 	z.l, z.n = 0, 0
 	z.trb = false
-	if z.tr != nil {
-		z.tr = z.tr[:0]
-	}
+	z.tr.end()
 }
 
 func (z *ioDecReaderCommon) numread() uint {
@@ -74,15 +91,24 @@ func (z *ioDecReaderCommon) numread() uint {
 }
 
 func (z *ioDecReaderCommon) track() {
-	if z.tr != nil {
-		z.tr = z.tr[:0]
-	}
+	z.tr.ensureCap(1)
+	z.tr.buf = z.tr.buf[:0]
 	z.trb = true
 }
 
 func (z *ioDecReaderCommon) stopTrack() (bs []byte) {
 	z.trb = false
-	return z.tr
+	return z.tr.buf
+}
+
+func (z *ioDecReaderCommon) resetBufr() {
+	z.bufr.ensureCap(1)
+	z.bufr.buf = z.bufr.buf[:0]
+}
+
+func (z *ioDecReaderCommon) release() {
+	z.tr.end()
+	z.bufr.end()
 }
 
 // ------------------------------------------
@@ -96,7 +122,7 @@ type ioDecReader struct {
 	rr io.Reader
 	br io.ByteScanner
 
-	x [scratchByteArrayLen + 8]byte // for: get struct field name, swallow valueTypeBytes, etc
+	x [scratchByteArrayLen]byte // for: get struct field name, swallow valueTypeBytes, etc
 	// _ [1]uint64                 // padding
 }
 
@@ -180,7 +206,7 @@ func (z *ioDecReader) readx(n uint) (bs []byte) {
 	}
 	z.n += uint(len(bs))
 	if z.trb {
-		z.tr = append(z.tr, bs...)
+		z.tr.appends(bs)
 	}
 	return
 }
@@ -194,7 +220,7 @@ func (z *ioDecReader) readb(bs []byte) {
 	}
 	z.n += uint(len(bs))
 	if z.trb {
-		z.tr = append(z.tr, bs...)
+		z.tr.appends(bs)
 	}
 }
 
@@ -203,7 +229,7 @@ func (z *ioDecReader) readn1eof() (b uint8, eof bool) {
 	if err == nil {
 		z.n++
 		if z.trb {
-			z.tr = append(z.tr, b)
+			z.tr.append(b)
 		}
 	} else if err == io.EOF {
 		eof = true
@@ -218,7 +244,7 @@ func (z *ioDecReader) readn1() (b uint8) {
 	if err == nil {
 		z.n++
 		if z.trb {
-			z.tr = append(z.tr, b)
+			z.tr.append(b)
 		}
 		return
 	}
@@ -248,7 +274,7 @@ LOOP:
 	return
 }
 
-func (z *ioDecReader) readTo(in []byte, accept *bitset256) []byte {
+func (z *ioDecReader) readTo(accept *bitset256) []byte {
 	// out = in
 
 	// for {
@@ -263,22 +289,22 @@ func (z *ioDecReader) readTo(in []byte, accept *bitset256) []byte {
 	// 		return
 	// 	}
 	// }
+	z.resetBufr()
 LOOP:
 	token, eof := z.readn1eof()
 	if eof {
-		return in
+		return z.bufr.buf
 	}
 	if accept.isset(token) {
 		// out = append(out, token)
-		in = append(in, token)
+		z.bufr.append(token)
 		goto LOOP
 	}
 	z.unreadn1()
-	return in
+	return z.bufr.buf
 }
 
-func (z *ioDecReader) readUntil(in []byte, stop byte) (out []byte) {
-	out = in
+func (z *ioDecReader) readUntil(stop byte) []byte {
 	// for {
 	// 	token, eof := z.readn1eof()
 	// 	if eof {
@@ -289,14 +315,15 @@ func (z *ioDecReader) readUntil(in []byte, stop byte) (out []byte) {
 	// 		return
 	// 	}
 	// }
+	z.resetBufr()
 LOOP:
 	token, eof := z.readn1eof()
 	if eof {
 		panic(io.EOF)
 	}
-	out = append(out, token)
+	z.bufr.append(token)
 	if token == stop {
-		return
+		return z.bufr.buf
 	}
 	goto LOOP
 }
@@ -309,8 +336,8 @@ func (z *ioDecReader) unreadn1() {
 	}
 	z.n--
 	if z.trb {
-		if l := len(z.tr) - 1; l >= 0 {
-			z.tr = z.tr[:l]
+		if l := len(z.tr.buf) - 1; l >= 0 {
+			z.tr.buf = z.tr.buf[:l]
 		}
 	}
 }
@@ -319,88 +346,79 @@ func (z *ioDecReader) unreadn1() {
 
 type bufioDecReader struct {
 	ioDecReaderCommon
-	_ uint64 // padding (cache-aligned)
+	// _ uint64 // padding (cache-aligned)
 
-	c   uint // cursor
-	buf []byte
+	c uint // cursor
+	// buf []byte
 
-	bytesBufPooler
+	bp bytesBufPoolerPlus
 
 	// err error
-
-	// Extensions can call Decode() within a current Decode() call.
-	// We need to know when the top level Decode() call returns,
-	// so we can decide whether to Release() or not.
-	calls uint16 // what depth in mustDecode are we in now.
-
-	_ [6]uint8 // padding
 }
 
 func (z *bufioDecReader) reset(r io.Reader, bufsize int) {
 	z.ioDecReaderCommon.reset(r)
 	z.c = 0
-	z.calls = 0
-	if cap(z.buf) >= bufsize {
-		z.buf = z.buf[:0]
-	} else {
-		z.buf = z.bytesBufPooler.get(bufsize)[:0]
+	if cap(z.bp.buf) < bufsize {
+		z.bp.get(bufsize)
 		// z.buf = make([]byte, 0, bufsize)
 	}
+	z.bp.buf = z.bp.buf[:0]
 }
 
 func (z *bufioDecReader) release() {
-	z.buf = nil
-	z.bytesBufPooler.end()
+	z.ioDecReaderCommon.release()
+	z.bp.end()
 }
 
 func (z *bufioDecReader) readb(p []byte) {
-	var n = uint(copy(p, z.buf[z.c:]))
+	var n = uint(copy(p, z.bp.buf[z.c:]))
 	z.n += n
 	z.c += n
 	if len(p) == int(n) {
 		if z.trb {
-			z.tr = append(z.tr, p...) // cost=9
+			z.tr.appends(p)
 		}
 	} else {
 		z.readbFill(p, n)
 	}
 }
 
-//go:noinline - fallback when z.buf is consumed
+//go:noinline - fallback when z.bp.buf is consumed
 func (z *bufioDecReader) readbFill(p0 []byte, n uint) {
-	// at this point, there's nothing in z.buf to read (z.buf is fully consumed)
+	// at this point, there's nothing in z.bp.buf to read (z.bp.buf is fully consumed)
 	p := p0[n:]
 	var n2 uint
 	var err error
-	if len(p) > cap(z.buf) {
+	if len(p) > cap(z.bp.buf) {
 		n2, err = decReadFull(z.r, p)
 		if err != nil {
 			panic(err)
 		}
 		n += n2
 		z.n += n2
-		// always keep last byte in z.buf
-		z.buf = z.buf[:1]
-		z.buf[0] = p[len(p)-1]
+		// always keep last byte in z.bp.buf
+		z.bp.buf = z.bp.buf[:1]
+		z.bp.buf[0] = p[len(p)-1]
 		z.c = 1
 		if z.trb {
-			z.tr = append(z.tr, p0[:n]...)
+			z.tr.appends(p0[:n])
 		}
 		return
 	}
-	// z.c is now 0, and len(p) <= cap(z.buf)
+	// z.c is now 0, and len(p) <= cap(z.bp.buf)
 LOOP:
 	// for len(p) > 0 && z.err == nil {
 	if len(p) > 0 {
-		z.buf = z.buf[0:cap(z.buf)]
+		z.bp.buf = z.bp.buf[0:cap(z.bp.buf)]
 		var n1 int
-		n1, err = z.r.Read(z.buf)
+		n1, err = z.r.Read(z.bp.buf)
 		n2 = uint(n1)
 		if n2 == 0 && err != nil {
 			panic(err)
 		}
-		z.buf = z.buf[:n2]
-		n2 = uint(copy(p, z.buf))
+		z.bp.buf = z.bp.buf[:n2]
+		n2 = uint(copy(p, z.bp.buf))
 		z.c = n2
 		n += n2
 		z.n += n2
@@ -408,25 +426,25 @@ LOOP:
 		goto LOOP
 	}
 	if z.c == 0 {
-		z.buf = z.buf[:1]
-		z.buf[0] = p[len(p)-1]
+		z.bp.buf = z.bp.buf[:1]
+		z.bp.buf[0] = p[len(p)-1]
 		z.c = 1
 	}
 	if z.trb {
-		z.tr = append(z.tr, p0[:n]...)
+		z.tr.appends(p0[:n])
 	}
 }
 
 func (z *bufioDecReader) readn1() (b byte) {
 	// fast-path, so we elide calling into Read() most of the time
-	if z.c < uint(len(z.buf)) {
-		b = z.buf[z.c]
+	if z.c < uint(len(z.bp.buf)) {
+		b = z.bp.buf[z.c]
 		z.c++
 		z.n++
 		if z.trb {
-			z.tr = append(z.tr, b)
+			z.tr.append(b)
 		}
-	} else { // meaning z.c == len(z.buf) or greater ... so need to fill
+	} else { // meaning z.c == len(z.bp.buf) or greater ... so need to fill
 		z.readbFill(z.b[:1], 0)
 		b = z.b[0]
 	}
@@ -440,24 +458,24 @@ func (z *bufioDecReader) unreadn1() {
 	z.c--
 	z.n--
 	if z.trb {
-		z.tr = z.tr[:len(z.tr)-1]
+		z.tr.buf = z.tr.buf[:len(z.tr.buf)-1]
 	}
 }
 
 func (z *bufioDecReader) readx(n uint) (bs []byte) {
 	if n == 0 {
 		// return
-	} else if z.c+n <= uint(len(z.buf)) {
-		bs = z.buf[z.c : z.c+n]
+	} else if z.c+n <= uint(len(z.bp.buf)) {
+		bs = z.bp.buf[z.c : z.c+n]
 		z.n += n
 		z.c += n
 		if z.trb {
-			z.tr = append(z.tr, bs...)
+			z.tr.appends(bs)
 		}
 	} else {
 		bs = make([]byte, n)
 		// n no longer used - can reuse
-		n = uint(copy(bs, z.buf[z.c:]))
+		n = uint(copy(bs, z.bp.buf[z.c:]))
 		z.n += n
 		z.c += n
 		z.readbFill(bs, n)
@@ -465,25 +483,25 @@ func (z *bufioDecReader) readx(n uint) (bs []byte) {
 	return
 }
 
-func (z *bufioDecReader) doTrack(y uint) {
-	z.tr = append(z.tr, z.buf[z.c:y]...) // cost=14???
-}
+// func (z *bufioDecReader) doTrack(y uint) {
+// 	z.tr = append(z.tr, z.bp.buf[z.c:y]...) // cost=14???
+// }
 
-func (z *bufioDecReader) skipLoopFn(i uint) {
-	z.n += (i - z.c) - 1
-	i++
-	if z.trb {
-		// z.tr = append(z.tr, z.buf[z.c:i]...)
-		z.doTrack(i)
-	}
-	z.c = i
-}
+// func (z *bufioDecReader) skipLoopFn(i uint) {
+// 	z.n += (i - z.c) - 1
+// 	i++
+// 	if z.trb {
+// 		// z.tr = append(z.tr, z.bp.buf[z.c:i]...)
+// 		z.doTrack(i)
+// 	}
+// 	z.c = i
+// }
 
 func (z *bufioDecReader) skip(accept *bitset256) (token byte) {
 	// token, _ = z.search(nil, accept, 0, 1); return
 
-	// for i := z.c; i < len(z.buf); i++ {
-	// 	if token = z.buf[i]; !accept.isset(token) {
+	// for i := z.c; i < len(z.bp.buf); i++ {
+	// 	if token = z.bp.buf[i]; !accept.isset(token) {
 	// 		z.skipLoopFn(i)
 	// 		return
 	// 	}
@@ -491,16 +509,16 @@ func (z *bufioDecReader) skip(accept *bitset256) (token byte) {
 
 	i := z.c
 LOOP:
-	if i < uint(len(z.buf)) {
+	if i < uint(len(z.bp.buf)) {
 		// inline z.skipLoopFn(i) and refactor, so cost is within inline budget
-		token = z.buf[i]
+		token = z.bp.buf[i]
 		i++
 		if accept.isset(token) {
 			goto LOOP
 		}
 		z.n += i - 2 - z.c
 		if z.trb {
-			z.doTrack(i)
+			z.tr.appends(z.bp.buf[z.c:i]) // z.doTrack(i)
 		}
 		z.c = i
 		return
@@ -509,70 +527,80 @@ LOOP:
 }
 
 func (z *bufioDecReader) skipFill(accept *bitset256) (token byte) {
-	z.n += uint(len(z.buf)) - z.c
+	// defer func() { xdebugf("skipFill '%c'", token) }()
+	z.n += uint(len(z.bp.buf)) - z.c
 	if z.trb {
-		z.tr = append(z.tr, z.buf[z.c:]...)
+		z.tr.appends(z.bp.buf[z.c:])
 	}
-	var n2 int
+	var i, n2 int
 	var err error
 	for {
 		z.c = 0
-		z.buf = z.buf[0:cap(z.buf)]
-		n2, err = z.r.Read(z.buf)
+		z.bp.buf = z.bp.buf[0:cap(z.bp.buf)]
+		n2, err = z.r.Read(z.bp.buf)
 		if n2 == 0 && err != nil {
 			panic(err)
 		}
-		z.buf = z.buf[:n2]
-		var i int
-		for i, token = range z.buf {
+		z.bp.buf = z.bp.buf[:n2]
+		for i, token = range z.bp.buf {
 			if !accept.isset(token) {
-				z.skipLoopFn(uint(i))
+				z.n += (uint(i) - z.c) - 1
+				z.loopFn(uint(i + 1))
 				return
 			}
 		}
 		// for i := 0; i < n2; i++ {
-		// 	if token = z.buf[i]; !accept.isset(token) {
+		// 	if token = z.bp.buf[i]; !accept.isset(token) {
 		// 		z.skipLoopFn(i)
 		// 		return
 		// 	}
 		// }
 		z.n += uint(n2)
 		if z.trb {
-			z.tr = append(z.tr, z.buf...)
+			z.tr.appends(z.bp.buf)
 		}
 	}
 }
 
-func (z *bufioDecReader) readToLoopFn(i uint, out0 []byte) (out []byte) {
-	// out0 is never nil
-	z.n += (i - z.c) - 1
-	out = append(out0, z.buf[z.c:i]...)
+// func (z *bufioDecReader) readLoopFn(i uint, out0 []byte) (out []byte) {
+// 	out = appendPool(out0, z.bp.buf[z.c:i]...)
+// 	z.loopFn(i)
+// 	return
+// }
+
+func (z *bufioDecReader) loopFn(i uint) {
 	if z.trb {
-		z.doTrack(i)
+		z.tr.appends(z.bp.buf[z.c:i]) // z.doTrack(i)
 	}
 	z.c = i
-	return
 }
 
-func (z *bufioDecReader) readTo(in []byte, accept *bitset256) (out []byte) {
+// func (z *bufioDecReader) readToLoopFn(i uint, out0 []byte) (out []byte) {
+// 	// out0 is never nil
+// 	z.n += (i - z.c) - 1
+// 	return z.readLoopFn(i, out0)
+// }
+
+func (z *bufioDecReader) readTo(accept *bitset256) (out []byte) {
+	// defer func() { xdebug2f("bufio: readTo: %s", out) }()
 	// _, out = z.search(in, accept, 0, 2); return
 
-	// for i := z.c; i < len(z.buf); i++ {
-	// 	if !accept.isset(z.buf[i]) {
+	// for i := z.c; i < len(z.bp.buf); i++ {
+	// 	if !accept.isset(z.bp.buf[i]) {
 	// 		return z.readToLoopFn(i, nil)
 	// 	}
 	// }
 
 	i := z.c
 LOOP:
-	if i < uint(len(z.buf)) {
-		if !accept.isset(z.buf[i]) {
+	if i < uint(len(z.bp.buf)) {
+		if !accept.isset(z.bp.buf[i]) {
 			// return z.readToLoopFn(i, nil)
 			// inline readToLoopFn here (for performance)
 			z.n += (i - z.c) - 1
-			out = z.buf[z.c:i]
+			out = z.bp.buf[z.c:i]
 			if z.trb {
-				z.doTrack(i)
+				z.tr.appends(z.bp.buf[z.c:i]) // z.doTrack(i)
 			}
 			z.c = i
 			return
@@ -580,78 +608,76 @@ LOOP:
 		i++
 		goto LOOP
 	}
-	return z.readToFill(in, accept)
+	return z.readToFill(accept)
 }
 
-func (z *bufioDecReader) readToFill(in []byte, accept *bitset256) (out []byte) {
-	z.n += uint(len(z.buf)) - z.c
-	out = append(in, z.buf[z.c:]...)
+func (z *bufioDecReader) readToFill(accept *bitset256) []byte {
+	z.resetBufr()
+	z.n += uint(len(z.bp.buf)) - z.c
+	z.bufr.appends(z.bp.buf[z.c:])
 	if z.trb {
-		z.tr = append(z.tr, z.buf[z.c:]...)
+		z.tr.appends(z.bp.buf[z.c:])
 	}
 	var n2 int
 	var err error
 	for {
 		z.c = 0
-		z.buf = z.buf[0:cap(z.buf)]
-		n2, err = z.r.Read(z.buf)
+		z.bp.buf = z.bp.buf[:cap(z.bp.buf)]
+		n2, err = z.r.Read(z.bp.buf)
 		if n2 == 0 && err != nil {
 			if err == io.EOF {
-				return // readTo should read until it matches or end is reached
+				return z.bufr.buf // readTo should read until it matches or end is reached
 			}
 			panic(err)
 		}
-		z.buf = z.buf[:n2]
-		for i, token := range z.buf {
+		z.bp.buf = z.bp.buf[:n2]
+		for i, token := range z.bp.buf {
 			if !accept.isset(token) {
-				return z.readToLoopFn(uint(i), out)
+				z.n += (uint(i) - z.c) - 1
+				z.bufr.appends(z.bp.buf[z.c:i])
+				z.loopFn(uint(i))
+				return z.bufr.buf
 			}
 		}
 		// for i := 0; i < n2; i++ {
-		// 	if !accept.isset(z.buf[i]) {
+		// 	if !accept.isset(z.bp.buf[i]) {
 		// 		return z.readToLoopFn(i, out)
 		// 	}
 		// }
-		out = append(out, z.buf...)
+		z.bufr.appends(z.bp.buf)
 		z.n += uint(n2)
 		if z.trb {
-			z.tr = append(z.tr, z.buf...)
+			z.tr.appends(z.bp.buf)
 		}
 	}
 }
 
-func (z *bufioDecReader) readUntilLoopFn(i uint, out0 []byte) (out []byte) {
-	z.n += (i - z.c) - 1
-	i++
-	out = append(out0, z.buf[z.c:i]...)
-	if z.trb {
-		// z.tr = append(z.tr, z.buf[z.c:i]...)
-		z.doTrack(i)
-	}
-	z.c = i
-	return
-}
+// func (z *bufioDecReader) readUntilLoopFn(i uint, out0 []byte) (out []byte) {
+// 	z.n += (i - z.c) - 1
+// 	return z.readLoopFn(i+1, out0)
+// }
 
-func (z *bufioDecReader) readUntil(in []byte, stop byte) (out []byte) {
+func (z *bufioDecReader) readUntil(stop byte) (out []byte) {
+	// defer func() { xdebug2f("bufio: readUntil: %s", out) }()
 	// _, out = z.search(in, nil, stop, 4); return
 
-	// for i := z.c; i < len(z.buf); i++ {
-	// 	if z.buf[i] == stop {
+	// for i := z.c; i < len(z.bp.buf); i++ {
+	// 	if z.bp.buf[i] == stop {
 	// 		return z.readUntilLoopFn(i, nil)
 	// 	}
 	// }
 
 	i := z.c
 LOOP:
-	if i < uint(len(z.buf)) {
-		if z.buf[i] == stop {
+	if i < uint(len(z.bp.buf)) {
+		if z.bp.buf[i] == stop {
 			// inline readUntilLoopFn
 			// return z.readUntilLoopFn(i, nil)
 			z.n += (i - z.c) - 1
 			i++
-			out = z.buf[z.c:i]
+			out = z.bp.buf[z.c:i]
 			if z.trb {
-				z.doTrack(i)
+				z.tr.appends(z.bp.buf[z.c:i]) // z.doTrack(i)
 			}
 			z.c = i
 			return
@@ -659,41 +685,42 @@ LOOP:
 		i++
 		goto LOOP
 	}
-	return z.readUntilFill(in, stop)
+	return z.readUntilFill(stop)
 }
 
-func (z *bufioDecReader) readUntilFill(in []byte, stop byte) (out []byte) {
-	z.n += uint(len(z.buf)) - z.c
-	out = append(in, z.buf[z.c:]...)
+func (z *bufioDecReader) readUntilFill(stop byte) []byte {
+	z.resetBufr()
+	z.n += uint(len(z.bp.buf)) - z.c
+	z.bufr.appends(z.bp.buf[z.c:])
 	if z.trb {
-		z.tr = append(z.tr, z.buf[z.c:]...)
+		z.tr.appends(z.bp.buf[z.c:])
 	}
-	var n1 int
-	var n2 uint
-	var err error
 	for {
 		z.c = 0
-		z.buf = z.buf[0:cap(z.buf)]
-		n1, err = z.r.Read(z.buf)
-		n2 = uint(n1)
-		if n2 == 0 && err != nil {
+		z.bp.buf = z.bp.buf[0:cap(z.bp.buf)]
+		n1, err := z.r.Read(z.bp.buf)
+		if n1 == 0 && err != nil {
 			panic(err)
 		}
-		z.buf = z.buf[:n2]
-		for i, token := range z.buf {
+		n2 := uint(n1)
+		z.bp.buf = z.bp.buf[:n2]
+		for i, token := range z.bp.buf {
 			if token == stop {
-				return z.readUntilLoopFn(uint(i), out)
+				z.n += (uint(i) - z.c) - 1
+				z.bufr.appends(z.bp.buf[z.c : i+1])
+				z.loopFn(uint(i + 1))
+				return z.bufr.buf
 			}
 		}
 		// for i := 0; i < n2; i++ {
-		// 	if z.buf[i] == stop {
+		// 	if z.bp.buf[i] == stop {
 		// 		return z.readUntilLoopFn(i, out)
 		// 	}
 		// }
-		out = append(out, z.buf...)
+		z.bufr.appends(z.bp.buf)
 		z.n += n2
 		if z.trb {
-			z.tr = append(z.tr, z.buf...)
+			z.tr.appends(z.bp.buf)
 		}
 	}
 }
@@ -841,11 +868,7 @@ LOOP:
 	// return
 }
 
-func (z *bytesDecReader) readTo(_ []byte, accept *bitset256) (out []byte) {
-	return z.readToNoInput(accept)
-}
-
-func (z *bytesDecReader) readToNoInput(accept *bitset256) (out []byte) {
+func (z *bytesDecReader) readTo(accept *bitset256) (out []byte) {
 	i := z.c
 	if i == uint(len(z.b)) {
 		panic(io.EOF)
@@ -899,11 +922,7 @@ LOOP:
 	// return z.b[i:z.c]
 }
 
-func (z *bytesDecReader) readUntil(_ []byte, stop byte) (out []byte) {
-	return z.readUntilNoInput(stop)
-}
-
-func (z *bytesDecReader) readUntilNoInput(stop byte) (out []byte) {
+func (z *bytesDecReader) readUntil(stop byte) (out []byte) {
 	i := z.c
 	// if i == len(z.b) {
 	// 	panic(io.EOF)
@@ -1047,24 +1066,24 @@ func (z *decReaderSwitch) skip(accept *bitset256) (token byte) {
 		return z.bi.skip(accept)
 	}
 }
-func (z *decReaderSwitch) readTo(in []byte, accept *bitset256) (out []byte) {
+func (z *decReaderSwitch) readTo(accept *bitset256) (out []byte) {
 	switch z.typ {
 	case entryTypeBytes:
-		return z.rb.readTo(in, accept)
+		return z.rb.readTo(accept)
 	case entryTypeIo:
-		return z.ri.readTo(in, accept)
+		return z.ri.readTo(accept)
 	default:
-		return z.bi.readTo(in, accept)
+		return z.bi.readTo(accept)
 	}
 }
-func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
+func (z *decReaderSwitch) readUntil(stop byte) (out []byte) {
 	switch z.typ {
 	case entryTypeBytes:
-		return z.rb.readUntil(in, stop)
+		return z.rb.readUntil(stop)
 	case entryTypeIo:
-		return z.ri.readUntil(in, stop)
+		return z.ri.readUntil(stop)
 	default:
-		return z.bi.readUntil(in, stop)
+		return z.bi.readUntil(stop)
 	}
 }
 
@@ -1079,6 +1098,14 @@ func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
 // This allows for the inlining of the common path when z.bytes=true.
 // Go 1.12+ supports inlining methods with up to 1 inlined function (or 2 if no other constructs).
 
+func (z *decReaderSwitch) release() {
+	if z.bytes {
+	} else if z.bufio {
+		z.bi.release()
+	} else {
+		z.ri.release()
+	}
+}
 func (z *decReaderSwitch) numread() uint {
 	if z.bytes {
 		return z.rb.numread()
@@ -1188,34 +1215,34 @@ func (z *decReaderSwitch) skipIO(accept *bitset256) (token byte) {
 	return z.ri.skip(accept)
 }
 
-func (z *decReaderSwitch) readTo(in []byte, accept *bitset256) (out []byte) {
+func (z *decReaderSwitch) readTo(accept *bitset256) (out []byte) {
 	if z.bytes {
-		return z.rb.readToNoInput(accept) // z.rb.readTo(in, accept)
+		return z.rb.readTo(accept)
 	}
-	return z.readToIO(in, accept)
+	return z.readToIO(accept)
 }
 
 //go:noinline - fallback for io, ensures z.bytes path is inlined
-func (z *decReaderSwitch) readToIO(in []byte, accept *bitset256) (out []byte) {
+func (z *decReaderSwitch) readToIO(accept *bitset256) (out []byte) {
 	if z.bufio {
-		return z.bi.readTo(in, accept)
+		return z.bi.readTo(accept)
 	}
-	return z.ri.readTo(in, accept)
+	return z.ri.readTo(accept)
 }
-func (z *decReaderSwitch) readUntil(in []byte, stop byte) (out []byte) {
+func (z *decReaderSwitch) readUntil(stop byte) (out []byte) {
 	if z.bytes {
-		return z.rb.readUntilNoInput(stop)
+		return z.rb.readUntil(stop)
 	}
-	return z.readUntilIO(in, stop)
+	return z.readUntilIO(stop)
 }
 
-func (z *decReaderSwitch) readUntilIO(in []byte, stop byte) (out []byte) {
+func (z *decReaderSwitch) readUntilIO(stop byte) (out []byte) {
 	if z.bufio {
-		return z.bi.readUntil(in, stop)
+		return z.bi.readUntil(stop)
 	}
-	return z.ri.readUntil(in, stop)
+	return z.ri.readUntil(stop)
 }
 
-// register these here, so that staticcheck stops barfing
-var _ = (*bytesDecReader).readTo
-var _ = (*bytesDecReader).readUntil
+// // register these here, so that staticcheck stops barfing
+// var _ = (*bytesDecReader).readTo
+// var _ = (*bytesDecReader).readUntil

+ 0 - 1
codec/values_codecgen_generated_test.go

@@ -13104,7 +13104,6 @@ func (x codecSelfer19780) decChanstring(v *chan string, d *Decoder) {
 			var yyvcx1 string
 			yyvcx1 = (string)(string(r.DecodeStringAsBytes()))
 			yyv1 <- yyvcx1
-			// println(">>>> sending ", yyvcx1, " into ", yyv1) // TODO: remove this
 		}
 	}
 	yyh1.End()

+ 2 - 3
codec/z_all_test.go

@@ -39,7 +39,6 @@ func testGroupResetFlags() {
 	testMaxInitLen = 0
 	testUseIoWrapper = false
 	testNumRepeatString = 8
-	testUseIoEncDec = -1
 	testDepth = 0
 	testDecodeOptions = DecodeOptions{}
 	testEncodeOptions = EncodeOptions{}
@@ -92,7 +91,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testReinit()
 	t.Run("optionsTrue-ioWrapper", f)
 
-	testUseIoEncDec = -1
+	// testUseIoEncDec = -1
 
 	// make buffer small enough so that we have to re-fill multiple times.
 	testSkipRPCTests = true
@@ -152,7 +151,7 @@ func testJsonGroup(t *testing.T) {
 	t.Run("TestJsonCodecsEmbeddedPointer", TestJsonCodecsEmbeddedPointer)
 	t.Run("TestJsonCodecChan", TestJsonCodecChan)
 	t.Run("TestJsonStdEncIntf", TestJsonStdEncIntf)
-	t.Run("TestJsonMammoth", TestJsonMammoth)
+	t.Run("TestJsonMammothA", TestJsonMammothA)
 	t.Run("TestJsonRaw", TestJsonRaw)
 	t.Run("TestJsonRpcGo", TestJsonRpcGo)
 	t.Run("TestJsonLargeInteger", TestJsonLargeInteger)