codec: shared pool for large values used in Encode/Decode

These are mostly: codecFner, decNaked, and the bufio []byte buffers.

Also, implement (En|De)coder.Close() for releasing shared resources
back into the pool.

Changes are:
- bufioEncWriter and bufioDecReader are no longer initialized as part of Encoder/Decoder.
- use a pool for the []byte buffer of bufioEncWriter/bufioDecReader.
  We support buffers of 1k, 2k, 4k, 8k or 16k max,
  and use the configured BufferSize as a hint for which bucket to use.
- pooled resources that an Encoder/Decoder reuses beyond the scope of a single function
  are now retrieved on demand and returned to the pool on an explicit Close()
  (see the usage sketch after this list).
- defer functions on Encode and Decode now use a closure (not a method),
  so that the defer can hopefully be stack allocated/inlined/optimized.
- We have code to support calling SetFinalizer on Encoder/Decoder,
  but we turn it off via useFinalizers/removeFinalizerOnClose.
  Users can explicitly call SetFinalizer themselves.
- when testing, call Encoder.Close()/Decoder.Close() if testUseReset=false.
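For illustration only (not part of this commit): a minimal caller-side sketch of the
new lifecycle, encoding/decoding to []byte and releasing pooled resources with an
explicit Close(). The handle and sample values are arbitrary.

    package main

    import "github.com/ugorji/go/codec"

    func main() {
        var h codec.CborHandle // any Handle works; cbor is just an example
        var out []byte

        // Encode into out, then return pooled resources (codecFner, buffers) via Close().
        e := codec.NewEncoderBytes(&out, &h)
        if err := e.Encode(map[string]int{"a": 1}); err != nil {
            panic(err)
        }
        e.Close()

        // Decode it back, again closing when done.
        var v map[string]int
        d := codec.NewDecoderBytes(out, &h)
        if err := d.Decode(&v); err != nil {
            panic(err)
        }
        d.Close()

        // Alternatively, skip explicit Close() and register finalizers yourself:
        //    runtime.SetFinalizer(e, (*codec.Encoder).Close)
        //    runtime.SetFinalizer(d, (*codec.Decoder).Close)
    }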
Ugorji Nwoke, 7 years ago
commit f6bb70d25a
5 changed files with 294 additions and 135 deletions
  1. codec/build.sh        (+1 -0)
  2. codec/decode.go       (+100 -49)
  3. codec/encode.go       (+105 -55)
  4. codec/helper.go       (+82 -31)
  5. codec/shared_test.go  (+6 -0)

codec/build.sh (+1 -0)

@@ -206,6 +206,7 @@ _main() {
        esac
    done
    shift $((OPTIND-1))
+    # echo ">>>> _main: extra args: $@"
    case "x$x" in
        'xt') _tests "$@" ;;
        'xm') _make "$@" ;;

codec/decode.go (+100 -49)

@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"io"
 	"reflect"
+	"runtime"
 	"strconv"
 	"sync"
 	"time"
@@ -547,14 +548,24 @@ type bufioDecReader struct {
 
 	c   uint // cursor
 	buf []byte
+
+	bytesBufPooler
+
 	// err error
-	_ [1]uint64 // padding
-	// _[4]uint64 // padding
+
+	_ [2]uint64 // padding
 }
 
-func (z *bufioDecReader) reset(r io.Reader) {
+func (z *bufioDecReader) reset(r io.Reader, bufsize int) {
 	z.ioDecReaderCommon.reset(r)
 	z.c = 0
+	if cap(z.buf) >= bufsize {
+		z.buf = z.buf[:0]
+	} else {
+		z.bytesBufPooler.end() // potentially return old one to pool
+		z.buf = z.bytesBufPooler.get(bufsize)[:0]
+		// z.buf = make([]byte, 0, bufsize)
+	}
 }
 
 func (z *bufioDecReader) readb(p []byte) {
@@ -2016,6 +2027,37 @@ func (n *decNaked) reset() {
 	n.li, n.lm, n.ln, n.ls = 0, 0, 0, 0
 }
 
+type decNakedPooler struct {
+	n   *decNaked
+	nsp *sync.Pool
+}
+
+// naked must be called before each call to .DecodeNaked, as they will use it.
+func (d *decNakedPooler) naked() *decNaked {
+	if d.n == nil {
+		// consider one of:
+		//   - get from sync.Pool  (if GC is frequent, there's no value here)
+		//   - new alloc           (safest. only init'ed if it a naked decode will be done)
+		//   - field in Decoder    (makes the Decoder struct very big)
+		// To support using a decoder where a DecodeNaked is not needed,
+		// we prefer #1 or #2.
+		// d.n = new(decNaked) // &d.nv // new(decNaked) // grab from a sync.Pool
+		// d.n.init()
+		var v interface{}
+		d.nsp, v = pool.decNaked()
+		d.n = v.(*decNaked)
+	}
+	return d.n
+}
+
+func (d *decNakedPooler) end() {
+	if d.n != nil {
+		// if n != nil, then nsp != nil (they are always set together)
+		d.nsp.Put(d.n)
+		d.n, d.nsp = nil, nil
+	}
+}
+
 // type rtid2rv struct {
 // 	rtid uintptr
 // 	rv   reflect.Value
@@ -2307,8 +2349,7 @@ type Decoder struct {
 	mtid uintptr
 	stid uintptr
 
-	n   *decNaked
-	nsp *sync.Pool
+	decNakedPooler
 
 	h *BasicHandle
 
@@ -2355,6 +2396,11 @@ func NewDecoderBytes(in []byte, h Handle) *Decoder {
 
 func newDecoder(h Handle) *Decoder {
 	d := &Decoder{h: h.getBasicHandle(), err: errDecoderNotInitialized}
+	d.bytes = true
+	if useFinalizers {
+		runtime.SetFinalizer(d, (*Decoder).finalize)
+		// xdebugf(">>>> new(Decoder) with finalizer")
+	}
 	d.r = &d.decReaderSwitch
 	d.hh = h
 	d.be = h.isBinary()
@@ -2408,12 +2454,7 @@ func (d *Decoder) Reset(r io.Reader) {
 		if d.bi == nil {
 			d.bi = new(bufioDecReader)
 		}
-		if cap(d.bi.buf) < d.h.ReaderBufferSize {
-			d.bi.buf = make([]byte, 0, d.h.ReaderBufferSize)
-		} else {
-			d.bi.buf = d.bi.buf[:0]
-		}
-		d.bi.reset(r)
+		d.bi.reset(r, d.h.ReaderBufferSize)
 		// d.r = d.bi
 		// d.typ = entryTypeBufio
 		d.bufio = true
@@ -2445,25 +2486,6 @@ func (d *Decoder) ResetBytes(in []byte) {
 	d.resetCommon()
 }
 
-// naked must be called before each call to .DecodeNaked,
-// as they will use it.
-func (d *Decoder) naked() *decNaked {
-	if d.n == nil {
-		// consider one of:
-		//   - get from sync.Pool  (if GC is frequent, there's no value here)
-		//   - new alloc           (safest. only init'ed if it a naked decode will be done)
-		//   - field in Decoder    (makes the Decoder struct very big)
-		// To support using a decoder where a DecodeNaked is not needed,
-		// we prefer #1 or #2.
-		// d.n = new(decNaked) // &d.nv // new(decNaked) // grab from a sync.Pool
-		// d.n.init()
-		var v interface{}
-		d.nsp, v = pool.decNaked()
-		d.n = v.(*decNaked)
-	}
-	return d.n
-}
-
 // Decode decodes the stream from reader and stores the result in the
 // value pointed to by v. v cannot be a nil pointer. v can also be
 // a reflect.Value of a pointer.
@@ -2531,44 +2553,73 @@ func (d *Decoder) Decode(v interface{}) (err error) {
 	// Also, see https://github.com/golang/go/issues/14939#issuecomment-417836139
 	// defer func() { d.deferred(&err) }()
 	// { x, y := d, &err; defer func() { x.deferred(y) }() }
-	defer d.deferred(&err)
-	d.MustDecode(v)
+	if d.err != nil {
+		return d.err
+	}
+	if recoverPanicToErr {
+		defer func() {
+			if x := recover(); x != nil {
+				panicValToErr(d, x, &d.err)
+				err = d.err
+			}
+		}()
+	}
+
+	// defer d.deferred(&err)
+	d.mustDecode(v)
 	return
 }
 
 // MustDecode is like Decode, but panics if unable to Decode.
 // This provides insight to the code location that triggered the error.
 func (d *Decoder) MustDecode(v interface{}) {
-	// TODO: Top-level: ensure that v is a pointer and not nil.
 	if d.err != nil {
 		panic(d.err)
 	}
+	d.mustDecode(v)
+}
+
+// MustDecode is like Decode, but panics if unable to Decode.
+// This provides insight to the code location that triggered the error.
+func (d *Decoder) mustDecode(v interface{}) {
+	// TODO: Top-level: ensure that v is a pointer and not nil.
 	if d.d.TryDecodeAsNil() {
 		setZero(v)
 	} else {
 		d.decode(v)
 	}
-	d.alwaysAtEnd()
 	// xprintf(">>>>>>>> >>>>>>>> num decFns: %v\n", d.cf.sn)
 }
 
-func (d *Decoder) deferred(err1 *error) {
-	d.alwaysAtEnd()
-	if recoverPanicToErr {
-		if x := recover(); x != nil {
-			panicValToErr(d, x, err1)
-			panicValToErr(d, x, &d.err)
-		}
-	}
+// func (d *Decoder) deferred(err1 *error) {
+// 	if recoverPanicToErr {
+// 		if x := recover(); x != nil {
+// 			panicValToErr(d, x, err1)
+// 			panicValToErr(d, x, &d.err)
+// 		}
+// 	}
+// }
+
+//go:noinline -- as it is run by finalizer
+func (d *Decoder) finalize() {
+	// xdebugf("finalizing Decoder")
+	d.Close()
 }
 
-func (d *Decoder) alwaysAtEnd() {
-	if d.n != nil {
-		// if n != nil, then nsp != nil (they are always set together)
-		d.nsp.Put(d.n)
-		d.n, d.nsp = nil, nil
-	}
-	d.codecFnPooler.alwaysAtEnd()
+// Close releases shared (pooled) resources.
+//
+// It is important to call Close() when done with a Decoder, so those resources
+// are released instantly for use by subsequently created Decoders.
+func (d *Decoder) Close() {
+	if useFinalizers && removeFinalizerOnClose {
+		runtime.SetFinalizer(d, nil)
+	}
+	if d.bi != nil && d.bi.bytesBufPooler.pool != nil {
+		d.bi.buf = nil
+		d.bi.bytesBufPooler.end()
+	}
+	d.decNakedPooler.end()
+	d.codecFnPooler.end()
 }
 
 // // this is not a smart swallow, as it allocates objects and does unnecessary work.

codec/encode.go (+105 -55)

@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"io"
 	"reflect"
+	"runtime"
 	"sort"
 	"strconv"
 	"sync"
@@ -32,7 +33,7 @@ type encWriter interface {
 	writestr(string)
 	writen1(byte)
 	writen2(byte, byte)
-	atEndOfEncode()
+	end()
 }
 
 */
@@ -262,7 +263,7 @@ func (z *ioEncWriter) writen2(b1, b2 byte) {
 // }
 
 //go:noinline - so *encWriterSwitch.XXX has the bytesEncAppender.XXX inlined
-func (z *ioEncWriter) atEndOfEncode() {
+func (z *ioEncWriter) end() {
 	if z.fw != nil {
 		if err := z.fw.Flush(); err != nil {
 			panic(err)
@@ -279,7 +280,10 @@ type bufioEncWriter struct {
 	buf []byte
 	w   io.Writer
 	n   int
-	// _   [2]uint64 // padding
+
+	bytesBufPooler
+
+	_ [3]uint64 // padding
 	// a int
 	// b   [4]byte
 	// err
@@ -288,13 +292,15 @@ type bufioEncWriter struct {
 func (z *bufioEncWriter) reset(w io.Writer, bufsize int) {
 	z.w = w
 	z.n = 0
-	if bufsize == 0 {
+	if bufsize <= 0 {
 		bufsize = defEncByteBufSize
 	}
-	if cap(z.buf) < bufsize {
-		z.buf = make([]byte, bufsize)
+	if cap(z.buf) >= bufsize {
+		z.buf = z.buf[:cap(z.buf)]
 	} else {
-		z.buf = z.buf[:bufsize]
+		z.bytesBufPooler.end() // potentially return old one to pool
+		z.buf = z.bytesBufPooler.get(bufsize)
+		// z.buf = make([]byte, bufsize)
 	}
 }
 
@@ -355,7 +361,7 @@ func (z *bufioEncWriter) writen2(b1, b2 byte) {
 	z.n += 2
 }
 
-func (z *bufioEncWriter) atEndOfEncode() {
+func (z *bufioEncWriter) end() {
 	if z.n > 0 {
 		z.flush()
 	}
@@ -381,7 +387,7 @@ func (z *bytesEncAppender) writen1(b1 byte) {
 func (z *bytesEncAppender) writen2(b1, b2 byte) {
 	z.b = append(z.b, b1, b2)
 }
-func (z *bytesEncAppender) atEndOfEncode() {
+func (z *bytesEncAppender) end() {
 	*(z.out) = z.b
 }
 func (z *bytesEncAppender) reset(in []byte, out *[]byte) {
@@ -1083,52 +1089,52 @@ func (e *Encoder) kMapCanonical(rtkey reflect.Type, rv reflect.Value, mks []refl
 
 type encWriterSwitch struct {
 	// wi   *ioEncWriter
-	wf bufioEncWriter
 	wb bytesEncAppender
+	wf *bufioEncWriter
 	// typ  entryType
-	wx   bool    // if bytes, wx=true
-	esep bool    // whether it has elem separators
-	isas bool    // whether e.as != nil
-	js   bool    // captured here, so that no need to piggy back on *codecFner for this
-	be   bool    // captured here, so that no need to piggy back on *codecFner for this
-	_    [2]byte // padding
+	bytes bool    // encoding to []byte
+	esep  bool    // whether it has elem separators
+	isas  bool    // whether e.as != nil
+	js    bool    // captured here, so that no need to piggy back on *codecFner for this
+	be    bool    // captured here, so that no need to piggy back on *codecFner for this
+	_     [2]byte // padding
 	// _    [2]uint64 // padding
 	// _    uint64    // padding
 }
 
 func (z *encWriterSwitch) writeb(s []byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writeb(s)
 	} else {
 		z.wf.writeb(s)
 	}
 }
 func (z *encWriterSwitch) writestr(s string) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writestr(s)
 	} else {
 		z.wf.writestr(s)
 	}
 }
 func (z *encWriterSwitch) writen1(b1 byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writen1(b1)
 	} else {
 		z.wf.writen1(b1)
 	}
 }
 func (z *encWriterSwitch) writen2(b1, b2 byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writen2(b1, b2)
 	} else {
 		z.wf.writen2(b1, b2)
 	}
 }
-func (z *encWriterSwitch) atEndOfEncode() {
-	if z.wx {
-		z.wb.atEndOfEncode()
+func (z *encWriterSwitch) end() {
+	if z.bytes {
+		z.wb.end()
 	} else {
-		z.wf.atEndOfEncode()
+		z.wf.end()
 	}
 }
 
@@ -1175,51 +1181,51 @@ func (z *encWriterSwitch) writen2(b1, b2 byte) {
 		z.wf.writen2(b1, b2)
 	}
 }
-func (z *encWriterSwitch) atEndOfEncode() {
+func (z *encWriterSwitch) end() {
 	switch z.typ {
 	case entryTypeBytes:
-		z.wb.atEndOfEncode()
+		z.wb.end()
 	case entryTypeIo:
-		z.wi.atEndOfEncode()
+		z.wi.end()
 	default:
-		z.wf.atEndOfEncode()
+		z.wf.end()
 	}
 }
 
 // ------------------------------------------
 func (z *encWriterSwitch) writeb(s []byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writeb(s)
 	} else {
 		z.wi.writeb(s)
 	}
 }
 func (z *encWriterSwitch) writestr(s string) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writestr(s)
 	} else {
 		z.wi.writestr(s)
 	}
 }
 func (z *encWriterSwitch) writen1(b1 byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writen1(b1)
 	} else {
 		z.wi.writen1(b1)
 	}
 }
 func (z *encWriterSwitch) writen2(b1, b2 byte) {
-	if z.wx {
+	if z.bytes {
 		z.wb.writen2(b1, b2)
 	} else {
 		z.wi.writen2(b1, b2)
 	}
 }
-func (z *encWriterSwitch) atEndOfEncode() {
-	if z.wx {
-		z.wb.atEndOfEncode()
+func (z *encWriterSwitch) end() {
+	if z.bytes {
+		z.wb.end()
 	} else {
-		z.wi.atEndOfEncode()
+		z.wi.end()
 	}
 }
 
@@ -1252,15 +1258,17 @@ type Encoder struct {
 	// ---- cpu cache line boundary? + 3
 	encWriterSwitch
 
-	codecFnPooler
 	ci set
+	codecFnPooler
+
+	b [3 * 8]byte // for encoding chan or (non-addressable) [N]byte
 
 	// ---- writable fields during execution --- *try* to keep in sep cache line
 
 	// ---- cpu cache line boundary?
 	// b [scratchByteArrayLen]byte
 	// _ [cacheLineSize - scratchByteArrayLen]byte // padding
-	b [cacheLineSize - (8 * 2)]byte // used for encoding a chan or (non-addressable) array of bytes
+	// b [cacheLineSize - (8 * 0)]byte // used for encoding a chan or (non-addressable) array of bytes
 }
 
 // NewEncoder returns an Encoder for encoding into an io.Writer.
@@ -1286,9 +1294,15 @@ func NewEncoderBytes(out *[]byte, h Handle) *Encoder {
 
 func newEncoder(h Handle) *Encoder {
 	e := &Encoder{h: h.getBasicHandle(), err: errEncoderNotInitialized}
+	e.bytes = true
+	if useFinalizers {
+		runtime.SetFinalizer(e, (*Encoder).finalize)
+		// xdebugf(">>>> new(Encoder) with finalizer")
+	}
 	e.w = &e.encWriterSwitch
 	e.hh = h
 	e.esep = h.hasElemSeparators()
+
 	return e
 }
 
@@ -1314,7 +1328,10 @@ func (e *Encoder) Reset(w io.Writer) {
 		return
 	}
 	// var ok bool
-	e.wx = false
+	e.bytes = false
+	if e.wf == nil {
+		e.wf = new(bufioEncWriter)
+	}
 	// e.typ = entryTypeUnset
 	// if e.h.WriterBufferSize > 0 {
 	// 	// bw := bufio.NewWriterSize(w, e.h.WriterBufferSize)
@@ -1353,7 +1370,7 @@ func (e *Encoder) ResetBytes(out *[]byte) {
 	if in == nil {
 		in = make([]byte, defEncByteBufSize)
 	}
-	e.wx = true
+	e.bytes = true
 	// e.typ = entryTypeBytes
 	e.wb.reset(in, out)
 	// e.w = &e.wb
@@ -1449,8 +1466,21 @@ func (e *Encoder) Encode(v interface{}) (err error) {
 	// Also, see https://github.com/golang/go/issues/14939#issuecomment-417836139
 	// defer func() { e.deferred(&err) }() }
 	// { x, y := e, &err; defer func() { x.deferred(y) }() }
-	defer e.deferred(&err)
-	e.MustEncode(v)
+	if e.err != nil {
+		return e.err
+	}
+	if recoverPanicToErr {
+		defer func() {
+			e.w.end()
+			if x := recover(); x != nil {
+				panicValToErr(e, x, &e.err)
+				err = e.err
+			}
+		}()
+	}
+
+	// defer e.deferred(&err)
+	e.mustEncode(v)
 	return
 }
 
@@ -1460,25 +1490,45 @@ func (e *Encoder) MustEncode(v interface{}) {
 	if e.err != nil {
 		panic(e.err)
 	}
+	e.mustEncode(v)
+}
+
+func (e *Encoder) mustEncode(v interface{}) {
 	e.encode(v)
 	e.e.atEndOfEncode()
-	e.w.atEndOfEncode()
-	e.alwaysAtEnd()
+	e.w.end()
 }
 
-func (e *Encoder) deferred(err1 *error) {
-	e.alwaysAtEnd()
-	if recoverPanicToErr {
-		if x := recover(); x != nil {
-			panicValToErr(e, x, err1)
-			panicValToErr(e, x, &e.err)
-		}
-	}
+// func (e *Encoder) deferred(err1 *error) {
+// 	e.w.end()
+// 	if recoverPanicToErr {
+// 		if x := recover(); x != nil {
+// 			panicValToErr(e, x, err1)
+// 			panicValToErr(e, x, &e.err)
+// 		}
+// 	}
+// }
+
+//go:noinline -- as it is run by finalizer
+func (e *Encoder) finalize() {
+	xdebugf("finalizing Encoder")
+	e.Close()
 }
 
-// func (e *Encoder) alwaysAtEnd() {
-// 	e.codecFnPooler.alwaysAtEnd()
-// }
+// Close releases shared (pooled) resources.
+//
+// It is important to call Close() when done with an Encoder, so those resources
+// are released instantly for use by subsequently created Encoders.
+func (e *Encoder) Close() {
+	if useFinalizers && removeFinalizerOnClose {
+		runtime.SetFinalizer(e, nil)
+	}
+	if e.wf != nil {
+		e.wf.buf = nil
+		e.wf.bytesBufPooler.end()
+	}
+	e.codecFnPooler.end()
+}
 
 func (e *Encoder) encode(iv interface{}) {
 	if iv == nil || definitelyNil(iv) {

codec/helper.go (+82 -31)

@@ -135,14 +135,26 @@ const (
 	wordSizeBits = 32 << (^uint(0) >> 63) // strconv.IntSize
 	wordSize     = wordSizeBits / 8
 
-	maxLevelsEmbedding = 14 // use this, so structFieldInfo fits into 8 bytes
-)
+	// so structFieldInfo fits into 8 bytes
+	maxLevelsEmbedding = 14
 
-var (
-	oneByteArr    = [1]byte{0}
-	zeroByteSlice = oneByteArr[:0:0]
+	// finalizers are used? to Close Encoder/Decoder when they are GC'ed
+	// so that their pooled resources are returned.
+	//
+	// Note that calling SetFinalizer is always expensive,
+	// as code must be run on the systemstack even for SetFinalizer(t, nil).
+	//
+	// We document that folks SHOULD call Close() when done, or can explicitly
+	// call SetFinalizer themselves e.g.
+	//    runtime.SetFinalizer(e, (*Encoder).Close)
+	//    runtime.SetFinalizer(d, (*Decoder).Close)
+	useFinalizers          = false
+	removeFinalizerOnClose = false
 )
 
+var oneByteArr [1]byte
+var zeroByteSlice = oneByteArr[:0:0]
+
 var codecgen bool
 
 var refBitset bitset256
@@ -1986,7 +1998,7 @@ func (d *codecFnPooler) cfer() *codecFner {
 	return d.cf
 }
 
-func (d *codecFnPooler) alwaysAtEnd() {
+func (d *codecFnPooler) end() {
 	if d.cf != nil {
 		d.cfp.Put(d.cf)
 		d.cf, d.cfp = nil, nil
@@ -2339,30 +2351,35 @@ func (x *bitset256) set(pos byte) {
 // ------------
 
 type pooler struct {
-	dn                                          sync.Pool // for decNaked
-	cfn                                         sync.Pool // for codecFner
+	// function-scoped pooled resources
 	tiload                                      sync.Pool // for type info loading
 	strRv8, strRv16, strRv32, strRv64, strRv128 sync.Pool // for stringRV
-	// buf64, buf128, buf256, buf512, buf1024      sync.Pool // for [...]byte
+
+	// lifetime-scoped pooled resources
+	dn                                 sync.Pool // for decNaked
+	cfn                                sync.Pool // for codecFner
+	buf1k, buf2k, buf4k, buf8k, buf16k sync.Pool // for [N]byte
 }
 
 func (p *pooler) init() {
+	// function-scoped pooled resources
+	p.tiload.New = func() interface{} { return new(typeInfoLoadArray) }
+
 	p.strRv8.New = func() interface{} { return new([8]sfiRv) }
 	p.strRv16.New = func() interface{} { return new([16]sfiRv) }
 	p.strRv32.New = func() interface{} { return new([32]sfiRv) }
 	p.strRv64.New = func() interface{} { return new([64]sfiRv) }
 	p.strRv128.New = func() interface{} { return new([128]sfiRv) }
 
-	// p.buf64.New = func() interface{} { return new([64]byte) }
-	// p.buf128.New = func() interface{} { return new([128]byte) }
-	// p.buf256.New = func() interface{} { return new([256]byte) }
-	// p.buf512.New = func() interface{} { return new([512]byte) }
-	// p.buf1024.New = func() interface{} { return new([1024]byte) }
+	// lifetime-scoped pooled resources
+	p.buf1k.New = func() interface{} { return new([1 * 1024]byte) }
+	p.buf2k.New = func() interface{} { return new([2 * 1024]byte) }
+	p.buf4k.New = func() interface{} { return new([4 * 1024]byte) }
+	p.buf8k.New = func() interface{} { return new([8 * 1024]byte) }
+	p.buf16k.New = func() interface{} { return new([16 * 1024]byte) }
 
 	p.dn.New = func() interface{} { x := new(decNaked); x.init(); return x }
 
-	p.tiload.New = func() interface{} { return new(typeInfoLoadArray) }
-
 	p.cfn.New = func() interface{} { return new(codecFner) }
 }
 
@@ -2382,21 +2399,21 @@ func (p *pooler) sfiRv128() (sp *sync.Pool, v interface{}) {
 	return &p.strRv128, p.strRv128.Get()
 }
 
-// func (p *pooler) bytes64() (sp *sync.Pool, v interface{}) {
-// 	return &p.buf64, p.buf64.Get()
-// }
-// func (p *pooler) bytes128() (sp *sync.Pool, v interface{}) {
-// 	return &p.buf128, p.buf128.Get()
-// }
-// func (p *pooler) bytes256() (sp *sync.Pool, v interface{}) {
-// 	return &p.buf256, p.buf256.Get()
-// }
-// func (p *pooler) bytes512() (sp *sync.Pool, v interface{}) {
-// 	return &p.buf512, p.buf512.Get()
-// }
-// func (p *pooler) bytes1024() (sp *sync.Pool, v interface{}) {
-// 	return &p.buf1024, p.buf1024.Get()
-// }
+func (p *pooler) bytes1k() (sp *sync.Pool, v interface{}) {
+	return &p.buf1k, p.buf1k.Get()
+}
+func (p *pooler) bytes2k() (sp *sync.Pool, v interface{}) {
+	return &p.buf2k, p.buf2k.Get()
+}
+func (p *pooler) bytes4k() (sp *sync.Pool, v interface{}) {
+	return &p.buf4k, p.buf4k.Get()
+}
+func (p *pooler) bytes8k() (sp *sync.Pool, v interface{}) {
+	return &p.buf8k, p.buf8k.Get()
+}
+func (p *pooler) bytes16k() (sp *sync.Pool, v interface{}) {
+	return &p.buf16k, p.buf16k.Get()
+}
 
 func (p *pooler) decNaked() (sp *sync.Pool, v interface{}) {
 	return &p.dn, p.dn.Get()
@@ -2496,6 +2513,40 @@ func (must) Float(s float64, err error) float64 {
 	return s
 }
 
+// -------------------
+
+type bytesBufPooler struct {
+	pool    *sync.Pool
+	poolbuf interface{}
+}
+
+func (z *bytesBufPooler) end() {
+	if z.pool != nil {
+		z.pool.Put(z.poolbuf)
+		z.pool, z.poolbuf = nil, nil
+	}
+}
+
+func (z *bytesBufPooler) get(bufsize int) (buf []byte) {
+	if bufsize <= 1*1024 {
+		z.pool, z.poolbuf = pool.bytes1k()
+		buf = z.poolbuf.(*[1 * 1024]byte)[:]
+	} else if bufsize <= 2*1024 {
+		z.pool, z.poolbuf = pool.bytes2k()
+		buf = z.poolbuf.(*[2 * 1024]byte)[:]
+	} else if bufsize <= 4*1024 {
+		z.pool, z.poolbuf = pool.bytes4k()
+		buf = z.poolbuf.(*[4 * 1024]byte)[:]
+	} else if bufsize <= 8*1024 {
+		z.pool, z.poolbuf = pool.bytes8k()
+		buf = z.poolbuf.(*[8 * 1024]byte)[:]
+	} else {
+		z.pool, z.poolbuf = pool.bytes16k()
+		buf = z.poolbuf.(*[16 * 1024]byte)[:]
+	}
+	return
+}
+
 // xdebugf prints the message in red on the terminal.
 // Use it in place of fmt.Printf (which it calls internally)
 func xdebugf(pattern string, args ...interface{}) {

codec/shared_test.go (+6 -0)

@@ -235,6 +235,9 @@ func sTestCodecEncode(ts interface{}, bsIn []byte, fn func([]byte) *bytes.Buffer
 		bs = buf.Bytes()
 		bh.WriterBufferSize = oldWriteBufferSize
 	}
+	if !testUseReset {
+		e.Close()
+	}
 	return
 }
 
@@ -267,6 +270,9 @@ func sTestCodecDecode(bs []byte, ts interface{}, h Handle, bh *BasicHandle) (err
 	if testUseIoEncDec >= 0 {
 		bh.ReaderBufferSize = oldReadBufferSize
 	}
+	if !testUseReset {
+		d.Close()
+	}
 	return
 }