Browse Source

codec: re-factor structs so critical ones try to fill up cache lines

This should afford better performance, especially by eliminating false sharing.
See notes at bottom of 0doc.go for the list of types to watch.
Ugorji Nwoke 8 years ago
parent
commit
adc90d0202
12 changed files with 159 additions and 114 deletions
  1. 21 5
      codec/0doc.go
  2. 10 8
      codec/binc.go
  3. 13 8
      codec/cbor.go
  4. 6 5
      codec/decode.go
  5. 2 2
      codec/gen-helper.generated.go
  6. 2 2
      codec/gen-helper.go.tmpl
  7. 35 27
      codec/helper.go
  8. 1 1
      codec/helper_not_unsafe.go
  9. 2 1
      codec/helper_unsafe.go
  10. 45 43
      codec/json.go
  11. 13 8
      codec/msgpack.go
  12. 9 4
      codec/simple.go

+ 21 - 5
codec/0doc.go

@@ -232,6 +232,9 @@ package codec
 //     However, it will only  be inlined if mid-stack inlining is enabled,
 //     as we call panic to raise errors, and panic currently prevents inlining.
 //
+//   - Unexport BasicHandle.
+//     If godoc can now show the embedded options, then unexport it.
+// 
 // PUNTED:
 //   - To make Handle comparable, make extHandle in BasicHandle a non-embedded pointer,
 //     and use overlay methods on *BasicHandle to call through to extHandle after initializing
@@ -239,9 +242,22 @@ package codec
 //
 // BEFORE EACH RELEASE:
 //   - Look through and fix padding for each type, to eliminate false sharing
-//     - pooled objects: decNaked, codecFner, typeInfoLoadArray, typeInfo,
-//     - small objects that we allocate and modify much (should be in owned cache lines)
-//     - Objects used a lot (must live in own cache lines)
-//       Decoder, Encoder, etc
+//     - critical shared objects that are read many times
+//       TypeInfos
+//     - pooled objects:
+//       decNaked, decNakedContainers, codecFner, typeInfoLoadArray, 
+//     - small objects allocated independently, that we read/use much across threads:
+//       codecFn, typeInfo
+//     - Objects allocated independently and used a lot
+//       Decoder, Encoder,
+//       xxxHandle, xxxEncDriver, xxxDecDriver (xxx = json, msgpack, cbor, binc, simple)
 //     - In all above, arrange values modified together to be close to each other.
-//     Note: we MOSTLY care about the bottom part.
+//
+//     For all of these, either ensure that they occupy full cache lines,
+//     or ensure that the things just past the cache line boundary are hardly read/written
+//     e.g. JsonHandle.RawBytesExt - which is copied into json(En|De)cDriver at init
+//
+//     Occupying full cache lines means they occupy 8*N words (where N is an integer).
+//     Check this out by running: ./run.sh -z
+//     - look at those tagged ****, meaning they are not occupying full cache lines
+//     - look at those tagged <<<<, meaning they are larger than 32 words (something to watch)

+ 10 - 8
codec/binc.go

@@ -60,8 +60,8 @@ type bincEncDriver struct {
 	h *BincHandle
 	w encWriter
 	m map[string]uint16 // symbols
-	b [scratchByteArrayLen]byte
-	s uint16 // symbols sequencer
+	b [16]byte          // scratch, used for encoding numbers - bigendian style
+	s uint16            // symbols sequencer
 	// c containerState
 	encDriverTrackContainerWriter
 	noBuiltInTypes
@@ -328,6 +328,9 @@ type bincDecSymbol struct {
 }
 
 type bincDecDriver struct {
+	decDriverNoopContainerReader
+	noBuiltInTypes
+
 	d      *Decoder
 	h      *BincHandle
 	r      decReader
@@ -336,15 +339,14 @@ type bincDecDriver struct {
 	bd     byte
 	vd     byte
 	vs     byte
-	// noStreamingCodec
-	// decNoSeparator
-	b [scratchByteArrayLen]byte
-
+	_      [3]byte // padding
 	// linear searching on this slice is ok,
 	// because we typically expect < 32 symbols in each stream.
 	s []bincDecSymbol
-	decDriverNoopContainerReader
-	noBuiltInTypes
+
+	// noStreamingCodec
+	// decNoSeparator
+	b [8 * 8]byte // scratch
 }
 
 func (d *bincDecDriver) readNextBd() {

+ 13 - 8
codec/cbor.go

@@ -70,6 +70,7 @@ type cborEncDriver struct {
 	w encWriter
 	h *CborHandle
 	x [8]byte
+	_ [3 * 8]byte // padding
 }
 
 func (e *cborEncDriver) EncodeNil() {
@@ -242,16 +243,17 @@ func (e *cborEncDriver) encStringBytesS(bb byte, v string) {
 // ----------------------
 
 type cborDecDriver struct {
-	d      *Decoder
-	h      *CborHandle
-	r      decReader
-	b      [scratchByteArrayLen]byte
+	d *Decoder
+	h *CborHandle
+	r decReader
+	// b      [scratchByteArrayLen]byte
 	br     bool // bytes reader
 	bdRead bool
 	bd     byte
 	noBuiltInTypes
 	// decNoSeparator
 	decDriverNoopContainerReader
+	_ [3 * 8]byte // padding
 }
 
 func (d *cborDecDriver) readNextBd() {
@@ -470,6 +472,9 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	if d.bd == cborBdIndefiniteBytes || d.bd == cborBdIndefiniteString {
 		d.bdRead = false
 		if bs == nil {
+			if zerocopy {
+				return d.decAppendIndefiniteBytes(d.d.b[:0])
+			}
 			return d.decAppendIndefiniteBytes(zeroByteSlice)
 		}
 		return d.decAppendIndefiniteBytes(bs[:0])
@@ -485,18 +490,18 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		if d.br {
 			return d.r.readx(clen)
 		} else if len(bs) == 0 {
-			bs = d.b[:]
+			bs = d.d.b[:]
 		}
 	}
-	return decByteSlice(d.r, clen, d.d.h.MaxInitLen, bs)
+	return decByteSlice(d.r, clen, d.h.MaxInitLen, bs)
 }
 
 func (d *cborDecDriver) DecodeString() (s string) {
-	return string(d.DecodeBytes(d.b[:], true))
+	return string(d.DecodeBytes(d.d.b[:], true))
 }
 
 func (d *cborDecDriver) DecodeStringAsBytes() (s []byte) {
-	return d.DecodeBytes(d.b[:], true)
+	return d.DecodeBytes(d.d.b[:], true)
 }
 
 func (d *cborDecDriver) DecodeTime() (t time.Time) {

+ 6 - 5
codec/decode.go

@@ -1138,7 +1138,7 @@ func (d *Decoder) kInterface(f *codecFnInfo, rv reflect.Value) {
 	rv.Set(rvn2)
 }
 
-func decStructFieldKey(dd decDriver, keyType valueType, b *[scratchByteArrayLen]byte) (rvkencname []byte) {
+func decStructFieldKey(dd decDriver, keyType valueType, b *[decScratchByteArrayLen]byte) (rvkencname []byte) {
 	// use if-else-if, not switch (which compiles to binary-search)
 	// since keyType is typically valueTypeString, branch prediction is pretty good.
 
@@ -1681,7 +1681,7 @@ type decNaked struct {
 	// ---- cpu cache line boundary?
 	ri, rf, rl, rs, rt, rb reflect.Value // mapping to the primitives above
 
-	// _ [6 * wordSize]byte // padding // TODO: ??? too big padding???
+	_ [6 * 8]byte // padding // TODO: ??? too big padding???
 }
 
 func (n *decNaked) init() {
@@ -1800,6 +1800,8 @@ type decReaderSwitch struct {
 // 	return z.ri.readUntil(in, stop)
 // }
 
+const decScratchByteArrayLen = cacheLineSize - 8
+
 // A Decoder reads and decodes an object from an input stream in the codec format.
 type Decoder struct {
 	panicHdl
@@ -1827,9 +1829,8 @@ type Decoder struct {
 	err error
 
 	// ---- cpu cache line boundary?
-	b  [scratchByteArrayLen]byte
-	is map[string]string                             // used for interning strings
-	_  [cacheLineSize - 8 - scratchByteArrayLen]byte // padding
+	b  [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxDecDrivers
+	is map[string]string            // used for interning strings
 
 	// padding - false sharing help // modify 232 if Decoder struct changes.
 	// _ [cacheLineSize - 232%cacheLineSize]byte

+ 2 - 2
codec/gen-helper.generated.go

@@ -61,7 +61,7 @@ type genHelperDecDriver struct {
 }
 
 func (x genHelperDecDriver) DecodeBuiltin(rt uintptr, v interface{}) {}
-func (x genHelperDecDriver) DecStructFieldKey(keyType valueType, buf *[scratchByteArrayLen]byte) []byte {
+func (x genHelperDecDriver) DecStructFieldKey(keyType valueType, buf *[decScratchByteArrayLen]byte) []byte {
 	return decStructFieldKey(x.decDriver, keyType, buf)
 }
 func (x genHelperDecDriver) DecodeInt(bitsize uint8) (i int64) {
@@ -210,7 +210,7 @@ func (f genHelperDecoder) DecScratchBuffer() []byte {
 }
 
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
-func (f genHelperDecoder) DecScratchArrayBuffer() *[scratchByteArrayLen]byte {
+func (f genHelperDecoder) DecScratchArrayBuffer() *[decScratchByteArrayLen]byte {
 	return &f.d.b
 }
 

+ 2 - 2
codec/gen-helper.go.tmpl

@@ -61,7 +61,7 @@ type genHelperDecDriver struct {
 }
 
 func (x genHelperDecDriver) DecodeBuiltin(rt uintptr, v interface{}) {}
-func (x genHelperDecDriver) DecStructFieldKey(keyType valueType, buf *[scratchByteArrayLen]byte) []byte {
+func (x genHelperDecDriver) DecStructFieldKey(keyType valueType, buf *[decScratchByteArrayLen]byte) []byte {
 	return decStructFieldKey(x.decDriver, keyType, buf)
 }
 func (x genHelperDecDriver) DecodeInt(bitsize uint8) (i int64) {
@@ -193,7 +193,7 @@ func (f genHelperDecoder) DecScratchBuffer() []byte {
 	return f.d.b[:]
 }
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
-func (f genHelperDecoder) DecScratchArrayBuffer() *[scratchByteArrayLen]byte {
+func (f genHelperDecoder) DecScratchArrayBuffer() *[decScratchByteArrayLen]byte {
 	return &f.d.b
 }
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*

+ 35 - 27
codec/helper.go

@@ -266,7 +266,7 @@ type typeInfoLoadArray struct {
 	etypes   [typeInfoLoadArrayLen]uintptr
 	sfis     [typeInfoLoadArrayLen]*structFieldInfo
 	sfiidx   [typeInfoLoadArrayLen]sfiIdx
-	_        [32]bool // padding
+	_        [32]byte // padding
 }
 
 // mirror json.Marshaler and json.Unmarshaler here,
@@ -411,6 +411,9 @@ type MapBySlice interface {
 //
 // Deprecated: DO NOT USE DIRECTLY. EXPORTED FOR GODOC BENEFIT. WILL BE REMOVED.
 type BasicHandle struct {
+	// BasicHandle is always a part of a different type.
+	// It doesn't have to fit into its own cache lines.
+
 	// TypeInfos is used to get the type info for any type.
 	//
 	// If not configured, the default TypeInfos is used, which uses struct tag keys: codec, json
@@ -423,7 +426,7 @@ type BasicHandle struct {
 	EncodeOptions
 	DecodeOptions
 	RPCOptions
-	noBuiltInTypeChecker
+	// noBuiltInTypeChecker
 }
 
 func (x *BasicHandle) getBasicHandle() *BasicHandle {
@@ -450,7 +453,7 @@ type Handle interface {
 	newDecDriver(r *Decoder) decDriver
 	isBinary() bool
 	hasElemSeparators() bool
-	IsBuiltinType(rtid uintptr) bool
+	// IsBuiltinType(rtid uintptr) bool
 }
 
 // Raw represents raw formatted bytes.
@@ -568,11 +571,11 @@ func (textEncodingType) isBinary() bool { return false }
 // noBuiltInTypes is embedded into many types which do not support builtins
 // e.g. msgpack, simple, cbor.
 
-type noBuiltInTypeChecker struct{}
-
-func (noBuiltInTypeChecker) IsBuiltinType(rt uintptr) bool { return false }
+// type noBuiltInTypeChecker struct{}
+// func (noBuiltInTypeChecker) IsBuiltinType(rt uintptr) bool { return false }
+// type noBuiltInTypes struct{ noBuiltInTypeChecker }
 
-type noBuiltInTypes struct{ noBuiltInTypeChecker }
+type noBuiltInTypes struct{}
 
 func (noBuiltInTypes) EncodeBuiltin(rt uintptr, v interface{}) {}
 func (noBuiltInTypes) DecodeBuiltin(rt uintptr, v interface{}) {}
@@ -622,9 +625,9 @@ type extHandle []extTypeTagFn
 // To deregister an Ext, call AddExt with nil encfn and/or nil decfn.
 //
 // Deprecated: Use SetBytesExt or SetInterfaceExt on the Handle instead.
-func (o *extHandle) AddExt(
-	rt reflect.Type, tag byte,
-	encfn func(reflect.Value) ([]byte, error), decfn func(reflect.Value, []byte) error) (err error) {
+func (o *extHandle) AddExt(rt reflect.Type, tag byte,
+	encfn func(reflect.Value) ([]byte, error),
+	decfn func(reflect.Value, []byte) error) (err error) {
 	if encfn == nil || decfn == nil {
 		return o.SetExt(rt, uint64(tag), nil)
 	}
@@ -924,22 +927,22 @@ type typeInfo struct {
 	// ---- cpu cache line boundary?
 	// format of marshal type fields below: [btj][mu]p? OR csp?
 
-	bm  bool    // T is a binaryMarshaler
-	bmp bool    // *T is a binaryMarshaler
-	bu  bool    // T is a binaryUnmarshaler
-	bup bool    // *T is a binaryUnmarshaler
-	tm  bool    // T is a textMarshaler
-	tmp bool    // *T is a textMarshaler
-	tu  bool    // T is a textUnmarshaler
-	tup bool    // *T is a textUnmarshaler
-	jm  bool    // T is a jsonMarshaler
-	jmp bool    // *T is a jsonMarshaler
-	ju  bool    // T is a jsonUnmarshaler
-	jup bool    // *T is a jsonUnmarshaler
-	cs  bool    // T is a Selfer
-	csp bool    // *T is a Selfer
-	_   [3]byte // padding
-
+	bm  bool        // T is a binaryMarshaler
+	bmp bool        // *T is a binaryMarshaler
+	bu  bool        // T is a binaryUnmarshaler
+	bup bool        // *T is a binaryUnmarshaler
+	tm  bool        // T is a textMarshaler
+	tmp bool        // *T is a textMarshaler
+	tu  bool        // T is a textUnmarshaler
+	tup bool        // *T is a textUnmarshaler
+	jm  bool        // T is a jsonMarshaler
+	jmp bool        // *T is a jsonMarshaler
+	ju  bool        // T is a jsonUnmarshaler
+	jup bool        // *T is a jsonUnmarshaler
+	cs  bool        // T is a Selfer
+	csp bool        // *T is a Selfer
+	_   [3]byte     // padding
+	_   [4 * 8]byte // padding
 }
 
 // define length beyond which we do a binary search instead of a linear search.
@@ -985,9 +988,11 @@ type rtid2ti struct {
 // It is configured with a set of tag keys, which are used to get
 // configuration for the type.
 type TypeInfos struct {
-	infos atomicTypeInfoSlice // formerly map[uintptr]*typeInfo, now *[]rtid2ti
+	// infos: formerly map[uintptr]*typeInfo, now *[]rtid2ti, 2 words expected
+	infos atomicTypeInfoSlice
 	mu    sync.Mutex
 	tags  []string
+	_     [16]byte // padding
 }
 
 // NewTypeInfos creates a TypeInfos given a set of struct tags keys.
@@ -1396,6 +1401,7 @@ type codecFn struct {
 	i  codecFnInfo
 	fe func(*Encoder, *codecFnInfo, reflect.Value)
 	fd func(*Decoder, *codecFnInfo, reflect.Value)
+	_  [8]byte // padding
 }
 
 type codecRtidFn struct {
@@ -1409,6 +1415,8 @@ type codecFner struct {
 	s  []codecRtidFn
 	be bool
 	js bool
+	_  [6]byte     // padding
+	_  [3 * 8]byte // padding
 }
 
 func (c *codecFner) reset(hh Handle) {

+ 1 - 1
codec/helper_not_unsafe.go

@@ -83,7 +83,7 @@ func i2rtid(i interface{}) uintptr {
 // }
 
 // --------------------------
-type atomicTypeInfoSlice struct {
+type atomicTypeInfoSlice struct { // expected to be 2 words
 	v atomic.Value
 }
 

+ 2 - 1
codec/helper_unsafe.go

@@ -137,8 +137,9 @@ func i2rtid(i interface{}) uintptr {
 // }
 
 // --------------------------
-type atomicTypeInfoSlice struct {
+type atomicTypeInfoSlice struct { // expected to be 2 words
 	v unsafe.Pointer
+	_ [8]byte // padding
 }
 
 func (x *atomicTypeInfoSlice) load() *[]rtid2ti {

+ 45 - 43
codec/json.go

@@ -74,17 +74,6 @@ const (
 	jsonScratchArrayLen = 64
 )
 
-var (
-	// jsonTabs and jsonSpaces are used as caches for indents
-	jsonTabs, jsonSpaces string
-
-	jsonCharHtmlSafeSet   bitset128
-	jsonCharSafeSet       bitset128
-	jsonCharWhitespaceSet bitset256
-	jsonNumSet            bitset256
-	// jsonIsFloatSet        bitset256
-)
-
 const (
 	// If !jsonValidateSymbols, decoding will be faster, by skipping some checks:
 	//   - If we see first character of null, false or true,
@@ -101,17 +90,22 @@ const (
 	jsonAlwaysReturnInternString = false
 )
 
-func init() {
-	var bs [jsonSpacesOrTabsLen]byte
-	for i := 0; i < jsonSpacesOrTabsLen; i++ {
-		bs[i] = ' '
-	}
-	jsonSpaces = string(bs[:])
+var (
+	// jsonTabs and jsonSpaces are used as caches for indents
+	jsonTabs, jsonSpaces [jsonSpacesOrTabsLen]byte
+
+	jsonCharHtmlSafeSet   bitset128
+	jsonCharSafeSet       bitset128
+	jsonCharWhitespaceSet bitset256
+	jsonNumSet            bitset256
+	// jsonIsFloatSet        bitset256
+)
 
+func init() {
 	for i := 0; i < jsonSpacesOrTabsLen; i++ {
-		bs[i] = '\t'
+		jsonSpaces[i] = ' '
+		jsonTabs[i] = '\t'
 	}
-	jsonTabs = string(bs[:])
 
 	// populate the safe values as true: note: ASCII control characters are (0-31)
 	// jsonCharSafeSet:     all true except (0-31) " \
@@ -230,16 +224,18 @@ func (e *jsonEncDriverTypical) atEndOfEncode() {
 // ----------------
 
 type jsonEncDriverGeneric struct {
-	w  encWriter // encWriter // *encWriterSwitch
-	b  *[jsonScratchArrayLen]byte
-	ds string // indent string
-	d  bool   // indent
-	dt bool   // indent using tabs
-	dl uint16 // indent level
-	ks bool   // map key as string
-	is byte   // integer as string
-	tw bool   // term white space
-	c  containerState
+	w encWriter // encWriter // *encWriterSwitch
+	b *[jsonScratchArrayLen]byte
+	c containerState
+	// ds string // indent string
+	di int8    // indent per level (number of spaces or tabs)
+	d  bool    // indenting?
+	dt bool    // indent using tabs
+	dl uint16  // indent level
+	ks bool    // map key as string
+	is byte    // integer as string
+	tw bool    // term white space
+	_  [7]byte // padding
 }
 
 // indent is done as below:
@@ -252,15 +248,15 @@ func (e *jsonEncDriverGeneric) reset(ee *jsonEncDriver) {
 	e.b = &ee.b
 	e.tw = ee.h.TermWhitespace
 	e.c = 0
-	e.d, e.dt, e.dl, e.ds = false, false, 0, ""
+	e.d, e.dt, e.dl, e.di = false, false, 0, 0
 	h := ee.h
 	if h.Indent > 0 {
 		e.d = true
-		e.ds = jsonSpaces[:h.Indent]
+		e.di = int8(h.Indent)
 	} else if h.Indent < 0 {
 		e.d = true
 		e.dt = true
-		e.ds = jsonTabs[:-(h.Indent)]
+		e.di = int8(-h.Indent)
 	}
 	e.ks = h.MapKeyAsString
 	e.is = h.IntegerAsString
@@ -335,16 +331,19 @@ func (e *jsonEncDriverGeneric) WriteMapEnd() {
 
 func (e *jsonEncDriverGeneric) writeIndent() {
 	e.w.writen1('\n')
-	if x := len(e.ds) * int(e.dl); x <= jsonSpacesOrTabsLen {
-		if e.dt {
-			e.w.writestr(jsonTabs[:x])
-		} else {
-			e.w.writestr(jsonSpaces[:x])
+	x := int(e.di) * int(e.dl)
+	if e.dt {
+		for x > jsonSpacesOrTabsLen {
+			e.w.writeb(jsonTabs[:])
+			x -= jsonSpacesOrTabsLen
 		}
+		e.w.writeb(jsonTabs[:x])
 	} else {
-		for i := uint16(0); i < e.dl; i++ {
-			e.w.writestr(e.ds)
+		for x > jsonSpacesOrTabsLen {
+			e.w.writeb(jsonSpaces[:])
+			x -= jsonSpacesOrTabsLen
 		}
+		e.w.writeb(jsonSpaces[:x])
 	}
 }
 
@@ -604,6 +603,7 @@ type jsonDecDriver struct {
 	b  [jsonScratchArrayLen]byte // scratch 1, used for parsing strings or numbers or time.Time
 	b2 [jsonScratchArrayLen]byte // scratch 2, used only for readUntil, decNumBytes
 
+	_ [3 * 8]byte // padding
 	// n jsonNum
 }
 
@@ -1229,10 +1229,6 @@ type JsonHandle struct {
 	textEncodingType
 	BasicHandle
 
-	// RawBytesExt, if configured, is used to encode and decode raw bytes in a custom way.
-	// If not configured, raw bytes are encoded to/from base64 text.
-	RawBytesExt InterfaceExt
-
 	// Indent indicates how a value is encoded.
 	//   - If positive, indent by that number of spaces.
 	//   - If negative, indent by that number of tabs.
@@ -1274,6 +1270,12 @@ type JsonHandle struct {
 	// Use this to enforce strict json output.
 	// The only caveat is that nil value is ALWAYS written as null (never as "null")
 	MapKeyAsString bool
+
+	// ---- cache line
+
+	// RawBytesExt, if configured, is used to encode and decode raw bytes in a custom way.
+	// If not configured, raw bytes are encoded to/from base64 text.
+	RawBytesExt InterfaceExt
 }
 
 // Name returns the name of the handle: json

+ 13 - 8
codec/msgpack.go

@@ -114,6 +114,7 @@ type msgpackEncDriver struct {
 	w encWriter
 	h *MsgpackHandle
 	x [8]byte
+	_ [3 * 8]byte // padding
 }
 
 func (e *msgpackEncDriver) EncodeNil() {
@@ -324,10 +325,10 @@ func (e *msgpackEncDriver) writeContainerLen(ct msgpackContainerType, l int) {
 //---------------------------------------------
 
 type msgpackDecDriver struct {
-	d      *Decoder
-	r      decReader // *Decoder decReader decReaderT
-	h      *MsgpackHandle
-	b      [scratchByteArrayLen]byte
+	d *Decoder
+	r decReader // *Decoder decReader decReaderT
+	h *MsgpackHandle
+	// b      [scratchByteArrayLen]byte
 	bd     byte
 	bdRead bool
 	br     bool // bytes reader
@@ -335,6 +336,7 @@ type msgpackDecDriver struct {
 	// noStreamingCodec
 	// decNoSeparator
 	decDriverNoopContainerReader
+	_ [3 * 8]byte // padding
 }
 
 // Note: This returns either a primitive (int, bool, etc) for non-containers,
@@ -594,6 +596,9 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 	case valueTypeString:
 		clen = d.readContainerLen(msgpackContainerStr)
 	case valueTypeArray:
+		if zerocopy && len(bs) == 0 {
+			bs = d.d.b[:]
+		}
 		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
 		return
 	default:
@@ -611,18 +616,18 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 		if d.br {
 			return d.r.readx(clen)
 		} else if len(bs) == 0 {
-			bs = d.b[:]
+			bs = d.d.b[:]
 		}
 	}
-	return decByteSlice(d.r, clen, d.d.h.MaxInitLen, bs)
+	return decByteSlice(d.r, clen, d.h.MaxInitLen, bs)
 }
 
 func (d *msgpackDecDriver) DecodeString() (s string) {
-	return string(d.DecodeBytes(d.b[:], true))
+	return string(d.DecodeBytes(d.d.b[:], true))
 }
 
 func (d *msgpackDecDriver) DecodeStringAsBytes() (s []byte) {
-	return d.DecodeBytes(d.b[:], true)
+	return d.DecodeBytes(d.d.b[:], true)
 }
 
 func (d *msgpackDecDriver) readNextBd() {

+ 9 - 4
codec/simple.go

@@ -41,6 +41,7 @@ type simpleEncDriver struct {
 	// c containerState
 	encDriverTrackContainerWriter
 	// encDriverNoopContainerWriter
+	_ [2 * 8]byte // padding
 }
 
 func (e *simpleEncDriver) EncodeNil() {
@@ -205,10 +206,11 @@ type simpleDecDriver struct {
 	bd     byte
 	br     bool // a bytes reader?
 	c      containerState
-	b      [scratchByteArrayLen]byte
+	// b      [scratchByteArrayLen]byte
 	noBuiltInTypes
 	// noStreamingCodec
 	decDriverNoopContainerReader
+	_ [3 * 8]byte // padding
 }
 
 func (d *simpleDecDriver) readNextBd() {
@@ -417,11 +419,11 @@ func (d *simpleDecDriver) decLen() int {
 }
 
 func (d *simpleDecDriver) DecodeString() (s string) {
-	return string(d.DecodeBytes(d.b[:], true))
+	return string(d.DecodeBytes(d.d.b[:], true))
 }
 
 func (d *simpleDecDriver) DecodeStringAsBytes() (s []byte) {
-	return d.DecodeBytes(d.b[:], true)
+	return d.DecodeBytes(d.d.b[:], true)
 }
 
 func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
@@ -434,6 +436,9 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	// check if an "array" of uint8's (see ContainerType for how to infer if an array)
 	if d.bd >= simpleVdArray && d.bd <= simpleVdMap+4 {
+		if len(bs) == 0 && zerocopy {
+			bs = d.d.b[:]
+		}
 		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
 		return
 	}
@@ -444,7 +449,7 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		if d.br {
 			return d.r.readx(clen)
 		} else if len(bs) == 0 {
-			bs = d.b[:]
+			bs = d.d.b[:]
 		}
 	}
 	return decByteSlice(d.r, clen, d.d.h.MaxInitLen, bs)