فهرست منبع

codec: false-sharing and cache-line optimizations

Take a second look at false-sharing and cache-line optimizations
- Focus on ensuring that shared values occupy full cache lines
- Secondarily, try to help non-shared allocated values occupy full cache lines
- Move writable objects to be together in same cache line

Trim some structs by merging multiple bools into a uint8 flag,
and re-using an atomic value of size uint32.
Ugorji Nwoke 6 سال پیش
والد
کامیت
967be11608
9 فایلهای تغییر یافته به همراه 158 افزوده شده و 97 حذف شده
  1. 3 3
      codec/binc.go
  2. 13 8
      codec/build.sh
  3. 1 1
      codec/cbor.go
  4. 19 15
      codec/decode.go
  5. 72 29
      codec/helper.go
  6. 3 3
      codec/helper_unsafe.go
  7. 32 29
      codec/json.go
  8. 13 7
      codec/msgpack.go
  9. 2 2
      codec/simple.go

+ 3 - 3
codec/binc.go

@@ -110,7 +110,7 @@ type bincEncDriver struct {
 	encDriverTrackContainerWriter
 	noBuiltInTypes
 	// encNoSeparator
-	_ [1]uint64 // padding
+	// _ [1]uint64 // padding
 }
 
 func (e *bincEncDriver) EncodeNil() {
@@ -409,7 +409,7 @@ type bincDecDriver struct {
 	bd     byte
 	vd     byte
 	vs     byte
-	_      [3]byte // padding
+	// _      [3]byte // padding
 	// linear searching on this slice is ok,
 	// because we typically expect < 32 symbols in each stream.
 	s []bincDecSymbol
@@ -1007,7 +1007,7 @@ type BincHandle struct {
 	// - n: none
 	// - a: all: same as m, s, ...
 
-	// _ [1]uint64 // padding
+	_ [1]uint64 // padding (cache-aligned)
 }
 
 // Name returns the name of the handle: binc

+ 13 - 8
codec/build.sh

@@ -154,9 +154,9 @@ _codegenerators() {
 _prebuild() {
     echo "prebuild: zforce: $zforce"
     local d="$PWD"
-    zfin="test_values.generated.go"
-    zfin2="test_values_flex.generated.go"
-    zpkg="github.com/ugorji/go/codec"
+    local zfin="test_values.generated.go"
+    local zfin2="test_values_flex.generated.go"
+    local zpkg="github.com/ugorji/go/codec"
     # zpkg=${d##*/src/}
     # zgobase=${d%%/src/*}
     # rm -f *_generated_test.go 
@@ -169,13 +169,14 @@ _prebuild() {
         if [[ $zforce ]]; then go install ${zargs[*]} .; fi &&
         echo "prebuild done successfully"
     rm -f $d/$zfin $d/$zfin2
-    unset zfin zfin2 zpkg
+    # unset zfin zfin2 zpkg
 }
 
 _make() {
+    local makeforce=${zforce}
     zforce=1
     (cd codecgen && go install ${zargs[*]} .) && _prebuild && go install ${zargs[*]} .
-    unset zforce
+    zforce=${makeforce}
 }
 
 _clean() {
@@ -200,6 +201,7 @@ _release() {
 EOF
     # # go 1.6 and below kept giving memory errors on Mac OS X during SDK build or go run execution,
     # # that is fine, as we only explicitly test the last 3 releases and tip (2 years).
+    local makeforce=${zforce}
     zforce=1
     for i in 1.10 1.11 1.12 master
     do
@@ -216,7 +218,7 @@ EOF
             _tests "$@"
         if [[ "$?" != 0 ]]; then return 1; fi
     done
-    unset zforce
+    zforce=${makeforce}
     echo "++++++++ RELEASE TEST SUITES ALL PASSED ++++++++"
 }
 
@@ -232,7 +234,10 @@ EOF
 _main() {
     if [[ -z "$1" ]]; then _usage; return 1; fi
     local x
-    unset zforce
+    local zforce
+    local zargs
+    local zbenchflags
+    # unset zforce
     zargs=()
     zbenchflags=""
     OPTIND=1
@@ -261,7 +266,7 @@ _main() {
         'xz') _analyze "$@" ;;
         'xb') _bench "$@" ;;
     esac
-    unset zforce zargs zbenchflags
+    # unset zforce zargs zbenchflags
 }
 
 [ "." = `dirname $0` ] && _main "$@"

+ 1 - 1
codec/cbor.go

@@ -735,7 +735,7 @@ type CborHandle struct {
 	// If unset, we encode time.Time using seconds past epoch.
 	TimeRFC3339 bool
 
-	// _ [1]uint64 // padding
+	_ [1]uint64 // padding (cache-aligned)
 }
 
 // Name returns the name of the handle: cbor

+ 19 - 15
codec/decode.go

@@ -23,8 +23,8 @@ const (
 const (
 	decDefMaxDepth         = 1024 // maximum depth
 	decDefSliceCap         = 8
-	decDefChanCap          = 64            // should be large, as cap cannot be expanded
-	decScratchByteArrayLen = cacheLineSize // + (8 * 2) // - (8 * 1)
+	decDefChanCap          = 64                // should be large, as cap cannot be expanded
+	decScratchByteArrayLen = cacheLineSize - 4 // + (8 * 2) // - (8 * 1)
 )
 
 var (
@@ -326,8 +326,8 @@ type ioDecReader struct {
 	rr io.Reader
 	br io.ByteScanner
 
-	x [scratchByteArrayLen]byte // for: get struct field name, swallow valueTypeBytes, etc
-	_ [1]uint64                 // padding
+	x [scratchByteArrayLen + 8]byte // for: get struct field name, swallow valueTypeBytes, etc
+	// _ [1]uint64                 // padding
 }
 
 func (z *ioDecReader) reset(r io.Reader) {
@@ -549,6 +549,7 @@ func (z *ioDecReader) unreadn1() {
 
 type bufioDecReader struct {
 	ioDecReaderCommon
+	_ uint64 // padding (cache-aligned)
 
 	c   uint // cursor
 	buf []byte
@@ -563,8 +564,6 @@ type bufioDecReader struct {
 	calls uint16 // what depth in mustDecode are we in now.
 
 	_ [6]uint8 // padding
-
-	_ [1]uint64 // padding
 }
 
 func (z *bufioDecReader) reset(r io.Reader, bufsize int) {
@@ -1944,7 +1943,7 @@ type decNaked struct {
 
 	// state
 	v valueType
-	_ [6]bool // padding
+	// _ [6]bool // padding
 
 	// ru, ri, rf, rl, rs, rb, rt reflect.Value // mapping to the primitives above
 	//
@@ -2286,7 +2285,7 @@ type Decoder struct {
 
 	d decDriver
 
-	// NOTE: Decoder shouldn't call it's read methods,
+	// NOTE: Decoder shouldn't call its read methods,
 	// as the handler MAY need to do some coordination.
 	r *decReaderSwitch
 
@@ -2305,17 +2304,20 @@ type Decoder struct {
 	n decNaked
 
 	// cr containerStateRecv
-	err error
-
-	depth    int16
-	maxdepth int16
 
-	_ [4]uint8 // padding
+	// _ [4]uint8 // padding
 
 	is map[string]string // used for interning strings
 
+	_ uintptr // padding (so scratch is in its own cache line)
+
+	err error
+
 	// ---- cpu cache line boundary?
-	b [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxEncDrivers
+	// ---- writable fields during execution --- *try* to keep in sep cache line
+	maxdepth int16
+	depth    int16
+	b        [decScratchByteArrayLen]byte // scratch buffer, used by Decoder and xxxEncDrivers
 
 	// padding - false sharing help // modify 232 if Decoder struct changes.
 	// _ [cacheLineSize - 232%cacheLineSize]byte
@@ -2510,7 +2512,9 @@ func (d *Decoder) Decode(v interface{}) (err error) {
 		defer func() {
 			if x := recover(); x != nil {
 				panicValToErr(d, x, &d.err)
-				err = d.err
+				if d.err != err {
+					err = d.err
+				}
 			}
 		}()
 	}

+ 72 - 29
codec/helper.go

@@ -170,6 +170,14 @@ func init() {
 	refBitset.set(byte(reflect.Chan))
 }
 
+type handleFlag uint8
+
+const (
+	initedHandleFlag handleFlag = 1 << iota
+	binaryHandleFlag
+	jsonHandleFlag
+)
+
 type clsErr struct {
 	closed    bool  // is it closed?
 	errClosed error // error on closing
@@ -293,6 +301,7 @@ const (
 	typeInfoLoadArrayBLen      = 8 * 4
 )
 
+// typeInfoLoad is a transient object used while loading up a typeInfo.
 type typeInfoLoad struct {
 	// fNames   []string
 	// encNames []string
@@ -300,6 +309,8 @@ type typeInfoLoad struct {
 	sfis   []structFieldInfo
 }
 
+// typeInfoLoadArray is a cache object used to efficiently load up a typeInfo without
+// much allocation.
 type typeInfoLoadArray struct {
 	// fNames   [typeInfoLoadArrayLen]string
 	// encNames [typeInfoLoadArrayLen]string
@@ -309,6 +320,12 @@ type typeInfoLoadArray struct {
 	b      [typeInfoLoadArrayBLen]byte // scratch - used for struct field names
 }
 
+// // cacheLineSafer denotes that a type is safe for cache-line access.
+// // This could mean that
+// type cacheLineSafer interface {
+// 	cacheLineSafe()
+// }
+
 // mirror json.Marshaler and json.Unmarshaler here,
 // so we don't import the encoding/json package
 
@@ -522,10 +539,9 @@ type BasicHandle struct {
 
 	intf2impls
 
-	inited uint32
-	_      uint32 // padding
+	EncodeOptions
 
-	// ---- cache line
+	DecodeOptions
 
 	RPCOptions
 
@@ -559,23 +575,21 @@ type BasicHandle struct {
 	//    runtime.SetFinalizer(d, (*Decoder).Release)
 	ExplicitRelease bool
 
-	be bool   // is handle a binary encoding?
-	js bool   // is handle javascript handler?
-	n  byte   // first letter of handle name
-	_  uint16 // padding
+	// flags handleFlag // holds flag for if binaryEncoding, jsonHandler, etc
+	// be    bool       // is handle a binary encoding?
+	// js    bool       // is handle javascript handler?
+	// n  byte // first letter of handle name
+	// _  uint16 // padding
 
 	// ---- cache line
 
-	DecodeOptions
-
-	// ---- cache line
-
-	EncodeOptions
-
 	// noBuiltInTypeChecker
 
+	inited uint32 // holds if inited, and also handle flags (binary encoding, json handler, etc)
+	mu     sync.Mutex
+	// _      uint32 // padding
 	rtidFns atomicRtidFnSlice
-	mu      sync.Mutex
+
 	// r []uintptr     // rtids mapped to s above
 }
 
@@ -599,15 +613,29 @@ func basicHandle(hh Handle) (x *BasicHandle) {
 	return
 }
 
+func (x *BasicHandle) isJs() bool {
+	return handleFlag(x.inited)&jsonHandleFlag != 0
+}
+
+func (x *BasicHandle) isBe() bool {
+	return handleFlag(x.inited)&binaryHandleFlag != 0
+}
+
 //go:noinline
 func (x *BasicHandle) init(hh Handle) {
 	// make it uninlineable, as it is called at most once
 	x.mu.Lock()
 	if x.inited == 0 {
-		x.be = hh.isBinary()
-		_, x.js = hh.(*JsonHandle)
-		x.n = hh.Name()[0]
-		atomic.StoreUint32(&x.inited, 1)
+		var f = initedHandleFlag
+		if hh.isBinary() {
+			f |= binaryHandleFlag
+		}
+		if _, b := hh.(*JsonHandle); b {
+			f |= jsonHandleFlag
+		}
+		// _, x.js = hh.(*JsonHandle)
+		// x.n = hh.Name()[0]
+		atomic.StoreUint32(&x.inited, uint32(f))
 	}
 	x.mu.Unlock()
 }
@@ -691,20 +719,21 @@ func (x *BasicHandle) fn(rt reflect.Type, checkFastpath, checkCodecSelfer bool)
 		if rk == reflect.Struct || rk == reflect.Array {
 			fi.addrE = true
 		}
-	} else if supportMarshalInterfaces && c.be && (ti.bm || ti.bmp) && (ti.bu || ti.bup) {
+	} else if supportMarshalInterfaces && c.isBe() && (ti.bm || ti.bmp) && (ti.bu || ti.bup) {
 		fn.fe = (*Encoder).binaryMarshal
 		fn.fd = (*Decoder).binaryUnmarshal
 		fi.addrF = true
 		fi.addrD = ti.bup
 		fi.addrE = ti.bmp
-	} else if supportMarshalInterfaces && !c.be && c.js && (ti.jm || ti.jmp) && (ti.ju || ti.jup) {
+	} else if supportMarshalInterfaces && !c.isBe() && c.isJs() &&
+		(ti.jm || ti.jmp) && (ti.ju || ti.jup) {
 		//If JSON, we should check JSONMarshal before textMarshal
 		fn.fe = (*Encoder).jsonMarshal
 		fn.fd = (*Decoder).jsonUnmarshal
 		fi.addrF = true
 		fi.addrD = ti.jup
 		fi.addrE = ti.jmp
-	} else if supportMarshalInterfaces && !c.be && (ti.tm || ti.tmp) && (ti.tu || ti.tup) {
+	} else if supportMarshalInterfaces && !c.isBe() && (ti.tm || ti.tmp) && (ti.tu || ti.tup) {
 		fn.fe = (*Encoder).textMarshal
 		fn.fd = (*Decoder).textUnmarshal
 		fi.addrF = true
@@ -1057,7 +1086,7 @@ type extTypeTagFn struct {
 	rt      reflect.Type
 	tag     uint64
 	ext     Ext
-	_       [1]uint64 // padding
+	// _       [1]uint64 // padding
 }
 
 type extHandle []extTypeTagFn
@@ -1114,7 +1143,7 @@ func (o *extHandle) SetExt(rt reflect.Type, tag uint64, ext Ext) (err error) {
 		}
 	}
 	rtidptr := rt2id(reflect.PtrTo(rt))
-	*o = append(o2, extTypeTagFn{rtid, rtidptr, rt, tag, ext, [1]uint64{}})
+	*o = append(o2, extTypeTagFn{rtid, rtidptr, rt, tag, ext}) // , [1]uint64{}})
 	return
 }
 
@@ -1222,7 +1251,7 @@ type structFieldInfo struct {
 
 	encNameAsciiAlphaNum bool // the encName only contains ascii alphabet and numbers
 	structFieldInfoFlag
-	_ [1]byte // padding
+	// _ [1]byte // padding
 }
 
 func (si *structFieldInfo) setToZeroValue(v reflect.Value) {
@@ -1411,7 +1440,8 @@ const (
 	typeInfoFlagIsZeroerPtr
 )
 
-// typeInfo keeps information about each (non-ptr) type referenced in the encode/decode sequence.
+// typeInfo keeps static (non-changing readonly)information
+// about each (non-ptr) type referenced in the encode/decode sequence.
 //
 // During an encode/decode sequence, we work as below:
 //   - If base is a built in type, en/decode base value
@@ -1470,8 +1500,8 @@ type typeInfo struct {
 	flags              typeInfoFlag
 	infoFieldOmitempty bool
 
-	_ [6]byte   // padding
-	_ [2]uint64 // padding
+	// _ [6]byte   // padding
+	// _ [2]uint64 // padding
 }
 
 func (ti *typeInfo) isFlag(f typeInfoFlag) bool {
@@ -1509,8 +1539,9 @@ type TypeInfos struct {
 	// infos: formerly map[uintptr]*typeInfo, now *[]rtid2ti, 2 words expected
 	infos atomicTypeInfoSlice
 	mu    sync.Mutex
+	_     uint64 // padding (cache-aligned)
 	tags  []string
-	_     [2]uint64 // padding
+	_     uint64 // padding (cache-aligned)
 }
 
 // NewTypeInfos creates a TypeInfos given a set of struct tags keys.
@@ -2037,7 +2068,7 @@ type codecFn struct {
 	i  codecFnInfo
 	fe func(*Encoder, *codecFnInfo, reflect.Value)
 	fd func(*Decoder, *codecFnInfo, reflect.Value)
-	_  [1]uint64 // padding
+	_  [1]uint64 // padding (cache-aligned)
 }
 
 type codecRtidFn struct {
@@ -2161,6 +2192,8 @@ type set []uintptr
 func (s *set) add(v uintptr) (exists bool) {
 	// e.ci is always nil, or len >= 1
 	x := *s
+	// defer func() { xdebugf("set.add: len: %d", len(x)) }()
+
 	if x == nil {
 		x = make([]uintptr, 1, 8)
 		x[0] = v
@@ -2558,6 +2591,16 @@ func xdebugf(pattern string, args ...interface{}) {
 	fmt.Printf("\033[1;31m"+pattern+delim+"\033[0m", args...)
 }
 
+// xdebug2f printf. the message in blue on the terminal.
+// Use it in place of fmt.Printf (which it calls internally)
+func xdebug2f(pattern string, args ...interface{}) {
+	var delim string
+	if len(pattern) > 0 && pattern[len(pattern)-1] != '\n' {
+		delim = "\n"
+	}
+	fmt.Printf("\033[1;34m"+pattern+delim+"\033[0m", args...)
+}
+
 // func isImmutableKind(k reflect.Kind) (v bool) {
 // 	return false ||
 // 		k == reflect.Int ||

+ 3 - 3
codec/helper_unsafe.go

@@ -186,7 +186,7 @@ func isEmptyValue(v reflect.Value, tinfos *TypeInfos, deref, checkStruct bool) b
 // ----------------------
 type atomicTypeInfoSlice struct {
 	v unsafe.Pointer // *[]rtid2ti
-	_ uintptr        // padding (atomicXXX expected to be 2 words)
+	_ uint64         // padding (atomicXXX expected to be 2 words)
 }
 
 func (x *atomicTypeInfoSlice) load() (s []rtid2ti) {
@@ -204,7 +204,7 @@ func (x *atomicTypeInfoSlice) store(p []rtid2ti) {
 // --------------------------
 type atomicRtidFnSlice struct {
 	v unsafe.Pointer // *[]codecRtidFn
-	_ uintptr        // padding (atomicXXX expected to be 2 words)
+	// _ uint64         // padding (atomicXXX expected to be 2 words) (make 1 word so JsonHandle fits)
 }
 
 func (x *atomicRtidFnSlice) load() (s []codecRtidFn) {
@@ -222,7 +222,7 @@ func (x *atomicRtidFnSlice) store(p []codecRtidFn) {
 // --------------------------
 type atomicClsErr struct {
 	v unsafe.Pointer // *clsErr
-	_ uintptr        // padding (atomicXXX expected to be 2 words)
+	_ uint64         // padding (atomicXXX expected to be 2 words)
 }
 
 func (x *atomicClsErr) load() (e clsErr) {

+ 32 - 29
codec/json.go

@@ -61,7 +61,7 @@ const (
 	jsonU4Chk1 = 'a' - 10
 	jsonU4Chk0 = 'A' - 10
 
-	jsonScratchArrayLen = 64
+	jsonScratchArrayLen = cacheLineSize // 64
 )
 
 const (
@@ -122,6 +122,7 @@ func init() {
 
 type jsonEncDriverTypical struct {
 	jsonEncDriver
+	_ uint64 // padding
 }
 
 func (e *jsonEncDriverTypical) typical() {}
@@ -201,14 +202,14 @@ func (e *jsonEncDriverTypical) EncodeFloat32(f float32) {
 type jsonEncDriverGeneric struct {
 	jsonEncDriver
 	// ds string // indent string
-	di int8      // indent per
-	d  bool      // indenting?
-	dt bool      // indent using tabs
-	dl uint16    // indent level
-	ks bool      // map key as string
-	is byte      // integer as string
-	_  byte      // padding
-	_  [2]uint64 // padding
+	di int8   // indent per
+	d  bool   // indenting?
+	dt bool   // indent using tabs
+	dl uint16 // indent level
+	ks bool   // map key as string
+	is byte   // integer as string
+	// _  byte      // padding
+	// _  [2]uint64 // padding
 }
 
 // indent is done as below:
@@ -393,18 +394,16 @@ func (e *jsonEncDriverGeneric) EncodeFloat32(f float32) {
 
 type jsonEncDriver struct {
 	noBuiltInTypes
+	bs []byte // for encoding strings
+	// scratch: encode time, numbers, etc. Note: leave 1 byte for containerState
+	b [jsonScratchArrayLen - 24 - 1]byte // leave space for bs(len,cap), containerState
+	c containerState
+	// _ [2]uint64                 // padding
+	// ---- cpu cache line boundary?
 	e  *Encoder
 	h  *JsonHandle
 	w  *encWriterSwitch
 	se extWrapper
-	// ---- cpu cache line boundary?
-	bs []byte // scratch
-	// ---- cpu cache line boundary?
-	// scratch: encode time, etc.
-	// include scratch buffer and padding, but leave space for containerstate
-	b [jsonScratchArrayLen + 8 + 8 - 1]byte
-	c containerState
-	// _ [2]uint64                 // padding
 }
 
 func (e *jsonEncDriver) EncodeNil() {
@@ -610,15 +609,16 @@ type jsonDecDriver struct {
 
 	// ---- writable fields during execution --- *try* to keep in sep cache line
 
-	c containerState
-	// tok is used to store the token read right after skipWhiteSpace.
-	tok   uint8
-	fnull bool    // found null from appendStringAsBytes
-	bs    []byte  // scratch. Initialized from b. Used for parsing strings or numbers.
-	bstr  [8]byte // scratch used for string \UXXX parsing
 	// ---- cpu cache line boundary?
-	b  [jsonScratchArrayLen]byte // scratch 1, used for parsing strings or numbers or time.Time
-	b2 [jsonScratchArrayLen]byte // scratch 2, used only for readUntil, decNumBytes
+	bs []byte                         // scratch, initialized from b. For parsing strings or numbers.
+	b  [jsonScratchArrayLen - 16]byte // scratch 1, used for parsing strings or numbers or time.Time
+	// ---- cpu cache line boundary?
+	c     containerState
+	tok   uint8                         // used to store the token read right after skipWhiteSpace
+	fnull bool                          // found null from appendStringAsBytes
+	_     byte                          // padding
+	bstr  [4]byte                       // scratch used for string \UXXX parsing
+	b2    [jsonScratchArrayLen - 8]byte // scratch 2, used only for readUntil, decNumBytes
 
 	// _ [3]uint64 // padding
 	// n jsonNum
@@ -1346,15 +1346,16 @@ type JsonHandle struct {
 	// The only caveat is that nil value is ALWAYS written as null (never as "null")
 	MapKeyAsString bool
 
-	// _ [2]byte // padding
+	// _ uint64 // padding (cache line)
 
-	// Note: below, we store hardly-used items e.g. RawBytesExt is cached in the (en|de)cDriver.
+	// Note: below, we store hardly-used items
+	// e.g. RawBytesExt (which is already cached in the (en|de)cDriver).
 
 	// RawBytesExt, if configured, is used to encode and decode raw bytes in a custom way.
 	// If not configured, raw bytes are encoded to/from base64 text.
 	RawBytesExt InterfaceExt
 
-	_ [2]uint64 // padding
+	// _ [2]uint64 // padding
 }
 
 // Name returns the name of the handle: json
@@ -1407,7 +1408,9 @@ func (h *JsonHandle) newDecDriver(d *Decoder) decDriver {
 func (e *jsonEncDriver) reset() {
 	e.w = e.e.w
 	e.se.InterfaceExt = e.h.RawBytesExt
-	if e.bs != nil {
+	if e.bs == nil {
+		e.bs = e.b[:0]
+	} else {
 		e.bs = e.bs[:0]
 	}
 	e.c = 0

+ 13 - 7
codec/msgpack.go

@@ -705,9 +705,11 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 		return
 	} else if bd == mpBin8 || bd == mpBin16 || bd == mpBin32 {
 		clen = d.readContainerLen(msgpackContainerBin) // binary
-	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 || (bd >= mpFixStrMin && bd <= mpFixStrMax) {
+	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 ||
+		(bd >= mpFixStrMin && bd <= mpFixStrMax) {
 		clen = d.readContainerLen(msgpackContainerStr) // string/raw
-	} else if bd == mpArray16 || bd == mpArray32 || (bd >= mpFixArrayMin && bd <= mpFixArrayMax) {
+	} else if bd == mpArray16 || bd == mpArray32 ||
+		(bd >= mpFixArrayMin && bd <= mpFixArrayMax) {
 		// check if an "array" of uint8's
 		if zerocopy && len(bs) == 0 {
 			bs = d.d.b[:]
@@ -759,9 +761,11 @@ func (d *msgpackDecDriver) ContainerType() (vt valueType) {
 	// 	// nil
 	// } else if bd == mpBin8 || bd == mpBin16 || bd == mpBin32 {
 	// 	// binary
-	// } else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 || (bd >= mpFixStrMin && bd <= mpFixStrMax) {
+	// } else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 ||
+	// (bd >= mpFixStrMin && bd <= mpFixStrMax) {
 	// 	// string/raw
-	// } else if bd == mpArray16 || bd == mpArray32 || (bd >= mpFixArrayMin && bd <= mpFixArrayMax) {
+	// } else if bd == mpArray16 || bd == mpArray32 ||
+	// (bd >= mpFixArrayMin && bd <= mpFixArrayMax) {
 	// 	// array
 	// } else if bd == mpMap16 || bd == mpMap32 || (bd >= mpFixMapMin && bd <= mpFixMapMax) {
 	// 	// map
@@ -770,7 +774,8 @@ func (d *msgpackDecDriver) ContainerType() (vt valueType) {
 		return valueTypeNil
 	} else if bd == mpBin8 || bd == mpBin16 || bd == mpBin32 {
 		return valueTypeBytes
-	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 || (bd >= mpFixStrMin && bd <= mpFixStrMax) {
+	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 ||
+		(bd >= mpFixStrMin && bd <= mpFixStrMax) {
 		if d.h.WriteExt || d.h.RawToString { // UTF-8 string (new spec)
 			return valueTypeString
 		}
@@ -870,7 +875,8 @@ func (d *msgpackDecDriver) DecodeTime() (t time.Time) {
 		return
 	} else if bd == mpBin8 || bd == mpBin16 || bd == mpBin32 {
 		clen = d.readContainerLen(msgpackContainerBin) // binary
-	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 || (bd >= mpFixStrMin && bd <= mpFixStrMax) {
+	} else if bd == mpStr8 || bd == mpStr16 || bd == mpStr32 ||
+		(bd >= mpFixStrMin && bd <= mpFixStrMax) {
 		clen = d.readContainerLen(msgpackContainerStr) // string/raw
 	} else {
 		// expect to see mpFixExt4,-1 OR mpFixExt8,-1 OR mpExt8,12,-1
@@ -983,7 +989,7 @@ type MsgpackHandle struct {
 	binaryEncodingType
 	noElemSeparators
 
-	// _ [1]uint64 // padding
+	_ [1]uint64 // padding (cache-aligned)
 }
 
 // Name returns the name of the handle: msgpack

+ 2 - 2
codec/simple.go

@@ -41,7 +41,7 @@ type simpleEncDriver struct {
 	// c containerState
 	encDriverTrackContainerWriter
 	// encDriverNoopContainerWriter
-	_ [3]uint64 // padding
+	_ [2]uint64 // padding (cache-aligned)
 }
 
 func (e *simpleEncDriver) EncodeNil() {
@@ -630,7 +630,7 @@ type SimpleHandle struct {
 	// EncZeroValuesAsNil says to encode zero values for numbers, bool, string, etc as nil
 	EncZeroValuesAsNil bool
 
-	// _ [1]uint64 // padding
+	_ [1]uint64 // padding (cache-aligned)
 }
 
 // Name returns the name of the handle: simple