Browse source

codec: support max initial collection length; json fixes; improve perf & reduce alloc drastically

-------------
MaxInitLen Option:
It is possible to force an out-of-memory crash when using the Decoder,
because we "make" a slice based on the length prefix in the stream
(e.g. for cbor, msgpack, etc).

To mitigate this, a user can configure MaxInitLen, and we use it as the cap
when making the initial collection (slice, map or chan). We populate up to
that length, and append any further items in the stream after that.

To use this, configure MaxInitLen in the Handle.

This support is exposed in runtime-reflection, fast-path and codecgen modes.
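
For illustration, a minimal usage sketch (assuming the github.com/ugorji/go/codec
import path and a CborHandle, whose embedded DecodeOptions exposes the field;
any other handle works the same way):

    package main

    import (
        "fmt"

        "github.com/ugorji/go/codec"
    )

    func main() {
        var h codec.CborHandle
        // Cap the length of any collection we "make" up-front from the
        // stream-declared length; anything beyond this is grown via append.
        h.MaxInitLen = 1024

        in := []byte{0x83, 0x01, 0x02, 0x03} // CBOR encoding of [1, 2, 3]
        var out []int
        if err := codec.NewDecoderBytes(in, &h).Decode(&out); err != nil {
            fmt.Println("decode error:", err)
            return
        }
        fmt.Println(out) // [1 2 3]
    }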

-------------
Growcap:
Add a smarter growth algorithm for append, which takes allocation size
classes into consideration. Also add a simpler append implementation that
avoids varargs and handles growing by a single element efficiently.
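
A rough sketch of the growth rule (not the exact growCap in this commit):
double small capacities, grow large ones by about 25%, then round the byte
size up so the allocation lands on a friendlier size-class boundary.

    // growCapSketch returns a new capacity for a slice whose elements are
    // unitSize bytes each, after adding toAdd more elements to oldCap.
    func growCapSketch(oldCap, unitSize, toAdd int) (newCap int) {
        if unitSize <= 0 {
            unitSize = 1
        }
        newCap = oldCap + toAdd
        if oldCap < 1024 { // small: double
            if c := 2 * oldCap; c > newCap {
                newCap = c
            }
        } else { // large: grow by ~25%
            if c := oldCap + oldCap/4; c > newCap {
                newCap = c
            }
        }
        // Round the total byte size up to a multiple of 64, so repeated grows
        // reuse allocator size classes instead of producing odd sizes.
        const align = 64
        return ((newCap*unitSize + align - 1) / align * align) / unitSize
    }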

-------------
ExpandSliceValue:
Also add an expandSliceValue function, which does work similar to
reflect.Append(...) but only the grow part. This way, we use the smarter
append logic above and do not need to build a varargs slice.
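
The idea, sketched with the reflect package (assumes import "reflect"; the
real expandSliceValue plugs in the size-class-aware growth above, while this
sketch simply doubles):

    // expandSliceValueSketch grows slice value s by n elements. When capacity
    // already suffices it just re-slices (no allocation, no copy); otherwise
    // it allocates a larger backing array and copies the existing elements,
    // which is what reflect.Append does minus the per-element varargs.
    func expandSliceValueSketch(s reflect.Value, n int) reflect.Value {
        newLen := s.Len() + n
        if newLen <= s.Cap() {
            return s.Slice(0, newLen)
        }
        newCap := 2 * s.Cap()
        if newCap < newLen {
            newCap = newLen
        }
        bigger := reflect.MakeSlice(s.Type(), newLen, newCap)
        reflect.Copy(bigger, s)
        return bigger
    }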

-------------
Performance: Reduce Allocation in encoding and decoding:
Previously, we used value receivers and kept the "optional" fields of
(en|de)cFnInfo in an embedded *(en|de)cFnInfoX struct.
This led to a lot of allocation.

However, the functions are also cached in a slice field of the (En|De)coder,
so it is easy to grab an internal pointer to an entry without allocating.

To leverage this, all these functions now take a pointer receiver, and we
pass them a pointer into that internal slice.

This reduced allocation by over 10% and improved performance.
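
A simplified illustration of the caching pattern (hypothetical names, not the
actual Encoder/Decoder types): the per-type entries live in a slice owned by
the coder, and callers receive a pointer into that slice rather than a freshly
allocated struct.

    type fnInfo struct {
        // the fields each encode/decode function needs (typeInfo, ext, seq, ...)
        seq uint8
    }

    type rtidFn struct {
        rtid uintptr
        fn   fnInfo
    }

    type coder struct{ s []rtidFn }

    // get returns a pointer to the cached entry for rtid, registering it on
    // first use. The *fnInfo points into the coder's own slice, so lookups do
    // not allocate; a previously returned pointer stays valid even if the
    // slice is later reallocated (it just keeps the old backing array alive).
    func (c *coder) get(rtid uintptr) *fnInfo {
        for i := range c.s {
            if c.s[i].rtid == rtid {
                return &c.s[i].fn
            }
        }
        c.s = append(c.s, rtidFn{rtid: rtid})
        return &c.s[len(c.s)-1].fn
    }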

-------------
sync.Pool for encoding structs while supporting omitEmpty:
Previously, the bucket-selection algorithm was incorrect: it bypassed the
Pool about half of the time, even when a pooled bucket would have fit.
The algorithm is now fixed.
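
Concretely (reconstructed from the removed code in the diff below): the old
code picked a bucket as newlen / 8 over 8/16/32/64-element buckets, so any
struct with 32 to 64 encoded fields indexed past the last bucket and fell back
to make, even though a pooled bucket would have fit. The new encStructPoolGet
selects buckets by explicit thresholds instead; a sketch of the difference:

    // Old selection: index by newlen/8 into 4 buckets of sizes 8, 16, 32, 64.
    // Lengths 32..64 compute an index >= 4 and silently skip the pool.
    func oldBucket(newlen int) (idx int, pooled bool) {
        idx = newlen / 8
        return idx, idx < 4
    }

    // New selection: explicit thresholds (mirroring encStructPoolGet, which
    // now also has a 128-element bucket), so every length up to the largest
    // bucket is served from the pool.
    func newBucket(newlen int) (idx int, pooled bool) {
        switch {
        case newlen <= 8:
            return 0, true
        case newlen <= 16:
            return 1, true
        case newlen <= 32:
            return 2, true
        case newlen <= 64:
            return 3, true
        case newlen <= 128:
            return 4, true
        }
        return 0, false
    }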

-------------
Performance: JSON: reduce allocation using sync.Pool

JSON encoding/decoding uses a "stack" to manage the state as it transitions
in and out of arrays, maps/objects, etc.

The "stack" is implemented using a list. Making those lists leads to much allocation.

Since there is a clear start() and end(), we now use those points to grab a
slice from a sync.Pool and return it when done parsing/encoding.
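
A sketch of the pattern (hypothetical types, assumes import "sync"; the real
jsonStack tracks container state rather than raw bytes):

    var stackPool = sync.Pool{
        New: func() interface{} { return make([]byte, 0, 64) },
    }

    type jsonStack struct{ st []byte }

    // start borrows a previously used slice from the pool instead of
    // allocating a fresh one for every Encode/Decode call.
    func (s *jsonStack) start() { s.st = stackPool.Get().([]byte)[:0] }

    // end returns the slice so a later call can reuse its backing array.
    func (s *jsonStack) end() { stackPool.Put(s.st[:0]); s.st = nil }

    func (s *jsonStack) push(c byte) { s.st = append(s.st, c) }

    func (s *jsonStack) pop() (c byte) {
        c = s.st[len(s.st)-1]
        s.st = s.st[:len(s.st)-1]
        return
    }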

-------------
JSON: misc

Also, within DecodeBytes, if !isarray and zerocopy, try to use a scratch byte slice.

Also, remove some unnecessary code at the bottom of jsonStack.sep().
This reduced the function size and made sep inlinable.
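
The scratch-slice idea, roughly (hypothetical helper, not the json driver's
actual DecodeBytes): prefer the decoder's fixed scratch array over a fresh
allocation whenever the value fits.

    // decodeBytesInto fills a byte slice of length n, reusing scratch (e.g.
    // the decoder's b [scratchByteArrayLen]byte array) when it is big enough,
    // and only allocating when it is not.
    func decodeBytesInto(scratch []byte, n int, readFull func([]byte)) []byte {
        var bs []byte
        if n <= cap(scratch) {
            bs = scratch[:n] // no allocation
        } else {
            bs = make([]byte, n)
        }
        readFull(bs)
        return bs
    }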

Fixes #90
Ugorji Nwoke 10 years ago
parent
commit
45ce7596ac

+ 269 - 222
codec/decode.go

@@ -106,6 +106,14 @@ type DecodeOptions struct {
 
 	// If SignedInteger, use the int64 during schema-less decoding of unsigned values (not uint64).
 	SignedInteger bool
+
+	// MaxInitLen defines the initial length that we "make" a collection (slice, chan or map) with.
+	// If 0 or negative, we default to a sensible value based on the size of an element in the collection.
+	//
+	// For example, when decoding, a stream may say that it has MAX_UINT elements.
+	// We should not automatically provision a slice of that length, to prevent an Out-Of-Memory crash.
+	// Instead, we provision up to MaxInitLen, fill that up, and start appending after that.
+	MaxInitLen int
 }
 
 // ------------------------------------
@@ -348,7 +356,7 @@ func (z *bytesDecReader) stopTrack() (bs []byte) {
 
 // ------------------------------------
 
-type decFnInfoX struct {
+type decFnInfo struct {
 	d     *Decoder
 	ti    *typeInfo
 	xfFn  Ext
@@ -356,40 +364,26 @@ type decFnInfoX struct {
 	seq   seqType
 }
 
-// decFnInfo has methods for handling decoding of a specific type
-// based on some characteristics (builtin, extension, reflect Kind, etc)
-type decFnInfo struct {
-	// use decFnInfo as a value receiver.
-	// keep most of it less-used variables accessible via a pointer (*decFnInfoX).
-	// As sweet spot for value-receiver is 3 words, keep everything except
-	// decDriver (which everyone needs) directly accessible.
-	// ensure decFnInfoX is set for everyone who needs it i.e.
-	// rawExt, ext, builtin, (selfer|binary|text)Marshal, kSlice, kStruct, kMap, kInterface, fastpath
-
-	dd decDriver
-	*decFnInfoX
-}
-
 // ----------------------------------------
 
 type decFn struct {
 	i decFnInfo
-	f func(decFnInfo, reflect.Value)
+	f func(*decFnInfo, reflect.Value)
 }
 
-func (f decFnInfo) builtin(rv reflect.Value) {
-	f.dd.DecodeBuiltin(f.ti.rtid, rv.Addr().Interface())
+func (f *decFnInfo) builtin(rv reflect.Value) {
+	f.d.d.DecodeBuiltin(f.ti.rtid, rv.Addr().Interface())
 }
 
-func (f decFnInfo) rawExt(rv reflect.Value) {
-	f.dd.DecodeExt(rv.Addr().Interface(), 0, nil)
+func (f *decFnInfo) rawExt(rv reflect.Value) {
+	f.d.d.DecodeExt(rv.Addr().Interface(), 0, nil)
 }
 
-func (f decFnInfo) ext(rv reflect.Value) {
-	f.dd.DecodeExt(rv.Addr().Interface(), f.xfTag, f.xfFn)
+func (f *decFnInfo) ext(rv reflect.Value) {
+	f.d.d.DecodeExt(rv.Addr().Interface(), f.xfTag, f.xfFn)
 }
 
-func (f decFnInfo) getValueForUnmarshalInterface(rv reflect.Value, indir int8) (v interface{}) {
+func (f *decFnInfo) getValueForUnmarshalInterface(rv reflect.Value, indir int8) (v interface{}) {
 	if indir == -1 {
 		v = rv.Addr().Interface()
 	} else if indir == 0 {
@@ -406,29 +400,29 @@ func (f decFnInfo) getValueForUnmarshalInterface(rv reflect.Value, indir int8) (
 	return
 }
 
-func (f decFnInfo) selferUnmarshal(rv reflect.Value) {
+func (f *decFnInfo) selferUnmarshal(rv reflect.Value) {
 	f.getValueForUnmarshalInterface(rv, f.ti.csIndir).(Selfer).CodecDecodeSelf(f.d)
 }
 
-func (f decFnInfo) binaryUnmarshal(rv reflect.Value) {
+func (f *decFnInfo) binaryUnmarshal(rv reflect.Value) {
 	bm := f.getValueForUnmarshalInterface(rv, f.ti.bunmIndir).(encoding.BinaryUnmarshaler)
-	xbs := f.dd.DecodeBytes(nil, false, true)
+	xbs := f.d.d.DecodeBytes(nil, false, true)
 	if fnerr := bm.UnmarshalBinary(xbs); fnerr != nil {
 		panic(fnerr)
 	}
 }
 
-func (f decFnInfo) textUnmarshal(rv reflect.Value) {
+func (f *decFnInfo) textUnmarshal(rv reflect.Value) {
 	tm := f.getValueForUnmarshalInterface(rv, f.ti.tunmIndir).(encoding.TextUnmarshaler)
-	fnerr := tm.UnmarshalText(f.dd.DecodeBytes(f.d.b[:], true, true))
+	fnerr := tm.UnmarshalText(f.d.d.DecodeBytes(f.d.b[:], true, true))
 	if fnerr != nil {
 		panic(fnerr)
 	}
 }
 
-func (f decFnInfo) jsonUnmarshal(rv reflect.Value) {
+func (f *decFnInfo) jsonUnmarshal(rv reflect.Value) {
 	tm := f.getValueForUnmarshalInterface(rv, f.ti.junmIndir).(jsonUnmarshaler)
-	// bs := f.dd.DecodeBytes(f.d.b[:], true, true)
+	// bs := f.d.d.DecodeBytes(f.d.b[:], true, true)
 	// grab the bytes to be read, as UnmarshalJSON wants the full JSON to unmarshal it itself.
 	f.d.r.track()
 	f.d.swallow()
@@ -440,71 +434,71 @@ func (f decFnInfo) jsonUnmarshal(rv reflect.Value) {
 	}
 }
 
-func (f decFnInfo) kErr(rv reflect.Value) {
+func (f *decFnInfo) kErr(rv reflect.Value) {
 	f.d.errorf("no decoding function defined for kind %v", rv.Kind())
 }
 
-func (f decFnInfo) kString(rv reflect.Value) {
-	rv.SetString(f.dd.DecodeString())
+func (f *decFnInfo) kString(rv reflect.Value) {
+	rv.SetString(f.d.d.DecodeString())
 }
 
-func (f decFnInfo) kBool(rv reflect.Value) {
-	rv.SetBool(f.dd.DecodeBool())
+func (f *decFnInfo) kBool(rv reflect.Value) {
+	rv.SetBool(f.d.d.DecodeBool())
 }
 
-func (f decFnInfo) kInt(rv reflect.Value) {
-	rv.SetInt(f.dd.DecodeInt(intBitsize))
+func (f *decFnInfo) kInt(rv reflect.Value) {
+	rv.SetInt(f.d.d.DecodeInt(intBitsize))
 }
 
-func (f decFnInfo) kInt64(rv reflect.Value) {
-	rv.SetInt(f.dd.DecodeInt(64))
+func (f *decFnInfo) kInt64(rv reflect.Value) {
+	rv.SetInt(f.d.d.DecodeInt(64))
 }
 
-func (f decFnInfo) kInt32(rv reflect.Value) {
-	rv.SetInt(f.dd.DecodeInt(32))
+func (f *decFnInfo) kInt32(rv reflect.Value) {
+	rv.SetInt(f.d.d.DecodeInt(32))
 }
 
-func (f decFnInfo) kInt8(rv reflect.Value) {
-	rv.SetInt(f.dd.DecodeInt(8))
+func (f *decFnInfo) kInt8(rv reflect.Value) {
+	rv.SetInt(f.d.d.DecodeInt(8))
 }
 
-func (f decFnInfo) kInt16(rv reflect.Value) {
-	rv.SetInt(f.dd.DecodeInt(16))
+func (f *decFnInfo) kInt16(rv reflect.Value) {
+	rv.SetInt(f.d.d.DecodeInt(16))
 }
 
-func (f decFnInfo) kFloat32(rv reflect.Value) {
-	rv.SetFloat(f.dd.DecodeFloat(true))
+func (f *decFnInfo) kFloat32(rv reflect.Value) {
+	rv.SetFloat(f.d.d.DecodeFloat(true))
 }
 
-func (f decFnInfo) kFloat64(rv reflect.Value) {
-	rv.SetFloat(f.dd.DecodeFloat(false))
+func (f *decFnInfo) kFloat64(rv reflect.Value) {
+	rv.SetFloat(f.d.d.DecodeFloat(false))
 }
 
-func (f decFnInfo) kUint8(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(8))
+func (f *decFnInfo) kUint8(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(8))
 }
 
-func (f decFnInfo) kUint64(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(64))
+func (f *decFnInfo) kUint64(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(64))
 }
 
-func (f decFnInfo) kUint(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(uintBitsize))
+func (f *decFnInfo) kUint(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(uintBitsize))
 }
 
-func (f decFnInfo) kUintptr(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(uintBitsize))
+func (f *decFnInfo) kUintptr(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(uintBitsize))
 }
 
-func (f decFnInfo) kUint32(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(32))
+func (f *decFnInfo) kUint32(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(32))
 }
 
-func (f decFnInfo) kUint16(rv reflect.Value) {
-	rv.SetUint(f.dd.DecodeUint(16))
+func (f *decFnInfo) kUint16(rv reflect.Value) {
+	rv.SetUint(f.d.d.DecodeUint(16))
 }
 
-// func (f decFnInfo) kPtr(rv reflect.Value) {
+// func (f *decFnInfo) kPtr(rv reflect.Value) {
 // 	debugf(">>>>>>> ??? decode kPtr called - shouldn't get called")
 // 	if rv.IsNil() {
 // 		rv.Set(reflect.New(rv.Type().Elem()))
@@ -514,11 +508,11 @@ func (f decFnInfo) kUint16(rv reflect.Value) {
 
 // var kIntfCtr uint64
 
-func (f decFnInfo) kInterfaceNaked() (rvn reflect.Value) {
+func (f *decFnInfo) kInterfaceNaked() (rvn reflect.Value) {
 	// nil interface:
 	// use some heuristics to decode it appropriately
 	// based on the detected next value in the stream.
-	v, vt, decodeFurther := f.dd.DecodeNaked()
+	v, vt, decodeFurther := f.d.d.DecodeNaked()
 	if vt == valueTypeNil {
 		return
 	}
@@ -564,7 +558,7 @@ func (f decFnInfo) kInterfaceNaked() (rvn reflect.Value) {
 	}
 	if decodeFurther {
 		if useRvn {
-			f.d.decodeValue(rvn, decFn{})
+			f.d.decodeValue(rvn, nil)
 		} else if v != nil {
 			// this v is a pointer, so we need to dereference it when done
 			f.d.decode(v)
@@ -579,7 +573,7 @@ func (f decFnInfo) kInterfaceNaked() (rvn reflect.Value) {
 	return
 }
 
-func (f decFnInfo) kInterface(rv reflect.Value) {
+func (f *decFnInfo) kInterface(rv reflect.Value) {
 	// debugf("\t===> kInterface")
 
 	// Note:
@@ -601,65 +595,66 @@ func (f decFnInfo) kInterface(rv reflect.Value) {
 		// we just decode into it.
 		// Else we create a settable value, decode into it, and set on the interface.
 		if rve.CanSet() {
-			f.d.decodeValue(rve, decFn{})
+			f.d.decodeValue(rve, nil)
 		} else {
 			rve2 := reflect.New(rve.Type()).Elem()
 			rve2.Set(rve)
-			f.d.decodeValue(rve2, decFn{})
+			f.d.decodeValue(rve2, nil)
 			rv.Set(rve2)
 		}
 	}
 }
 
-func (f decFnInfo) kStruct(rv reflect.Value) {
+func (f *decFnInfo) kStruct(rv reflect.Value) {
 	fti := f.ti
 	d := f.d
-	if f.dd.IsContainerType(valueTypeMap) {
-		containerLen := f.dd.ReadMapStart()
+	dd := d.d
+	if dd.IsContainerType(valueTypeMap) {
+		containerLen := dd.ReadMapStart()
 		if containerLen == 0 {
-			f.dd.ReadEnd()
+			dd.ReadEnd()
 			return
 		}
 		tisfi := fti.sfi
 		hasLen := containerLen >= 0
 		if hasLen {
 			for j := 0; j < containerLen; j++ {
-				// rvkencname := f.dd.DecodeString()
-				rvkencname := stringView(f.dd.DecodeBytes(f.d.b[:], true, true))
+				// rvkencname := dd.DecodeString()
+				rvkencname := stringView(dd.DecodeBytes(f.d.b[:], true, true))
 				// rvksi := ti.getForEncName(rvkencname)
 				if k := fti.indexForEncName(rvkencname); k > -1 {
 					si := tisfi[k]
-					if f.dd.TryDecodeAsNil() {
+					if dd.TryDecodeAsNil() {
 						si.setToZeroValue(rv)
 					} else {
-						d.decodeValue(si.field(rv, true), decFn{})
+						d.decodeValue(si.field(rv, true), nil)
 					}
 				} else {
 					d.structFieldNotFound(-1, rvkencname)
 				}
 			}
 		} else {
-			for j := 0; !f.dd.CheckBreak(); j++ {
-				// rvkencname := f.dd.DecodeString()
-				rvkencname := stringView(f.dd.DecodeBytes(f.d.b[:], true, true))
+			for j := 0; !dd.CheckBreak(); j++ {
+				// rvkencname := dd.DecodeString()
+				rvkencname := stringView(dd.DecodeBytes(f.d.b[:], true, true))
 				// rvksi := ti.getForEncName(rvkencname)
 				if k := fti.indexForEncName(rvkencname); k > -1 {
 					si := tisfi[k]
-					if f.dd.TryDecodeAsNil() {
+					if dd.TryDecodeAsNil() {
 						si.setToZeroValue(rv)
 					} else {
-						d.decodeValue(si.field(rv, true), decFn{})
+						d.decodeValue(si.field(rv, true), nil)
 					}
 				} else {
 					d.structFieldNotFound(-1, rvkencname)
 				}
 			}
-			f.dd.ReadEnd()
+			dd.ReadEnd()
 		}
-	} else if f.dd.IsContainerType(valueTypeArray) {
-		containerLen := f.dd.ReadArrayStart()
+	} else if dd.IsContainerType(valueTypeArray) {
+		containerLen := dd.ReadArrayStart()
 		if containerLen == 0 {
-			f.dd.ReadEnd()
+			dd.ReadEnd()
 			return
 		}
 		// Not much gain from doing it two ways for array.
@@ -670,13 +665,13 @@ func (f decFnInfo) kStruct(rv reflect.Value) {
 				if j == containerLen {
 					break
 				}
-			} else if f.dd.CheckBreak() {
+			} else if dd.CheckBreak() {
 				break
 			}
-			if f.dd.TryDecodeAsNil() {
+			if dd.TryDecodeAsNil() {
 				si.setToZeroValue(rv)
 			} else {
-				d.decodeValue(si.field(rv, true), decFn{})
+				d.decodeValue(si.field(rv, true), nil)
 			}
 		}
 		if containerLen > len(fti.sfip) {
@@ -685,29 +680,32 @@ func (f decFnInfo) kStruct(rv reflect.Value) {
 				d.structFieldNotFound(j, "")
 			}
 		}
-		f.dd.ReadEnd()
+		dd.ReadEnd()
 	} else {
 		f.d.error(onlyMapOrArrayCanDecodeIntoStructErr)
 		return
 	}
 }
 
-func (f decFnInfo) kSlice(rv reflect.Value) {
+func (f *decFnInfo) kSlice(rv reflect.Value) {
 	// A slice can be set from a map or array in stream.
 	// This way, the order can be kept (as order is lost with map).
 	ti := f.ti
 	d := f.d
-	if f.dd.IsContainerType(valueTypeBytes) || f.dd.IsContainerType(valueTypeString) {
-		if ti.rtid == uint8SliceTypId || ti.rt.Elem().Kind() == reflect.Uint8 {
+	dd := d.d
+	rtelem0 := ti.rt.Elem()
+
+	if dd.IsContainerType(valueTypeBytes) || dd.IsContainerType(valueTypeString) {
+		if ti.rtid == uint8SliceTypId || rtelem0.Kind() == reflect.Uint8 {
 			if f.seq == seqTypeChan {
-				bs2 := f.dd.DecodeBytes(nil, false, true)
+				bs2 := dd.DecodeBytes(nil, false, true)
 				ch := rv.Interface().(chan<- byte)
 				for _, b := range bs2 {
 					ch <- b
 				}
 			} else {
 				rvbs := rv.Bytes()
-				bs2 := f.dd.DecodeBytes(rvbs, false, false)
+				bs2 := dd.DecodeBytes(rvbs, false, false)
 				if rvbs == nil && bs2 != nil || rvbs != nil && bs2 == nil || len(bs2) != len(rvbs) {
 					if rv.CanSet() {
 						rv.SetBytes(bs2)
@@ -724,72 +722,70 @@ func (f decFnInfo) kSlice(rv reflect.Value) {
 
 	slh, containerLenS := d.decSliceHelperStart()
 
+	var rvlen, numToRead int
+	var truncated bool // says that the len of the sequence is not same as the expected number of elements.
+
+	numToRead = containerLenS // if truncated, reset numToRead
+
 	// an array can never return a nil slice. so no need to check f.array here.
 	if rv.IsNil() {
 		// either chan or slice
+		if rvlen, truncated = decInferLen(containerLenS, f.d.h.MaxInitLen, int(rtelem0.Size())); truncated {
+			numToRead = rvlen
+		}
 		if f.seq == seqTypeSlice {
-			if containerLenS <= 0 {
-				rv.Set(reflect.MakeSlice(ti.rt, 0, 0))
-			} else {
-				rv.Set(reflect.MakeSlice(ti.rt, containerLenS, containerLenS))
-			}
+			rv.Set(reflect.MakeSlice(ti.rt, rvlen, rvlen))
 		} else if f.seq == seqTypeChan {
-			if containerLenS <= 0 {
-				rv.Set(reflect.MakeChan(ti.rt, 0))
-			} else {
-				rv.Set(reflect.MakeChan(ti.rt, containerLenS))
-			}
+			rv.Set(reflect.MakeChan(ti.rt, rvlen))
 		}
+	} else {
+		rvlen = rv.Len()
 	}
 
-	rvlen := rv.Len()
 	if containerLenS == 0 {
 		if f.seq == seqTypeSlice && rvlen != 0 {
 			rv.SetLen(0)
 		}
-		// f.dd.ReadEnd()
+		// dd.ReadEnd()
 		return
 	}
 
-	rtelem0 := ti.rt.Elem()
 	rtelem := rtelem0
 	for rtelem.Kind() == reflect.Ptr {
 		rtelem = rtelem.Elem()
 	}
 	fn := d.getDecFn(rtelem, true, true)
 
-	rv0 := rv
+	var rv0, rv9 reflect.Value
+	rv0 = rv
 	rvChanged := false
 
 	rvcap := rv.Cap()
 
 	// for j := 0; j < containerLenS; j++ {
 
-	hasLen := containerLenS >= 0
-	if hasLen {
+	if containerLenS >= 0 { // hasLen
 		if f.seq == seqTypeChan {
 			// handle chan specially:
 			for j := 0; j < containerLenS; j++ {
-				rv0 := reflect.New(rtelem0).Elem()
-				d.decodeValue(rv0, fn)
-				rv.Send(rv0)
+				rv9 = reflect.New(rtelem0).Elem()
+				d.decodeValue(rv9, fn)
+				rv.Send(rv9)
 			}
-		} else {
-			numToRead := containerLenS
+		} else { // slice or array
 			if containerLenS > rvcap {
 				if f.seq == seqTypeArray {
-					d.arrayCannotExpand(rv.Len(), containerLenS)
-					numToRead = rvlen
+					d.arrayCannotExpand(rvlen, containerLenS)
 				} else {
-					rv = reflect.MakeSlice(ti.rt, containerLenS, containerLenS)
-					if rvlen > 0 && !isMutableKind(ti.rt.Kind()) {
-						rv1 := rv0
-						rv1.SetLen(rvcap)
-						reflect.Copy(rv, rv1)
+					oldRvlenGtZero := rvlen > 0
+					rvlen, truncated = decInferLen(containerLenS, f.d.h.MaxInitLen, int(rtelem0.Size()))
+					rv = reflect.MakeSlice(ti.rt, rvlen, rvlen)
+					if oldRvlenGtZero && !isImmutableKind(rtelem0.Kind()) {
+						reflect.Copy(rv, rv0) // only copy up to length NOT cap i.e. rv0.Slice(0, rvcap)
 					}
 					rvChanged = true
-					rvlen = containerLenS
 				}
+				numToRead = rvlen
 			} else if containerLenS != rvlen {
 				if f.seq == seqTypeSlice {
 					rv.SetLen(containerLenS)
@@ -797,17 +793,31 @@ func (f decFnInfo) kSlice(rv reflect.Value) {
 				}
 			}
 			j := 0
+			// we read up to the numToRead
 			for ; j < numToRead; j++ {
 				d.decodeValue(rv.Index(j), fn)
 			}
+
+			// if slice, expand and read up to containerLenS (or EOF) iff truncated
+			// if array, swallow all the rest.
+
 			if f.seq == seqTypeArray {
 				for ; j < containerLenS; j++ {
 					d.swallow()
 				}
+			} else if truncated { // slice was truncated, as chan NOT in this block
+				for ; j < containerLenS; j++ {
+					rv = expandSliceValue(rv, 1)
+					rv9 = rv.Index(j)
+					if resetSliceElemToZeroValue {
+						rv9.Set(reflect.Zero(rtelem0))
+					}
+					d.decodeValue(rv9, fn)
+				}
 			}
 		}
 	} else {
-		for j := 0; !f.dd.CheckBreak(); j++ {
+		for j := 0; !dd.CheckBreak(); j++ {
 			var decodeIntoBlank bool
 			// if indefinite, etc, then expand the slice if necessary
 			if j >= rvlen {
@@ -815,19 +825,27 @@ func (f decFnInfo) kSlice(rv reflect.Value) {
 					d.arrayCannotExpand(rvlen, j+1)
 					decodeIntoBlank = true
 				} else if f.seq == seqTypeSlice {
-					rv = reflect.Append(rv, reflect.Zero(rtelem0))
+					// rv = reflect.Append(rv, reflect.Zero(rtelem0)) // uses append logic, plus varargs
+					rv = expandSliceValue(rv, 1)
+					rv9 = rv.Index(j)
+					// rv.Index(rv.Len() - 1).Set(reflect.Zero(rtelem0))
+					if resetSliceElemToZeroValue {
+						rv9.Set(reflect.Zero(rtelem0))
+					}
 					rvlen++
 					rvChanged = true
 				}
+			} else if f.seq != seqTypeChan { // slice or array
+				rv9 = rv.Index(j)
 			}
 			if f.seq == seqTypeChan {
-				rv0 := reflect.New(rtelem0).Elem()
-				d.decodeValue(rv0, fn)
-				rv.Send(rv0)
+				rv9 = reflect.New(rtelem0).Elem()
+				d.decodeValue(rv9, fn)
+				rv.Send(rv9)
 			} else if decodeIntoBlank {
 				d.swallow()
-			} else {
-				d.decodeValue(rv.Index(j), fn)
+			} else { // seqTypeSlice
+				d.decodeValue(rv9, fn)
 			}
 		}
 		slh.End()
@@ -838,13 +856,15 @@ func (f decFnInfo) kSlice(rv reflect.Value) {
 	}
 }
 
-func (f decFnInfo) kArray(rv reflect.Value) {
+func (f *decFnInfo) kArray(rv reflect.Value) {
 	// f.d.decodeValue(rv.Slice(0, rv.Len()))
 	f.kSlice(rv.Slice(0, rv.Len()))
 }
 
-func (f decFnInfo) kMap(rv reflect.Value) {
-	containerLen := f.dd.ReadMapStart()
+func (f *decFnInfo) kMap(rv reflect.Value) {
+	d := f.d
+	dd := d.d
+	containerLen := dd.ReadMapStart()
 
 	ti := f.ti
 	if rv.IsNil() {
@@ -853,15 +873,13 @@ func (f decFnInfo) kMap(rv reflect.Value) {
 
 	if containerLen == 0 {
 		// It is not length-prefix style container. They have no End marker.
-		// f.dd.ReadMapEnd()
+		// dd.ReadMapEnd()
 		return
 	}
 
-	d := f.d
-
 	ktype, vtype := ti.rt.Key(), ti.rt.Elem()
 	ktypeId := reflect.ValueOf(ktype).Pointer()
-	var keyFn, valFn decFn
+	var keyFn, valFn *decFn
 	var xtyp reflect.Type
 	for xtyp = ktype; xtyp.Kind() == reflect.Ptr; xtyp = xtyp.Elem() {
 	}
@@ -891,7 +909,7 @@ func (f decFnInfo) kMap(rv reflect.Value) {
 			rv.SetMapIndex(rvk, rvv)
 		}
 	} else {
-		for j := 0; !f.dd.CheckBreak(); j++ {
+		for j := 0; !dd.CheckBreak(); j++ {
 			rvk := reflect.New(ktype).Elem()
 			d.decodeValue(rvk, keyFn)
 
@@ -909,11 +927,11 @@ func (f decFnInfo) kMap(rv reflect.Value) {
 			d.decodeValue(rvv, valFn)
 			rv.SetMapIndex(rvk, rvv)
 		}
-		f.dd.ReadEnd()
+		dd.ReadEnd()
 	}
 }
 
-type rtidDecFn struct {
+type decRtidFn struct {
 	rtid uintptr
 	fn   decFn
 }
@@ -927,8 +945,8 @@ type Decoder struct {
 	// NOTE: Decoder shouldn't call it's read methods,
 	// as the handler MAY need to do some coordination.
 	r decReader
-	//sa [32]rtidDecFn
-	s []rtidDecFn
+	// sa [initCollectionCap]decRtidFn
+	s []decRtidFn
 	h *BasicHandle
 
 	rb    bytesDecReader
@@ -938,8 +956,8 @@ type Decoder struct {
 	js    bool // is json handle
 
 	ri ioDecReader
-	f  map[uintptr]decFn
-	_  uintptr // for alignment purposes, so next one starts from a cache line
+	f  map[uintptr]*decFn
+	// _  uintptr // for alignment purposes, so next one starts from a cache line
 
 	b [scratchByteArrayLen]byte
 }
@@ -950,7 +968,7 @@ type Decoder struct {
 // (eg bufio.Reader, bytes.Buffer).
 func NewDecoder(r io.Reader, h Handle) (d *Decoder) {
 	d = &Decoder{hh: h, h: h.getBasicHandle(), be: h.isBinary()}
-	//d.s = d.sa[:0]
+	// d.s = d.sa[:0]
 	d.ri.x = &d.b
 	d.ri.bs.r = r
 	var ok bool
@@ -968,7 +986,7 @@ func NewDecoder(r io.Reader, h Handle) (d *Decoder) {
 // from a byte slice with zero copying.
 func NewDecoderBytes(in []byte, h Handle) (d *Decoder) {
 	d = &Decoder{hh: h, h: h.getBasicHandle(), be: h.isBinary(), bytes: true}
-	//d.s = d.sa[:0]
+	// d.s = d.sa[:0]
 	d.rb.b = in
 	d.rb.a = len(in)
 	d.r = &d.rb
@@ -1037,7 +1055,7 @@ func (d *Decoder) Decode(v interface{}) (err error) {
 // this is not a smart swallow, as it allocates objects and does unnecessary work.
 func (d *Decoder) swallowViaHammer() {
 	var blank interface{}
-	d.decodeValue(reflect.ValueOf(&blank).Elem(), decFn{})
+	d.decodeValue(reflect.ValueOf(&blank).Elem(), nil)
 }
 
 func (d *Decoder) swallow() {
@@ -1157,7 +1175,7 @@ func (d *Decoder) decode(iv interface{}) {
 
 	case reflect.Value:
 		d.chkPtrValue(v)
-		d.decodeValueNotNil(v.Elem(), decFn{})
+		d.decodeValueNotNil(v.Elem(), nil)
 
 	case *string:
 
@@ -1192,7 +1210,7 @@ func (d *Decoder) decode(iv interface{}) {
 		*v = d.d.DecodeBytes(*v, false, false)
 
 	case *interface{}:
-		d.decodeValueNotNil(reflect.ValueOf(iv).Elem(), decFn{})
+		d.decodeValueNotNil(reflect.ValueOf(iv).Elem(), nil)
 
 	default:
 		if !fastpathDecodeTypeSwitch(iv, d) {
@@ -1231,29 +1249,29 @@ func (d *Decoder) decodeI(iv interface{}, checkPtr, tryNil, checkFastpath, check
 	rv, proceed := d.preDecodeValue(rv, tryNil)
 	if proceed {
 		fn := d.getDecFn(rv.Type(), checkFastpath, checkCodecSelfer)
-		fn.f(fn.i, rv)
+		fn.f(&fn.i, rv)
 	}
 }
 
-func (d *Decoder) decodeValue(rv reflect.Value, fn decFn) {
+func (d *Decoder) decodeValue(rv reflect.Value, fn *decFn) {
 	if rv, proceed := d.preDecodeValue(rv, true); proceed {
-		if fn.f == nil {
+		if fn == nil {
 			fn = d.getDecFn(rv.Type(), true, true)
 		}
-		fn.f(fn.i, rv)
+		fn.f(&fn.i, rv)
 	}
 }
 
-func (d *Decoder) decodeValueNotNil(rv reflect.Value, fn decFn) {
+func (d *Decoder) decodeValueNotNil(rv reflect.Value, fn *decFn) {
 	if rv, proceed := d.preDecodeValue(rv, false); proceed {
-		if fn.f == nil {
+		if fn == nil {
 			fn = d.getDecFn(rv.Type(), true, true)
 		}
-		fn.f(fn.i, rv)
+		fn.f(&fn.i, rv)
 	}
 }
 
-func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool) (fn decFn) {
+func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool) (fn *decFn) {
 	rtid := reflect.ValueOf(rt).Pointer()
 
 	// retrieve or register a focus'ed function for this type
@@ -1264,9 +1282,10 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 	if useMapForCodecCache {
 		fn, ok = d.f[rtid]
 	} else {
-		for _, v := range d.s {
+		for i := range d.s {
+			v := &(d.s[i])
 			if v.rtid == rtid {
-				fn, ok = v.fn, true
+				fn, ok = &(v.fn), true
 				break
 			}
 		}
@@ -1275,11 +1294,25 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 		return
 	}
 
+	if useMapForCodecCache {
+		if d.f == nil {
+			d.f = make(map[uintptr]*decFn, initCollectionCap)
+		}
+		fn = new(decFn)
+		d.f[rtid] = fn
+	} else {
+		if d.s == nil {
+			d.s = make([]decRtidFn, 0, initCollectionCap)
+		}
+		d.s = append(d.s, decRtidFn{rtid: rtid})
+		fn = &(d.s[len(d.s)-1]).fn
+	}
+
 	// debugf("\tCreating new dec fn for type: %v\n", rt)
 	ti := getTypeInfo(rtid, rt)
-	var fi decFnInfo
-	fi.dd = d.d
-	// fi.decFnInfoX = new(decFnInfoX)
+	fi := &(fn.i)
+	fi.d = d
+	fi.ti = ti
 
 	// An extension can be registered for any type, regardless of the Kind
 	// (e.g. type BitSet int64, type MyStruct { / * unexported fields * / }, type X []int, etc.
@@ -1291,35 +1324,26 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 	// NOTE: if decoding into a nil interface{}, we return a non-nil
 	// value except even if the container registers a length of 0.
 	if checkCodecSelfer && ti.cs {
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).selferUnmarshal
+		fn.f = (*decFnInfo).selferUnmarshal
 	} else if rtid == rawExtTypId {
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).rawExt
+		fn.f = (*decFnInfo).rawExt
 	} else if d.d.IsBuiltinType(rtid) {
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).builtin
+		fn.f = (*decFnInfo).builtin
 	} else if xfFn := d.h.getExt(rtid); xfFn != nil {
-		// fi.decFnInfoX = &decFnInfoX{xfTag: xfFn.tag, xfFn: xfFn.ext}
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
 		fi.xfTag, fi.xfFn = xfFn.tag, xfFn.ext
-		fn.f = (decFnInfo).ext
+		fn.f = (*decFnInfo).ext
 	} else if supportMarshalInterfaces && d.be && ti.bunm {
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).binaryUnmarshal
+		fn.f = (*decFnInfo).binaryUnmarshal
 	} else if supportMarshalInterfaces && !d.be && d.js && ti.junm {
 		//If JSON, we should check JSONUnmarshal before textUnmarshal
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).jsonUnmarshal
+		fn.f = (*decFnInfo).jsonUnmarshal
 	} else if supportMarshalInterfaces && !d.be && ti.tunm {
-		fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-		fn.f = (decFnInfo).textUnmarshal
+		fn.f = (*decFnInfo).textUnmarshal
 	} else {
 		rk := rt.Kind()
 		if fastpathEnabled && checkFastpath && (rk == reflect.Map || rk == reflect.Slice) {
 			if rt.PkgPath() == "" {
 				if idx := fastpathAV.index(rtid); idx != -1 {
-					fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
 					fn.f = fastpathAV[idx].decfn
 				}
 			} else {
@@ -1335,8 +1359,7 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 				if idx := fastpathAV.index(rtuid); idx != -1 {
 					xfnf := fastpathAV[idx].decfn
 					xrt := fastpathAV[idx].rt
-					fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-					fn.f = func(xf decFnInfo, xrv reflect.Value) {
+					fn.f = func(xf *decFnInfo, xrv reflect.Value) {
 						// xfnf(xf, xrv.Convert(xrt))
 						xfnf(xf, xrv.Addr().Convert(reflect.PtrTo(xrt)).Elem())
 					}
@@ -1346,74 +1369,58 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 		if fn.f == nil {
 			switch rk {
 			case reflect.String:
-				fn.f = (decFnInfo).kString
+				fn.f = (*decFnInfo).kString
 			case reflect.Bool:
-				fn.f = (decFnInfo).kBool
+				fn.f = (*decFnInfo).kBool
 			case reflect.Int:
-				fn.f = (decFnInfo).kInt
+				fn.f = (*decFnInfo).kInt
 			case reflect.Int64:
-				fn.f = (decFnInfo).kInt64
+				fn.f = (*decFnInfo).kInt64
 			case reflect.Int32:
-				fn.f = (decFnInfo).kInt32
+				fn.f = (*decFnInfo).kInt32
 			case reflect.Int8:
-				fn.f = (decFnInfo).kInt8
+				fn.f = (*decFnInfo).kInt8
 			case reflect.Int16:
-				fn.f = (decFnInfo).kInt16
+				fn.f = (*decFnInfo).kInt16
 			case reflect.Float32:
-				fn.f = (decFnInfo).kFloat32
+				fn.f = (*decFnInfo).kFloat32
 			case reflect.Float64:
-				fn.f = (decFnInfo).kFloat64
+				fn.f = (*decFnInfo).kFloat64
 			case reflect.Uint8:
-				fn.f = (decFnInfo).kUint8
+				fn.f = (*decFnInfo).kUint8
 			case reflect.Uint64:
-				fn.f = (decFnInfo).kUint64
+				fn.f = (*decFnInfo).kUint64
 			case reflect.Uint:
-				fn.f = (decFnInfo).kUint
+				fn.f = (*decFnInfo).kUint
 			case reflect.Uint32:
-				fn.f = (decFnInfo).kUint32
+				fn.f = (*decFnInfo).kUint32
 			case reflect.Uint16:
-				fn.f = (decFnInfo).kUint16
+				fn.f = (*decFnInfo).kUint16
 				// case reflect.Ptr:
-				// 	fn.f = (decFnInfo).kPtr
+				// 	fn.f = (*decFnInfo).kPtr
 			case reflect.Uintptr:
-				fn.f = (decFnInfo).kUintptr
+				fn.f = (*decFnInfo).kUintptr
 			case reflect.Interface:
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-				fn.f = (decFnInfo).kInterface
+				fn.f = (*decFnInfo).kInterface
 			case reflect.Struct:
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-				fn.f = (decFnInfo).kStruct
+				fn.f = (*decFnInfo).kStruct
 			case reflect.Chan:
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti, seq: seqTypeChan}
-				fn.f = (decFnInfo).kSlice
+				fi.seq = seqTypeChan
+				fn.f = (*decFnInfo).kSlice
 			case reflect.Slice:
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti, seq: seqTypeSlice}
-				fn.f = (decFnInfo).kSlice
+				fi.seq = seqTypeSlice
+				fn.f = (*decFnInfo).kSlice
 			case reflect.Array:
-				// fi.decFnInfoX = &decFnInfoX{array: true}
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti, seq: seqTypeArray}
-				fn.f = (decFnInfo).kArray
+				fi.seq = seqTypeArray
+				fn.f = (*decFnInfo).kArray
 			case reflect.Map:
-				fi.decFnInfoX = &decFnInfoX{d: d, ti: ti}
-				fn.f = (decFnInfo).kMap
+				fn.f = (*decFnInfo).kMap
 			default:
-				fn.f = (decFnInfo).kErr
+				fn.f = (*decFnInfo).kErr
 			}
 		}
 	}
-	fn.i = fi
 
-	if useMapForCodecCache {
-		if d.f == nil {
-			d.f = make(map[uintptr]decFn, 32)
-		}
-		d.f[rtid] = fn
-	} else {
-		if d.s == nil {
-			d.s = make([]rtidDecFn, 0, 32)
-		}
-		d.s = append(d.s, rtidDecFn{rtid, fn})
-	}
 	return
 }
 
@@ -1522,6 +1529,46 @@ func detachZeroCopyBytes(isBytesReader bool, dest []byte, in []byte) (out []byte
 	return in
 }
 
+// decInferLen will infer a sensible length, given the following:
+//    - clen: length wanted.
+//    - maxlen: max length to be returned.
+//      if <= 0, it is unset, and we infer it based on the unit size
+//    - unit: number of bytes for each element of the collection
+func decInferLen(clen, maxlen, unit int) (rvlen int, truncated bool) {
+	// handle when maxlen is not set i.e. <= 0
+	if clen <= 0 {
+		return
+	}
+	if maxlen <= 0 {
+		// no maxlen defined. Use maximum of 256K memory, with a floor of 4K items.
+		// maxlen = 256 * 1024 / unit
+		// if maxlen < (4 * 1024) {
+		// 	maxlen = 4 * 1024
+		// }
+		if unit < (256 / 4) {
+			maxlen = 256 * 1024 / unit
+		} else {
+			maxlen = 4 * 1024
+		}
+	}
+	if clen > maxlen {
+		rvlen = maxlen
+		truncated = true
+	} else {
+		rvlen = clen
+	}
+	return
+	// if clen <= 0 {
+	// 	rvlen = 0
+	// } else if maxlen > 0 && clen > maxlen {
+	// 	rvlen = maxlen
+	// 	truncated = true
+	// } else {
+	// 	rvlen = clen
+	// }
+	// return
+}
+
 // // implement overall decReader wrapping both, for possible use inline:
 // type decReaderT struct {
 // 	bytes bool

+ 171 - 164
codec/encode.go

@@ -6,7 +6,6 @@ package codec
 import (
 	"bytes"
 	"encoding"
-	"errors"
 	"fmt"
 	"io"
 	"reflect"
@@ -246,10 +245,10 @@ func (z *bytesEncWriter) grow(n int) (oldcursor int) {
 	z.c = oldcursor + n
 	if z.c > len(z.b) {
 		if z.c > cap(z.b) {
-			// Tried using appendslice logic: (if cap < 1024, *2, else *1.25).
-			// However, it was too expensive, causing too many iterations of copy.
-			// Using bytes.Buffer model was much better (2*cap + n)
-			bs := make([]byte, 2*cap(z.b)+n)
+			// appendslice logic (if cap < 1024, *2, else *1.25): more expensive. many copy calls.
+			// bytes.Buffer model (2*cap + n): much better
+			// bs := make([]byte, 2*cap(z.b)+n)
+			bs := make([]byte, growCap(cap(z.b), 1, n))
 			copy(bs, z.b[:oldcursor])
 			z.b = bs
 		} else {
@@ -261,7 +260,7 @@ func (z *bytesEncWriter) grow(n int) (oldcursor int) {
 
 // ---------------------------------------------
 
-type encFnInfoX struct {
+type encFnInfo struct {
 	e     *Encoder
 	ti    *typeInfo
 	xfFn  Ext
@@ -269,25 +268,13 @@ type encFnInfoX struct {
 	seq   seqType
 }
 
-type encFnInfo struct {
-	// use encFnInfo as a value receiver.
-	// keep most of it less-used variables accessible via a pointer (*encFnInfoX).
-	// As sweet spot for value-receiver is 3 words, keep everything except
-	// encDriver (which everyone needs) directly accessible.
-	// ensure encFnInfoX is set for everyone who needs it i.e.
-	// rawExt, ext, builtin, (selfer|binary|text)Marshal, kSlice, kStruct, kMap, kInterface, fastpath
-
-	ee encDriver
-	*encFnInfoX
+func (f *encFnInfo) builtin(rv reflect.Value) {
+	f.e.e.EncodeBuiltin(f.ti.rtid, rv.Interface())
 }
 
-func (f encFnInfo) builtin(rv reflect.Value) {
-	f.ee.EncodeBuiltin(f.ti.rtid, rv.Interface())
-}
-
-func (f encFnInfo) rawExt(rv reflect.Value) {
+func (f *encFnInfo) rawExt(rv reflect.Value) {
 	// rev := rv.Interface().(RawExt)
-	// f.ee.EncodeRawExt(&rev, f.e)
+	// f.e.e.EncodeRawExt(&rev, f.e)
 	var re *RawExt
 	if rv.CanAddr() {
 		re = rv.Addr().Interface().(*RawExt)
@@ -295,18 +282,18 @@ func (f encFnInfo) rawExt(rv reflect.Value) {
 		rev := rv.Interface().(RawExt)
 		re = &rev
 	}
-	f.ee.EncodeRawExt(re, f.e)
+	f.e.e.EncodeRawExt(re, f.e)
 }
 
-func (f encFnInfo) ext(rv reflect.Value) {
+func (f *encFnInfo) ext(rv reflect.Value) {
 	// if this is a struct|array and it was addressable, then pass the address directly (not the value)
 	if k := rv.Kind(); (k == reflect.Struct || k == reflect.Array) && rv.CanAddr() {
 		rv = rv.Addr()
 	}
-	f.ee.EncodeExt(rv.Interface(), f.xfTag, f.xfFn, f.e)
+	f.e.e.EncodeExt(rv.Interface(), f.xfTag, f.xfFn, f.e)
 }
 
-func (f encFnInfo) getValueForMarshalInterface(rv reflect.Value, indir int8) (v interface{}, proceed bool) {
+func (f *encFnInfo) getValueForMarshalInterface(rv reflect.Value, indir int8) (v interface{}, proceed bool) {
 	if indir == 0 {
 		v = rv.Interface()
 	} else if indir == -1 {
@@ -323,7 +310,7 @@ func (f encFnInfo) getValueForMarshalInterface(rv reflect.Value, indir int8) (v
 	} else {
 		for j := int8(0); j < indir; j++ {
 			if rv.IsNil() {
-				f.ee.EncodeNil()
+				f.e.e.EncodeNil()
 				return
 			}
 			rv = rv.Elem()
@@ -333,20 +320,20 @@ func (f encFnInfo) getValueForMarshalInterface(rv reflect.Value, indir int8) (v
 	return v, true
 }
 
-func (f encFnInfo) selferMarshal(rv reflect.Value) {
+func (f *encFnInfo) selferMarshal(rv reflect.Value) {
 	if v, proceed := f.getValueForMarshalInterface(rv, f.ti.csIndir); proceed {
 		v.(Selfer).CodecEncodeSelf(f.e)
 	}
 }
 
-func (f encFnInfo) binaryMarshal(rv reflect.Value) {
+func (f *encFnInfo) binaryMarshal(rv reflect.Value) {
 	if v, proceed := f.getValueForMarshalInterface(rv, f.ti.bmIndir); proceed {
 		bs, fnerr := v.(encoding.BinaryMarshaler).MarshalBinary()
 		f.e.marshal(bs, fnerr, false, c_RAW)
 	}
 }
 
-func (f encFnInfo) textMarshal(rv reflect.Value) {
+func (f *encFnInfo) textMarshal(rv reflect.Value) {
 	if v, proceed := f.getValueForMarshalInterface(rv, f.ti.tmIndir); proceed {
 		// debugf(">>>> encoding.TextMarshaler: %T", rv.Interface())
 		bs, fnerr := v.(encoding.TextMarshaler).MarshalText()
@@ -354,46 +341,46 @@ func (f encFnInfo) textMarshal(rv reflect.Value) {
 	}
 }
 
-func (f encFnInfo) jsonMarshal(rv reflect.Value) {
+func (f *encFnInfo) jsonMarshal(rv reflect.Value) {
 	if v, proceed := f.getValueForMarshalInterface(rv, f.ti.jmIndir); proceed {
 		bs, fnerr := v.(jsonMarshaler).MarshalJSON()
 		f.e.marshal(bs, fnerr, true, c_UTF8)
 	}
 }
 
-func (f encFnInfo) kBool(rv reflect.Value) {
-	f.ee.EncodeBool(rv.Bool())
+func (f *encFnInfo) kBool(rv reflect.Value) {
+	f.e.e.EncodeBool(rv.Bool())
 }
 
-func (f encFnInfo) kString(rv reflect.Value) {
-	f.ee.EncodeString(c_UTF8, rv.String())
+func (f *encFnInfo) kString(rv reflect.Value) {
+	f.e.e.EncodeString(c_UTF8, rv.String())
 }
 
-func (f encFnInfo) kFloat64(rv reflect.Value) {
-	f.ee.EncodeFloat64(rv.Float())
+func (f *encFnInfo) kFloat64(rv reflect.Value) {
+	f.e.e.EncodeFloat64(rv.Float())
 }
 
-func (f encFnInfo) kFloat32(rv reflect.Value) {
-	f.ee.EncodeFloat32(float32(rv.Float()))
+func (f *encFnInfo) kFloat32(rv reflect.Value) {
+	f.e.e.EncodeFloat32(float32(rv.Float()))
 }
 
-func (f encFnInfo) kInt(rv reflect.Value) {
-	f.ee.EncodeInt(rv.Int())
+func (f *encFnInfo) kInt(rv reflect.Value) {
+	f.e.e.EncodeInt(rv.Int())
 }
 
-func (f encFnInfo) kUint(rv reflect.Value) {
-	f.ee.EncodeUint(rv.Uint())
+func (f *encFnInfo) kUint(rv reflect.Value) {
+	f.e.e.EncodeUint(rv.Uint())
 }
 
-func (f encFnInfo) kInvalid(rv reflect.Value) {
-	f.ee.EncodeNil()
+func (f *encFnInfo) kInvalid(rv reflect.Value) {
+	f.e.e.EncodeNil()
 }
 
-func (f encFnInfo) kErr(rv reflect.Value) {
+func (f *encFnInfo) kErr(rv reflect.Value) {
 	f.e.errorf("unsupported kind %s, for %#v", rv.Kind(), rv)
 }
 
-func (f encFnInfo) kSlice(rv reflect.Value) {
+func (f *encFnInfo) kSlice(rv reflect.Value) {
 	ti := f.ti
 	// array may be non-addressable, so we have to manage with care
 	//   (don't call rv.Bytes, rv.Slice, etc).
@@ -401,13 +388,13 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 	//   Encode(S{}) will bomb on "panic: slice of unaddressable array".
 	if f.seq != seqTypeArray {
 		if rv.IsNil() {
-			f.ee.EncodeNil()
+			f.e.e.EncodeNil()
 			return
 		}
 		// If in this method, then there was no extension function defined.
 		// So it's okay to treat as []byte.
 		if ti.rtid == uint8SliceTypId {
-			f.ee.EncodeStringBytes(c_RAW, rv.Bytes())
+			f.e.e.EncodeStringBytes(c_RAW, rv.Bytes())
 			return
 		}
 	}
@@ -416,9 +403,9 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 	if rtelem.Kind() == reflect.Uint8 {
 		switch f.seq {
 		case seqTypeArray:
-			// if l == 0 { f.ee.encodeStringBytes(c_RAW, nil) } else
+			// if l == 0 { f.e.e.encodeStringBytes(c_RAW, nil) } else
 			if rv.CanAddr() {
-				f.ee.EncodeStringBytes(c_RAW, rv.Slice(0, l).Bytes())
+				f.e.e.EncodeStringBytes(c_RAW, rv.Slice(0, l).Bytes())
 			} else {
 				var bs []byte
 				if l <= cap(f.e.b) {
@@ -431,10 +418,10 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 				// for i := 0; i < l; i++ {
 				// 	bs[i] = byte(rv.Index(i).Uint())
 				// }
-				f.ee.EncodeStringBytes(c_RAW, bs)
+				f.e.e.EncodeStringBytes(c_RAW, bs)
 			}
 		case seqTypeSlice:
-			f.ee.EncodeStringBytes(c_RAW, rv.Bytes())
+			f.e.e.EncodeStringBytes(c_RAW, rv.Bytes())
 		case seqTypeChan:
 			bs := f.e.b[:0]
 			// do not use range, so that the number of elements encoded
@@ -446,7 +433,7 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 			for i := 0; i < l; i++ {
 				bs = append(bs, <-ch)
 			}
-			f.ee.EncodeStringBytes(c_RAW, bs)
+			f.e.e.EncodeStringBytes(c_RAW, bs)
 		}
 		return
 	}
@@ -456,9 +443,9 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 			f.e.errorf("mapBySlice requires even slice length, but got %v", l)
 			return
 		}
-		f.ee.EncodeMapStart(l / 2)
+		f.e.e.EncodeMapStart(l / 2)
 	} else {
-		f.ee.EncodeArrayStart(l)
+		f.e.e.EncodeArrayStart(l)
 	}
 
 	e := f.e
@@ -469,7 +456,7 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 		// if kind is reflect.Interface, do not pre-determine the
 		// encoding type, because preEncodeValue may break it down to
 		// a concrete type and kInterface will bomb.
-		var fn encFn
+		var fn *encFn
 		if rtelem.Kind() != reflect.Interface {
 			rtelemid := reflect.ValueOf(rtelem).Pointer()
 			fn = e.getEncFn(rtelemid, rtelem, true, true)
@@ -487,42 +474,20 @@ func (f encFnInfo) kSlice(rv reflect.Value) {
 
 	}
 
-	f.ee.EncodeEnd()
+	f.e.e.EncodeEnd()
 }
 
-func (f encFnInfo) kStruct(rv reflect.Value) {
+func (f *encFnInfo) kStruct(rv reflect.Value) {
 	fti := f.ti
 	e := f.e
 	tisfi := fti.sfip
 	toMap := !(fti.toArray || e.h.StructToArray)
 	newlen := len(fti.sfi)
+
 	// Use sync.Pool to reduce allocating slices unnecessarily.
 	// The cost of the occasional locking is less than the cost of the allocation it saves.
+	pool, poolv, fkvs := encStructPoolGet(newlen)
 
-	var fkvs []encStructFieldKV
-	var pool *sync.Pool
-	var poolv interface{}
-	idxpool := newlen / 8
-	if encStructPoolLen != 4 {
-		panic(errors.New("encStructPoolLen must be equal to 4")) // defensive, in case it is changed
-	}
-	if idxpool < encStructPoolLen {
-		pool = &encStructPool[idxpool]
-		poolv = pool.Get()
-		switch vv := poolv.(type) {
-		case *[8]encStructFieldKV:
-			fkvs = vv[:newlen]
-		case *[16]encStructFieldKV:
-			fkvs = vv[:newlen]
-		case *[32]encStructFieldKV:
-			fkvs = vv[:newlen]
-		case *[64]encStructFieldKV:
-			fkvs = vv[:newlen]
-		}
-	}
-	if fkvs == nil {
-		fkvs = make([]encStructFieldKV, newlen)
-	}
 	// if toMap, use the sorted array. If toArray, use unsorted array (to match sequence in struct)
 	if toMap {
 		tisfi = fti.sfi
@@ -558,7 +523,7 @@ func (f encFnInfo) kStruct(rv reflect.Value) {
 
 	// debugf(">>>> kStruct: newlen: %v", newlen)
 	// sep := !e.be
-	ee := f.ee //don't dereference everytime
+	ee := f.e.e //don't dereference everytime
 
 	if toMap {
 		ee.EncodeMapStart(newlen)
@@ -571,13 +536,13 @@ func (f encFnInfo) kStruct(rv reflect.Value) {
 			} else {
 				ee.EncodeString(c_UTF8, kv.k)
 			}
-			e.encodeValue(kv.v, encFn{})
+			e.encodeValue(kv.v, nil)
 		}
 	} else {
 		ee.EncodeArrayStart(newlen)
 		for j := 0; j < newlen; j++ {
 			kv = fkvs[j]
-			e.encodeValue(kv.v, encFn{})
+			e.encodeValue(kv.v, nil)
 		}
 	}
 	ee.EncodeEnd()
@@ -590,34 +555,35 @@ func (f encFnInfo) kStruct(rv reflect.Value) {
 	}
 }
 
-// func (f encFnInfo) kPtr(rv reflect.Value) {
+// func (f *encFnInfo) kPtr(rv reflect.Value) {
 // 	debugf(">>>>>>> ??? encode kPtr called - shouldn't get called")
 // 	if rv.IsNil() {
-// 		f.ee.encodeNil()
+// 		f.e.e.encodeNil()
 // 		return
 // 	}
 // 	f.e.encodeValue(rv.Elem())
 // }
 
-func (f encFnInfo) kInterface(rv reflect.Value) {
+func (f *encFnInfo) kInterface(rv reflect.Value) {
 	if rv.IsNil() {
-		f.ee.EncodeNil()
+		f.e.e.EncodeNil()
 		return
 	}
-	f.e.encodeValue(rv.Elem(), encFn{})
+	f.e.encodeValue(rv.Elem(), nil)
 }
 
-func (f encFnInfo) kMap(rv reflect.Value) {
+func (f *encFnInfo) kMap(rv reflect.Value) {
+	ee := f.e.e
 	if rv.IsNil() {
-		f.ee.EncodeNil()
+		ee.EncodeNil()
 		return
 	}
 
 	l := rv.Len()
-	f.ee.EncodeMapStart(l)
+	ee.EncodeMapStart(l)
 	e := f.e
 	if l == 0 {
-		f.ee.EncodeEnd()
+		ee.EncodeEnd()
 		return
 	}
 	var asSymbols bool
@@ -628,7 +594,7 @@ func (f encFnInfo) kMap(rv reflect.Value) {
 	// However, if kind is reflect.Interface, do not pre-determine the
 	// encoding type, because preEncodeValue may break it down to
 	// a concrete type and kInterface will bomb.
-	var keyFn, valFn encFn
+	var keyFn, valFn *encFn
 	ti := f.ti
 	rtkey := ti.rt.Key()
 	rtval := ti.rt.Elem()
@@ -655,7 +621,6 @@ func (f encFnInfo) kMap(rv reflect.Value) {
 	}
 	mks := rv.MapKeys()
 	// for j, lmks := 0, len(mks); j < lmks; j++ {
-	ee := f.ee //don't dereference everytime
 	if e.h.Canonical {
 		// first encode each key to a []byte first, then sort them, then record
 		// println(">>>>>>>> CANONICAL <<<<<<<<")
@@ -699,12 +664,12 @@ func (f encFnInfo) kMap(rv reflect.Value) {
 // instead of executing the checks every time.
 type encFn struct {
 	i encFnInfo
-	f func(encFnInfo, reflect.Value)
+	f func(*encFnInfo, reflect.Value)
 }
 
 // --------------------------------------------------
 
-type rtidEncFn struct {
+type encRtidFn struct {
 	rtid uintptr
 	fn   encFn
 }
@@ -716,7 +681,7 @@ type Encoder struct {
 	// NOTE: Encoder shouldn't call it's write methods,
 	// as the handler MAY need to do some coordination.
 	w  encWriter
-	s  []rtidEncFn
+	s  []encRtidFn
 	be bool // is binary encoding
 	js bool // is json handle
 
@@ -726,7 +691,7 @@ type Encoder struct {
 
 	as encDriverAsis
 	hh Handle
-	f  map[uintptr]encFn
+	f  map[uintptr]*encFn
 	b  [scratchByteArrayLen]byte
 }
 
@@ -871,7 +836,7 @@ func (e *Encoder) encode(iv interface{}) {
 		v.CodecEncodeSelf(e)
 
 	case reflect.Value:
-		e.encodeValue(v, encFn{})
+		e.encodeValue(v, nil)
 
 	case string:
 		e.e.EncodeString(c_UTF8, v)
@@ -957,7 +922,7 @@ func (e *Encoder) encodeI(iv interface{}, checkFastpath, checkCodecSelfer bool)
 		rt := rv.Type()
 		rtid := reflect.ValueOf(rt).Pointer()
 		fn := e.getEncFn(rtid, rt, checkFastpath, checkCodecSelfer)
-		fn.f(fn.i, rv)
+		fn.f(&fn.i, rv)
 	}
 }
 
@@ -987,27 +952,28 @@ LOOP:
 	return rv, true
 }
 
-func (e *Encoder) encodeValue(rv reflect.Value, fn encFn) {
+func (e *Encoder) encodeValue(rv reflect.Value, fn *encFn) {
 	// if a valid fn is passed, it MUST BE for the dereferenced type of rv
 	if rv, proceed := e.preEncodeValue(rv); proceed {
-		if fn.f == nil {
+		if fn == nil {
 			rt := rv.Type()
 			rtid := reflect.ValueOf(rt).Pointer()
 			fn = e.getEncFn(rtid, rt, true, true)
 		}
-		fn.f(fn.i, rv)
+		fn.f(&fn.i, rv)
 	}
 }
 
-func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCodecSelfer bool) (fn encFn) {
+func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCodecSelfer bool) (fn *encFn) {
 	// rtid := reflect.ValueOf(rt).Pointer()
 	var ok bool
 	if useMapForCodecCache {
 		fn, ok = e.f[rtid]
 	} else {
-		for _, v := range e.s {
+		for i := range e.s {
+			v := &(e.s[i])
 			if v.rtid == rtid {
-				fn, ok = v.fn, true
+				fn, ok = &(v.fn), true
 				break
 			}
 		}
@@ -1015,42 +981,48 @@ func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCo
 	if ok {
 		return
 	}
-	// fi.encFnInfoX = new(encFnInfoX)
+
+	if useMapForCodecCache {
+		if e.f == nil {
+			e.f = make(map[uintptr]*encFn, initCollectionCap)
+		}
+		fn = new(encFn)
+		e.f[rtid] = fn
+	} else {
+		if e.s == nil {
+			e.s = make([]encRtidFn, 0, initCollectionCap)
+		}
+		e.s = append(e.s, encRtidFn{rtid: rtid})
+		fn = &(e.s[len(e.s)-1]).fn
+	}
+
 	ti := getTypeInfo(rtid, rt)
-	var fi encFnInfo
-	fi.ee = e.e
+	fi := &(fn.i)
+	fi.e = e
+	fi.ti = ti
 
 	if checkCodecSelfer && ti.cs {
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).selferMarshal
+		fn.f = (*encFnInfo).selferMarshal
 	} else if rtid == rawExtTypId {
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).rawExt
+		fn.f = (*encFnInfo).rawExt
 	} else if e.e.IsBuiltinType(rtid) {
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).builtin
+		fn.f = (*encFnInfo).builtin
 	} else if xfFn := e.h.getExt(rtid); xfFn != nil {
-		// fi.encFnInfoX = new(encFnInfoX)
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
 		fi.xfTag, fi.xfFn = xfFn.tag, xfFn.ext
-		fn.f = (encFnInfo).ext
+		fn.f = (*encFnInfo).ext
 	} else if supportMarshalInterfaces && e.be && ti.bm {
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).binaryMarshal
+		fn.f = (*encFnInfo).binaryMarshal
 	} else if supportMarshalInterfaces && !e.be && e.js && ti.jm {
 		//If JSON, we should check JSONMarshal before textMarshal
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).jsonMarshal
+		fn.f = (*encFnInfo).jsonMarshal
 	} else if supportMarshalInterfaces && !e.be && ti.tm {
-		fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-		fn.f = (encFnInfo).textMarshal
+		fn.f = (*encFnInfo).textMarshal
 	} else {
 		rk := rt.Kind()
 		// if fastpathEnabled && checkFastpath && (rk == reflect.Map || rk == reflect.Slice) {
 		if fastpathEnabled && checkFastpath && (rk == reflect.Slice || (rk == reflect.Map && !e.h.Canonical)) {
 			if rt.PkgPath() == "" {
 				if idx := fastpathAV.index(rtid); idx != -1 {
-					fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
 					fn.f = fastpathAV[idx].encfn
 				}
 			} else {
@@ -1066,8 +1038,7 @@ func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCo
 				if idx := fastpathAV.index(rtuid); idx != -1 {
 					xfnf := fastpathAV[idx].encfn
 					xrt := fastpathAV[idx].rt
-					fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-					fn.f = func(xf encFnInfo, xrv reflect.Value) {
+					fn.f = func(xf *encFnInfo, xrv reflect.Value) {
 						xfnf(xf, xrv.Convert(xrt))
 					}
 				}
@@ -1076,57 +1047,42 @@ func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCo
 		if fn.f == nil {
 			switch rk {
 			case reflect.Bool:
-				fn.f = (encFnInfo).kBool
+				fn.f = (*encFnInfo).kBool
 			case reflect.String:
-				fn.f = (encFnInfo).kString
+				fn.f = (*encFnInfo).kString
 			case reflect.Float64:
-				fn.f = (encFnInfo).kFloat64
+				fn.f = (*encFnInfo).kFloat64
 			case reflect.Float32:
-				fn.f = (encFnInfo).kFloat32
+				fn.f = (*encFnInfo).kFloat32
 			case reflect.Int, reflect.Int8, reflect.Int64, reflect.Int32, reflect.Int16:
-				fn.f = (encFnInfo).kInt
+				fn.f = (*encFnInfo).kInt
 			case reflect.Uint8, reflect.Uint64, reflect.Uint, reflect.Uint32, reflect.Uint16, reflect.Uintptr:
-				fn.f = (encFnInfo).kUint
+				fn.f = (*encFnInfo).kUint
 			case reflect.Invalid:
-				fn.f = (encFnInfo).kInvalid
+				fn.f = (*encFnInfo).kInvalid
 			case reflect.Chan:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti, seq: seqTypeChan}
-				fn.f = (encFnInfo).kSlice
+				fi.seq = seqTypeChan
+				fn.f = (*encFnInfo).kSlice
 			case reflect.Slice:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti, seq: seqTypeSlice}
-				fn.f = (encFnInfo).kSlice
+				fi.seq = seqTypeSlice
+				fn.f = (*encFnInfo).kSlice
 			case reflect.Array:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti, seq: seqTypeArray}
-				fn.f = (encFnInfo).kSlice
+				fi.seq = seqTypeArray
+				fn.f = (*encFnInfo).kSlice
 			case reflect.Struct:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-				fn.f = (encFnInfo).kStruct
+				fn.f = (*encFnInfo).kStruct
 				// case reflect.Ptr:
-				// 	fn.f = (encFnInfo).kPtr
+				// 	fn.f = (*encFnInfo).kPtr
 			case reflect.Interface:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-				fn.f = (encFnInfo).kInterface
+				fn.f = (*encFnInfo).kInterface
 			case reflect.Map:
-				fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
-				fn.f = (encFnInfo).kMap
+				fn.f = (*encFnInfo).kMap
 			default:
-				fn.f = (encFnInfo).kErr
+				fn.f = (*encFnInfo).kErr
 			}
 		}
 	}
-	fn.i = fi
 
-	if useMapForCodecCache {
-		if e.f == nil {
-			e.f = make(map[uintptr]encFn, 32)
-		}
-		e.f[rtid] = fn
-	} else {
-		if e.s == nil {
-			e.s = make([]rtidEncFn, 0, 32)
-		}
-		e.s = append(e.s, rtidEncFn{rtid, fn})
-	}
 	return
 }
 
@@ -1163,7 +1119,7 @@ type encStructFieldKV struct {
 	v reflect.Value
 }
 
-const encStructPoolLen = 4
+const encStructPoolLen = 5
 
 // encStructPool is an array of sync.Pool.
 // Each element of the array pools one of encStructPool(8|16|32|64).
@@ -1181,6 +1137,57 @@ func init() {
 	encStructPool[1].New = func() interface{} { return new([16]encStructFieldKV) }
 	encStructPool[2].New = func() interface{} { return new([32]encStructFieldKV) }
 	encStructPool[3].New = func() interface{} { return new([64]encStructFieldKV) }
+	encStructPool[4].New = func() interface{} { return new([128]encStructFieldKV) }
+}
+
+func encStructPoolGet(newlen int) (p *sync.Pool, v interface{}, s []encStructFieldKV) {
+	// if encStructPoolLen != 5 { // constant check, so removed at build time.
+	// 	panic(errors.New("encStructPoolLen must be equal to 4")) // defensive, in case it is changed
+	// }
+	// idxpool := newlen / 8
+
+	// if pool == nil {
+	// 	fkvs = make([]encStructFieldKV, newlen)
+	// } else {
+	// 	poolv = pool.Get()
+	// 	switch vv := poolv.(type) {
+	// 	case *[8]encStructFieldKV:
+	// 		fkvs = vv[:newlen]
+	// 	case *[16]encStructFieldKV:
+	// 		fkvs = vv[:newlen]
+	// 	case *[32]encStructFieldKV:
+	// 		fkvs = vv[:newlen]
+	// 	case *[64]encStructFieldKV:
+	// 		fkvs = vv[:newlen]
+	// 	case *[128]encStructFieldKV:
+	// 		fkvs = vv[:newlen]
+	// 	}
+	// }
+
+	if newlen <= 8 {
+		p = &encStructPool[0]
+		v = p.Get()
+		s = v.(*[8]encStructFieldKV)[:newlen]
+	} else if newlen <= 16 {
+		p = &encStructPool[1]
+		v = p.Get()
+		s = v.(*[16]encStructFieldKV)[:newlen]
+	} else if newlen <= 32 {
+		p = &encStructPool[2]
+		v = p.Get()
+		s = v.(*[32]encStructFieldKV)[:newlen]
+	} else if newlen <= 64 {
+		p = &encStructPool[3]
+		v = p.Get()
+		s = v.(*[64]encStructFieldKV)[:newlen]
+	} else if newlen <= 128 {
+		p = &encStructPool[4]
+		v = p.Get()
+		s = v.(*[128]encStructFieldKV)[:newlen]
+	} else {
+		s = make([]encStructFieldKV, newlen)
+	}
+	return
 }
 
 // ----------------------------------------

File diff suppressed because it is too large
+ 298 - 298
codec/fast-path.generated.go


+ 56 - 47
codec/fast-path.go.tmpl

@@ -48,8 +48,8 @@ var fastpathTV fastpathT
 type fastpathE struct {
 	rtid uintptr
 	rt reflect.Type 
-	encfn func(encFnInfo, reflect.Value)
-	decfn func(decFnInfo, reflect.Value)
+	encfn func(*encFnInfo, reflect.Value)
+	decfn func(*decFnInfo, reflect.Value)
 }
 
 type fastpathA [{{ .FastpathLen }}]fastpathE
@@ -85,7 +85,7 @@ func init() {
 		return
 	}
 	i := 0
-	fn := func(v interface{}, fe func(encFnInfo, reflect.Value), fd func(decFnInfo, reflect.Value)) (f fastpathE) {
+	fn := func(v interface{}, fe func(*encFnInfo, reflect.Value), fd func(*decFnInfo, reflect.Value)) (f fastpathE) {
 		xrt := reflect.TypeOf(v)
 		xptr := reflect.ValueOf(xrt).Pointer()
 		fastpathAV[i] = fastpathE{xptr, xrt, fe, fd}
@@ -93,11 +93,11 @@ func init() {
 		return
 	}
 	
-	{{range .Values}}{{if not .Primitive}}{{if .Slice }}
-	fn([]{{ .Elem }}(nil), (encFnInfo).{{ .MethodNamePfx "fastpathEnc" false }}R, (decFnInfo).{{ .MethodNamePfx "fastpathDec" false }}R){{end}}{{end}}{{end}}
+	{{range .Values}}{{if not .Primitive}}{{if not .MapKey }}
+	fn([]{{ .Elem }}(nil), (*encFnInfo).{{ .MethodNamePfx "fastpathEnc" false }}R, (*decFnInfo).{{ .MethodNamePfx "fastpathDec" false }}R){{end}}{{end}}{{end}}
 	
-	{{range .Values}}{{if not .Primitive}}{{if not .Slice }}
-	fn(map[{{ .MapKey }}]{{ .Elem }}(nil), (encFnInfo).{{ .MethodNamePfx "fastpathEnc" false }}R, (decFnInfo).{{ .MethodNamePfx "fastpathDec" false }}R){{end}}{{end}}{{end}}
+	{{range .Values}}{{if not .Primitive}}{{if .MapKey }}
+	fn(map[{{ .MapKey }}]{{ .Elem }}(nil), (*encFnInfo).{{ .MethodNamePfx "fastpathEnc" false }}R, (*decFnInfo).{{ .MethodNamePfx "fastpathDec" false }}R){{end}}{{end}}{{end}}
 	
 	sort.Sort(fastpathAslice(fastpathAV[:]))
 }
@@ -107,10 +107,10 @@ func init() {
 // -- -- fast path type switch
 func fastpathEncodeTypeSwitch(iv interface{}, e *Encoder) bool {
 	switch v := iv.(type) {
-{{range .Values}}{{if not .Primitive}}{{if .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if not .MapKey }}
 	case []{{ .Elem }}:{{else}}
 	case map[{{ .MapKey }}]{{ .Elem }}:{{end}}
-		fastpathTV.{{ .MethodNamePfx "Enc" false }}V(v, fastpathCheckNilTrue, e){{if .Slice }}
+		fastpathTV.{{ .MethodNamePfx "Enc" false }}V(v, fastpathCheckNilTrue, e){{if not .MapKey }}
 	case *[]{{ .Elem }}:{{else}}
 	case *map[{{ .MapKey }}]{{ .Elem }}:{{end}}
 		fastpathTV.{{ .MethodNamePfx "Enc" false }}V(*v, fastpathCheckNilTrue, e)
@@ -123,7 +123,7 @@ func fastpathEncodeTypeSwitch(iv interface{}, e *Encoder) bool {
 
 func fastpathEncodeTypeSwitchSlice(iv interface{}, e *Encoder) bool {
 	switch v := iv.(type) {
-{{range .Values}}{{if not .Primitive}}{{if .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if not .MapKey }}
 	case []{{ .Elem }}:
 		fastpathTV.{{ .MethodNamePfx "Enc" false }}V(v, fastpathCheckNilTrue, e)
 	case *[]{{ .Elem }}:
@@ -137,7 +137,7 @@ func fastpathEncodeTypeSwitchSlice(iv interface{}, e *Encoder) bool {
 
 func fastpathEncodeTypeSwitchMap(iv interface{}, e *Encoder) bool {
 	switch v := iv.(type) {
-{{range .Values}}{{if not .Primitive}}{{if not .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if .MapKey }}
 	case map[{{ .MapKey }}]{{ .Elem }}:
 		fastpathTV.{{ .MethodNamePfx "Enc" false }}V(v, fastpathCheckNilTrue, e)
 	case *map[{{ .MapKey }}]{{ .Elem }}:
@@ -150,9 +150,9 @@ func fastpathEncodeTypeSwitchMap(iv interface{}, e *Encoder) bool {
 }
 
 // -- -- fast path functions
-{{range .Values}}{{if not .Primitive}}{{if .Slice }} 
+{{range .Values}}{{if not .Primitive}}{{if not .MapKey }} 
 
-func (f encFnInfo) {{ .MethodNamePfx "fastpathEnc" false }}R(rv reflect.Value) {
+func (f *encFnInfo) {{ .MethodNamePfx "fastpathEnc" false }}R(rv reflect.Value) {
 	fastpathTV.{{ .MethodNamePfx "Enc" false }}V(rv.Interface().([]{{ .Elem }}), fastpathCheckNilFalse, f.e)
 }
 func (_ fastpathT) {{ .MethodNamePfx "Enc" false }}V(v []{{ .Elem }}, checkNil bool, e *Encoder) {
@@ -170,9 +170,9 @@ func (_ fastpathT) {{ .MethodNamePfx "Enc" false }}V(v []{{ .Elem }}, checkNil b
 
 {{end}}{{end}}{{end}}
 
-{{range .Values}}{{if not .Primitive}}{{if not .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if .MapKey }}
 
-func (f encFnInfo) {{ .MethodNamePfx "fastpathEnc" false }}R(rv reflect.Value) {
+func (f *encFnInfo) {{ .MethodNamePfx "fastpathEnc" false }}R(rv reflect.Value) {
 	fastpathTV.{{ .MethodNamePfx "Enc" false }}V(rv.Interface().(map[{{ .MapKey }}]{{ .Elem }}), fastpathCheckNilFalse, f.e)
 }
 func (_ fastpathT) {{ .MethodNamePfx "Enc" false }}V(v map[{{ .MapKey }}]{{ .Elem }}, checkNil bool, e *Encoder) {
@@ -201,10 +201,10 @@ func (_ fastpathT) {{ .MethodNamePfx "Enc" false }}V(v map[{{ .MapKey }}]{{ .Ele
 // -- -- fast path type switch
 func fastpathDecodeTypeSwitch(iv interface{}, d *Decoder) bool {
 	switch v := iv.(type) {
-{{range .Values}}{{if not .Primitive}}{{if .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if not .MapKey }}
 	case []{{ .Elem }}:{{else}}
 	case map[{{ .MapKey }}]{{ .Elem }}:{{end}}
-		fastpathTV.{{ .MethodNamePfx "Dec" false }}V(v, fastpathCheckNilFalse, false, d){{if .Slice }}
+		fastpathTV.{{ .MethodNamePfx "Dec" false }}V(v, fastpathCheckNilFalse, false, d){{if not .MapKey }}
 	case *[]{{ .Elem }}:{{else}}
 	case *map[{{ .MapKey }}]{{ .Elem }}:{{end}}
 		v2, changed2 := fastpathTV.{{ .MethodNamePfx "Dec" false }}V(*v, fastpathCheckNilFalse, true, d)
@@ -219,16 +219,16 @@ func fastpathDecodeTypeSwitch(iv interface{}, d *Decoder) bool {
 }
 
 // -- -- fast path functions
-{{range .Values}}{{if not .Primitive}}{{if .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if not .MapKey }}
 {{/*
 Slices can change if they 
 - did not come from an array
 - are addressable (from a ptr)
 - are settable (e.g. contained in an interface{})
 */}}
-func (f decFnInfo) {{ .MethodNamePfx "fastpathDec" false }}R(rv reflect.Value) { 
+func (f *decFnInfo) {{ .MethodNamePfx "fastpathDec" false }}R(rv reflect.Value) { 
 	array := f.seq == seqTypeArray
-	if !array && rv.CanAddr() { // CanSet => CanAddr + Exported 
+	if !array && rv.CanAddr() { {{/* // CanSet => CanAddr + Exported */}}
 		vp := rv.Addr().Interface().(*[]{{ .Elem }})
 		v, changed := fastpathTV.{{ .MethodNamePfx "Dec" false }}V(*vp, fastpathCheckNilFalse, !array, f.d)
 		if changed {
@@ -249,7 +249,7 @@ func (f fastpathT) {{ .MethodNamePfx "Dec" false }}X(vp *[]{{ .Elem }}, checkNil
 func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v []{{ .Elem }}, checkNil bool, canChange bool, 
 	d *Decoder) (_ []{{ .Elem }}, changed bool) {
 	dd := d.d
-	// if dd.isContainerType(valueTypeNil) { dd.TryDecodeAsNil()
+	{{/* // if dd.isContainerType(valueTypeNil) { dd.TryDecodeAsNil() */}}
 	if checkNil && dd.TryDecodeAsNil() {
 		if v != nil {
 			changed = true 
@@ -258,47 +258,59 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v []{{ .Elem }}, checkNil b
 	}
 
 	slh, containerLenS := d.decSliceHelperStart()
+	x2read := containerLenS
+	var xtrunc bool
 	if canChange && v == nil {
-		if containerLenS <= 0 {
-			v = []{{ .Elem }}{}
-		} else {
-			v = make([]{{ .Elem }}, containerLenS, containerLenS)
+		var xlen int 
+		if xlen, xtrunc = decInferLen(containerLenS, d.h.MaxInitLen, {{ .Size }}); xtrunc {
+			x2read = xlen
 		}
+		v = make([]{{ .Elem }}, xlen)
 		changed = true
-	}
+	} 
 	if containerLenS == 0 {
 		if canChange && len(v) != 0 {
 			v = v[:0]
 			changed = true 
 		}{{/*
-		// slh.End() // dd.ReadArrayEnd()
+			// slh.End() // dd.ReadArrayEnd()
 		*/}}
 		return v, changed 
 	}
 	
-	// for j := 0; j < containerLenS; j++ {
+	{{/* // for j := 0; j < containerLenS; j++ { */}}
 	if containerLenS > 0 {
-		decLen := containerLenS
 		if containerLenS > cap(v) {
-			if canChange {
-				s := make([]{{ .Elem }}, containerLenS, containerLenS)
+			if canChange { {{/*
+				// fast-path is for "basic" immutable types, so no need to copy them over
+				// s := make([]{{ .Elem }}, decInferLen(containerLenS, d.h.MaxInitLen))
 				// copy(s, v[:cap(v)])
-				v = s
+				// v = s */}}
+				var xlen int
+				if xlen, xtrunc = decInferLen(containerLenS, d.h.MaxInitLen, {{ .Size }}); xtrunc {
+					x2read = xlen
+				}
+				v = make([]{{ .Elem }}, xlen)
 				changed = true
 			} else {
 				d.arrayCannotExpand(len(v), containerLenS)
-				decLen = len(v)
+				x2read = len(v)
 			}
 		} else if containerLenS != len(v) {
 			v = v[:containerLenS]
 			changed = true
 		}
-		// all checks done. cannot go past len.
+		{{/* // all checks done. cannot go past len. */}}
 		j := 0
-		for ; j < decLen; j++ { 
+		for ; j < x2read; j++ { 
 			{{ if eq .Elem "interface{}" }}d.decode(&v[j]){{ else }}v[j] = {{ decmd .Elem }}{{ end }}
 		}
-		if !canChange {
+		if xtrunc { {{/* // means canChange=true, changed=true already. */}}
+			for ; j < containerLenS; j++ {
+				v = append(v, {{ zerocmd .Elem }})
+				{{ if eq .Elem "interface{}" }}d.decode(&v[j]){{ else }}v[j] = {{ decmd .Elem }}{{ end }}
+			}
+		} else if !canChange {
 			for ; j < containerLenS; j++ { 
 				d.swallow()
 			}
@@ -314,7 +326,7 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v []{{ .Elem }}, checkNil b
 					d.arrayCannotExpand(len(v), j+1)
 				}
 			} 
-			if j < len(v) { // all checks done. cannot go past len.
+			if j < len(v) { {{/* // all checks done. cannot go past len. */}}
 				{{ if eq .Elem "interface{}" }}d.decode(&v[j])
 				{{ else }}v[j] = {{ decmd .Elem }}{{ end }}
 			} else {
@@ -329,13 +341,13 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v []{{ .Elem }}, checkNil b
 {{end}}{{end}}{{end}}
 
 
-{{range .Values}}{{if not .Primitive}}{{if not .Slice }}
+{{range .Values}}{{if not .Primitive}}{{if .MapKey }}
 {{/*
 Maps can change if they are
 - addressable (from a ptr)
 - settable (e.g. contained in an interface{})
 */}}
-func (f decFnInfo) {{ .MethodNamePfx "fastpathDec" false }}R(rv reflect.Value) { 
+func (f *decFnInfo) {{ .MethodNamePfx "fastpathDec" false }}R(rv reflect.Value) { 
 	if rv.CanAddr() {
 		vp := rv.Addr().Interface().(*map[{{ .MapKey }}]{{ .Elem }})
 		v, changed := fastpathTV.{{ .MethodNamePfx "Dec" false }}V(*vp, fastpathCheckNilFalse, true, f.d)
@@ -356,7 +368,7 @@ func (f fastpathT) {{ .MethodNamePfx "Dec" false }}X(vp *map[{{ .MapKey }}]{{ .E
 func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v map[{{ .MapKey }}]{{ .Elem }}, checkNil bool, canChange bool, 
 	d *Decoder) (_ map[{{ .MapKey }}]{{ .Elem }}, changed bool) {
 	dd := d.d
-	// if dd.isContainerType(valueTypeNil) {dd.TryDecodeAsNil()
+	{{/* // if dd.isContainerType(valueTypeNil) {dd.TryDecodeAsNil() */}}
 	if checkNil && dd.TryDecodeAsNil() {
 		if v != nil {
 			changed = true
@@ -366,11 +378,8 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v map[{{ .MapKey }}]{{ .Ele
 
 	containerLen := dd.ReadMapStart()
 	if canChange && v == nil {
-		if containerLen > 0 {
-			v = make(map[{{ .MapKey }}]{{ .Elem }}, containerLen)
-		} else {
-			v = make(map[{{ .MapKey }}]{{ .Elem }}) // supports indefinite-length, etc
-		}
+		xlen, _ := decInferLen(containerLen, d.h.MaxInitLen, {{ .Size }})
+		v = make(map[{{ .MapKey }}]{{ .Elem }}, xlen)
 		changed = true
 	}
 	if containerLen > 0 {
@@ -378,7 +387,7 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v map[{{ .MapKey }}]{{ .Ele
 			{{ if eq .MapKey "interface{}" }}var mk interface{}
 			d.decode(&mk)
 			if bv, bok := mk.([]byte); bok {
-				mk = string(bv) // maps cannot have []byte as key. switch to string.
+				mk = string(bv) {{/* // maps cannot have []byte as key. switch to string. */}}
 			}{{ else }}mk := {{ decmd .MapKey }}{{ end }}
 			mv := v[mk]
 			{{ if eq .Elem "interface{}" }}d.decode(&mv)
@@ -392,7 +401,7 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v map[{{ .MapKey }}]{{ .Ele
 			{{ if eq .MapKey "interface{}" }}var mk interface{}
 			d.decode(&mk)
 			if bv, bok := mk.([]byte); bok {
-				mk = string(bv) // maps cannot have []byte as key. switch to string.
+				mk = string(bv) {{/* // maps cannot have []byte as key. switch to string. */}}
 			}{{ else }}mk := {{ decmd .MapKey }}{{ end }}
 			mv := v[mk]
 			{{ if eq .Elem "interface{}" }}d.decode(&mv)
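
The slice fast-path above decodes with the new MaxInitLen guard: decInferLen caps the length used for the initial make, the loop fills that many elements, and any remainder declared by the stream is handled with per-element appends. The sketch below shows that fill-then-append shape with an inferLen stand-in; the default budget inside it is an assumption for illustration, not the library's exact formula.

package main

import "fmt"

// inferLen is a hypothetical stand-in for the handle's length inference:
// it caps the stream-declared length so a hostile length prefix cannot
// force a huge up-front allocation.
func inferLen(clen, maxlen, unit int) (rvlen int, truncated bool) {
	if unit <= 0 {
		unit = 1
	}
	if maxlen <= 0 {
		maxlen = 8 * 1024 / unit // assumed default budget, not the library's value
	}
	if clen > maxlen {
		return maxlen, true
	}
	return clen, false
}

// decodeInts shows the fill-then-append pattern: provision up to the inferred
// length, decode into it, then grow one element at a time for the remainder.
func decodeInts(stream []int, declaredLen, maxInitLen int) []int {
	rvlen, truncated := inferLen(declaredLen, maxInitLen, 8)
	v := make([]int, rvlen)
	j := 0
	for ; j < rvlen; j++ {
		v[j] = stream[j] // stand-in for "decode the next element"
	}
	if truncated {
		for ; j < declaredLen; j++ {
			v = append(v, 0) // extend by one, then decode in place
			v[j] = stream[j]
		}
	}
	return v
}

func main() {
	stream := make([]int, 100)
	for i := range stream {
		stream[i] = i
	}
	out := decodeInts(stream, len(stream), 16) // cap the initial make at 16 elements
	fmt.Println(len(out), out[99])             // 100 99: everything decoded, nothing over-provisioned
}
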

+ 26 - 17
codec/gen-dec-array.go.tmpl

@@ -1,15 +1,17 @@
 {{var "v"}} := {{ if not isArray}}*{{ end }}{{ .Varname }}
-{{var "h"}}, {{var "l"}} := z.DecSliceHelperStart()
+{{var "h"}}, {{var "l"}} := z.DecSliceHelperStart() {{/* // helper, containerLenS */}}
 
-var {{var "c"}} bool
-_ = {{var "c"}}
+var {{var "rr"}}, {{var "rl"}} int {{/* // num2read, length of slice/array/chan */}}
+var {{var "c"}}, {{var "rt"}} bool {{/* // changed, truncated */}}
+_, _, _ = {{var "c"}}, {{var "rt"}}, {{var "rl"}}
+{{var "rr"}} = {{var "l"}}
+{{/* rl is NOT used. Only used for getting DecInferLen. len(r) used directly in code */}}
 
 {{ if not isArray }}if {{var "v"}} == nil {
-	if {{var "l"}} <= 0 {
-        {{var "v"}} = make({{ .CTyp }}, 0)
-	} else {
-		{{var "v"}} = make({{ .CTyp }}, {{var "l"}})
+	if {{var "rl"}}, {{var "rt"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }}); {{var "rt"}} {
+		{{var "rr"}} = {{var "rl"}}
 	}
+	{{var "v"}} = make({{ .CTyp }}, {{var "rl"}})
 	{{var "c"}} = true 
 } 
 {{ end }}
@@ -25,31 +27,38 @@ if {{var "l"}} == 0 { {{ if isSlice }}
 		{{ $x := printf "%st%s" .TempVar .Rand }}{{ decLineVar $x }}
 		{{var "v"}} <- {{var "t"}} 
 	{{ else }} 
-	{{var "n"}} := {{var "l"}} 
 	if {{var "l"}} > cap({{var "v"}}) {
 		{{ if isArray }}z.DecArrayCannotExpand(len({{var "v"}}), {{var "l"}})
-		{{var "n"}} = len({{var "v"}})
-		{{ else }}{{ if .Immutable }}
+		{{ else }}{{var "rl"}}, {{var "rt"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})
+		{{ if .Immutable }}
 		{{var "v2"}} := {{var "v"}}
-		{{var "v"}} = make([]{{ .Typ }}, {{var "l"}}, {{var "l"}})
+		{{var "v"}} = make([]{{ .Typ }}, {{var "rl"}})
 		if len({{var "v"}}) > 0 {
 			copy({{var "v"}}, {{var "v2"}}[:cap({{var "v2"}})])
 		}
-		{{ else }}{{var "v"}} = make([]{{ .Typ }}, {{var "l"}}, {{var "l"}})
+		{{ else }}{{var "v"}} = make([]{{ .Typ }}, {{var "rl"}})
 		{{ end }}{{var "c"}} = true 
 		{{ end }}
+		{{var "rr"}} = len({{var "v"}})
 	} else if {{var "l"}} != len({{var "v"}}) {
 		{{ if isSlice }}{{var "v"}} = {{var "v"}}[:{{var "l"}}]
 		{{var "c"}} = true {{ end }}
 	}
 	{{var "j"}} := 0
-	for ; {{var "j"}} < {{var "n"}} ; {{var "j"}}++ {
+	for ; {{var "j"}} < {{var "rr"}} ; {{var "j"}}++ {
 		{{ $x := printf "%[1]vv%[2]v[%[1]vj%[2]v]" .TempVar .Rand }}{{ decLineVar $x }}
-	} {{ if isArray }}
-	for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
+	}
+	{{ if isArray }}for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
 		z.DecSwallow()
-	}{{ end }}
-	{{ end }}{{/* closing if not chan */}}
+	}
+	{{ else }}if {{var "rt"}} { {{/* means it is a mutable slice */}}
+		for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
+			{{var "v"}} = append({{var "v"}}, {{ zero}})
+			{{ $x := printf "%[1]vv%[2]v[%[1]vj%[2]v]" .TempVar .Rand }}{{ decLineVar $x }}
+		}
+	}
+	{{ end }}
+	{{ end }}{{/* closing 'if not chan' */}}
 } else {
 	for {{var "j"}} := 0; !r.CheckBreak(); {{var "j"}}++ {
 		if {{var "j"}} >= len({{var "v"}}) {

+ 2 - 5
codec/gen-dec-map.go.tmpl

@@ -1,11 +1,8 @@
 {{var "v"}} := *{{ .Varname }}
 {{var "l"}} := r.ReadMapStart()
 if {{var "v"}} == nil {
-	if {{var "l"}} > 0 {
-		{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}, {{var "l"}})
-	} else {
-		{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}) // supports indefinite-length, etc
-	}
+	{{var "rl"}}, _ := z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})
+	{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}, {{var "rl"}})
 	*{{ .Varname }} = {{var "v"}}
 }
 if {{var "l"}} > 0  {

+ 5 - 0
codec/gen-helper.generated.go

@@ -213,3 +213,8 @@ func (f genHelperDecoder) DecExt(v interface{}) (r bool) {
 	}
 	return false
 }
+
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperDecoder) DecInferLen(clen, maxlen, unit int) (rvlen int, truncated bool) {
+	return decInferLen(clen, maxlen, unit)
+}

+ 4 - 0
codec/gen-helper.go.tmpl

@@ -191,6 +191,10 @@ func (f genHelperDecoder) DecExt(v interface{}) (r bool) {
 	}
 	return false 
 }
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperDecoder) DecInferLen(clen, maxlen, unit int) (rvlen int, truncated bool) {
+	return decInferLen(clen, maxlen, unit)
+}
 
 {{/*
 

+ 28 - 22
codec/gen.generated.go

@@ -9,11 +9,8 @@ const genDecMapTmpl = `
 {{var "v"}} := *{{ .Varname }}
 {{var "l"}} := r.ReadMapStart()
 if {{var "v"}} == nil {
-	if {{var "l"}} > 0 {
-		{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}, {{var "l"}})
-	} else {
-		{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}) // supports indefinite-length, etc
-	}
+	{{var "rl"}}, _ := z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})
+	{{var "v"}} = make(map[{{ .KTyp }}]{{ .Typ }}, {{var "rl"}})
 	*{{ .Varname }} = {{var "v"}}
 }
 if {{var "l"}} > 0  {
@@ -52,17 +49,19 @@ r.ReadEnd()
 
 const genDecListTmpl = `
 {{var "v"}} := {{ if not isArray}}*{{ end }}{{ .Varname }}
-{{var "h"}}, {{var "l"}} := z.DecSliceHelperStart()
+{{var "h"}}, {{var "l"}} := z.DecSliceHelperStart() {{/* // helper, containerLenS */}}
 
-var {{var "c"}} bool
-_ = {{var "c"}}
+var {{var "rr"}}, {{var "rl"}} int {{/* // num2read, length of slice/array/chan */}}
+var {{var "c"}}, {{var "rt"}} bool {{/* // changed, truncated */}}
+_, _, _ = {{var "c"}}, {{var "rt"}}, {{var "rl"}}
+{{var "rr"}} = {{var "l"}}
+{{/* rl is NOT used. Only used for getting DecInferLen. len(r) used directly in code */}}
 
 {{ if not isArray }}if {{var "v"}} == nil {
-	if {{var "l"}} <= 0 {
-        {{var "v"}} = make({{ .CTyp }}, 0)
-	} else {
-		{{var "v"}} = make({{ .CTyp }}, {{var "l"}})
+	if {{var "rl"}}, {{var "rt"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }}); {{var "rt"}} {
+		{{var "rr"}} = {{var "rl"}}
 	}
+	{{var "v"}} = make({{ .CTyp }}, {{var "rl"}})
 	{{var "c"}} = true 
 } 
 {{ end }}
@@ -78,31 +77,38 @@ if {{var "l"}} == 0 { {{ if isSlice }}
 		{{ $x := printf "%st%s" .TempVar .Rand }}{{ decLineVar $x }}
 		{{var "v"}} <- {{var "t"}} 
 	{{ else }} 
-	{{var "n"}} := {{var "l"}} 
 	if {{var "l"}} > cap({{var "v"}}) {
 		{{ if isArray }}z.DecArrayCannotExpand(len({{var "v"}}), {{var "l"}})
-		{{var "n"}} = len({{var "v"}})
-		{{ else }}{{ if .Immutable }}
+		{{ else }}{{var "rl"}}, {{var "rt"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})
+		{{ if .Immutable }}
 		{{var "v2"}} := {{var "v"}}
-		{{var "v"}} = make([]{{ .Typ }}, {{var "l"}}, {{var "l"}})
+		{{var "v"}} = make([]{{ .Typ }}, {{var "rl"}})
 		if len({{var "v"}}) > 0 {
 			copy({{var "v"}}, {{var "v2"}}[:cap({{var "v2"}})])
 		}
-		{{ else }}{{var "v"}} = make([]{{ .Typ }}, {{var "l"}}, {{var "l"}})
+		{{ else }}{{var "v"}} = make([]{{ .Typ }}, {{var "rl"}})
 		{{ end }}{{var "c"}} = true 
 		{{ end }}
+		{{var "rr"}} = len({{var "v"}})
 	} else if {{var "l"}} != len({{var "v"}}) {
 		{{ if isSlice }}{{var "v"}} = {{var "v"}}[:{{var "l"}}]
 		{{var "c"}} = true {{ end }}
 	}
 	{{var "j"}} := 0
-	for ; {{var "j"}} < {{var "n"}} ; {{var "j"}}++ {
+	for ; {{var "j"}} < {{var "rr"}} ; {{var "j"}}++ {
 		{{ $x := printf "%[1]vv%[2]v[%[1]vj%[2]v]" .TempVar .Rand }}{{ decLineVar $x }}
-	} {{ if isArray }}
-	for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
+	}
+	{{ if isArray }}for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
 		z.DecSwallow()
-	}{{ end }}
-	{{ end }}{{/* closing if not chan */}}
+	}
+	{{ else }}if {{var "rt"}} { {{/* means it is a mutable slice */}}
+		for ; {{var "j"}} < {{var "l"}} ; {{var "j"}}++ {
+			{{var "v"}} = append({{var "v"}}, {{ zero}})
+			{{ $x := printf "%[1]vv%[2]v[%[1]vj%[2]v]" .TempVar .Rand }}{{ decLineVar $x }}
+		}
+	}
+	{{ end }}
+	{{ end }}{{/* closing 'if not chan' */}}
 } else {
 	for {{var "j"}} := 0; !r.CheckBreak(); {{var "j"}}++ {
 		if {{var "j"}} >= len({{var "v"}}) {

+ 58 - 21
codec/gen.go

@@ -697,7 +697,7 @@ func (x *genRunner) enc(varname string, t reflect.Type) {
 		if rtid == uint8SliceTypId {
 			x.line("r.EncodeStringBytes(codecSelferC_RAW" + x.xs + ", []byte(" + varname + "))")
 		} else if fastpathAV.index(rtid) != -1 {
-			g := genV{Slice: true, Elem: x.genTypeName(t.Elem())}
+			g := x.newGenV(t)
 			x.line("z.F." + g.MethodNamePfx("Enc", false) + "V(" + varname + ", false, e)")
 		} else {
 			x.xtraSM(varname, true, t)
@@ -711,9 +711,7 @@ func (x *genRunner) enc(varname string, t reflect.Type) {
 		// - else call Encoder.encode(XXX) on it.
 		// x.line("if " + varname + " == nil { \nr.EncodeNil()\n } else { ")
 		if fastpathAV.index(rtid) != -1 {
-			g := genV{Slice: false,
-				Elem:   x.genTypeName(t.Elem()),
-				MapKey: x.genTypeName(t.Key())}
+			g := x.newGenV(t)
 			x.line("z.F." + g.MethodNamePfx("Enc", false) + "V(" + varname + ", false, e)")
 		} else {
 			x.xtraSM(varname, true, t)
@@ -1143,7 +1141,7 @@ func (x *genRunner) dec(varname string, t reflect.Type) {
 		if rtid == uint8SliceTypId {
 			x.line("*" + varname + " = r.DecodeBytes(*(*[]byte)(" + varname + "), false, false)")
 		} else if fastpathAV.index(rtid) != -1 {
-			g := genV{Slice: true, Elem: x.genTypeName(t.Elem())}
+			g := x.newGenV(t)
 			x.line("z.F." + g.MethodNamePfx("Dec", false) + "X(" + varname + ", false, d)")
 			// x.line("z." + g.MethodNamePfx("Dec", false) + "(" + varname + ")")
 			// x.line(g.FastpathName(false) + "(" + varname + ", d)")
@@ -1157,7 +1155,7 @@ func (x *genRunner) dec(varname string, t reflect.Type) {
 		// - if elements are primitives or Selfers, call dedicated function on each member.
 		// - else call Encoder.encode(XXX) on it.
 		if fastpathAV.index(rtid) != -1 {
-			g := genV{Slice: false, Elem: x.genTypeName(t.Elem()), MapKey: x.genTypeName(t.Key())}
+			g := x.newGenV(t)
 			x.line("z.F." + g.MethodNamePfx("Dec", false) + "X(" + varname + ", false, d)")
 			// x.line("z." + g.MethodNamePfx("Dec", false) + "(" + varname + ")")
 			// x.line(g.FastpathName(false) + "(" + varname + ", d)")
@@ -1248,11 +1246,13 @@ func (x *genRunner) decListFallback(varname string, rtid uintptr, t reflect.Type
 		CTyp      string
 		Typ       string
 		Immutable bool
+		Size      int
 	}
 	telem := t.Elem()
-	ts := tstruc{genTempVarPfx, x.varsfx(), varname, x.genTypeName(t), x.genTypeName(telem), genIsImmutable(telem)}
+	ts := tstruc{genTempVarPfx, x.varsfx(), varname, x.genTypeName(t), x.genTypeName(telem), genIsImmutable(telem), int(telem.Size())}
 
 	funcs := make(template.FuncMap)
+
 	funcs["decLineVar"] = func(varname string) string {
 		x.decVar(varname, telem, false)
 		return ""
@@ -1292,10 +1292,11 @@ func (x *genRunner) decMapFallback(varname string, rtid uintptr, t reflect.Type)
 		Varname string
 		KTyp    string
 		Typ     string
+		Size    int
 	}
 	telem := t.Elem()
 	tkey := t.Key()
-	ts := tstruc{genTempVarPfx, x.varsfx(), varname, x.genTypeName(tkey), x.genTypeName(telem)}
+	ts := tstruc{genTempVarPfx, x.varsfx(), varname, x.genTypeName(tkey), x.genTypeName(telem), int(telem.Size() + tkey.Size())}
 	funcs := make(template.FuncMap)
 	funcs["decLineVarK"] = func(varname string) string {
 		x.decVar(varname, tkey, false)
@@ -1497,11 +1498,28 @@ func (x *genRunner) decStruct(varname string, rtid uintptr, t reflect.Type) {
 // --------
 
 type genV struct {
-	// genV is either a primitive (Primitive != "") or a slice (Slice = true) or a map.
-	Slice     bool
+	// genV is either a primitive (Primitive != "") or a map (MapKey != "") or a slice
 	MapKey    string
 	Elem      string
 	Primitive string
+	Size      int
+}
+
+func (x *genRunner) newGenV(t reflect.Type) (v genV) {
+	switch t.Kind() {
+	case reflect.Slice, reflect.Array:
+		te := t.Elem()
+		v.Elem = x.genTypeName(te)
+		v.Size = int(te.Size())
+	case reflect.Map:
+		te, tk := t.Elem(), t.Key()
+		v.Elem = x.genTypeName(te)
+		v.MapKey = x.genTypeName(tk)
+		v.Size = int(te.Size() + tk.Size())
+	default:
+		panic("unexpected type for newGenV. Requires map or slice type")
+	}
+	return
 }
 
 func (x *genV) MethodNamePfx(prefix string, prim bool) string {
@@ -1512,7 +1530,7 @@ func (x *genV) MethodNamePfx(prefix string, prim bool) string {
 	if prim {
 		name = append(name, genTitleCaseName(x.Primitive)...)
 	} else {
-		if x.Slice {
+		if x.MapKey == "" {
 			name = append(name, "Slice"...)
 		} else {
 			name = append(name, "Map"...)
@@ -1640,7 +1658,7 @@ func genCustomTypeName(tstr string) string {
 }
 
 func genIsImmutable(t reflect.Type) (v bool) {
-	return isMutableKind(t.Kind())
+	return isImmutableKind(t.Kind())
 }
 
 type genInternal struct {
@@ -1770,23 +1788,42 @@ func genInternalInit() {
 		"float64",
 		"bool",
 	}
-	mapvaltypes2 := make(map[string]bool)
-	for _, s := range mapvaltypes {
-		mapvaltypes2[s] = true
-	}
+	wordSizeBytes := int(intBitsize) / 8
+
+	mapvaltypes2 := map[string]int{
+		"interface{}": 2 * wordSizeBytes,
+		"string":      2 * wordSizeBytes,
+		"uint":        1 * wordSizeBytes,
+		"uint8":       1,
+		"uint16":      2,
+		"uint32":      4,
+		"uint64":      8,
+		"int":         1 * wordSizeBytes,
+		"int8":        1,
+		"int16":       2,
+		"int32":       4,
+		"int64":       8,
+		"float32":     4,
+		"float64":     8,
+		"bool":        1,
+	}
+	// mapvaltypes2 := make(map[string]bool)
+	// for _, s := range mapvaltypes {
+	// 	mapvaltypes2[s] = true
+	// }
 	var gt genInternal
 
 	// For each slice or map type, there must be a (symmetrical) Encode and Decode fast-path function
 	for _, s := range types {
-		gt.Values = append(gt.Values, genV{false, "", "", s})
+		gt.Values = append(gt.Values, genV{Primitive: s, Size: mapvaltypes2[s]})
 		if s != "uint8" { // do not generate fast path for slice of bytes. Treat specially already.
-			gt.Values = append(gt.Values, genV{true, "", s, ""})
+			gt.Values = append(gt.Values, genV{Elem: s, Size: mapvaltypes2[s]})
 		}
-		if !mapvaltypes2[s] {
-			gt.Values = append(gt.Values, genV{false, s, s, ""})
+		if _, ok := mapvaltypes2[s]; !ok {
+			gt.Values = append(gt.Values, genV{MapKey: s, Elem: s, Size: 2 * mapvaltypes2[s]})
 		}
 		for _, ms := range mapvaltypes {
-			gt.Values = append(gt.Values, genV{false, s, ms, ""})
+			gt.Values = append(gt.Values, genV{MapKey: s, Elem: ms, Size: mapvaltypes2[s] + mapvaltypes2[ms]})
 		}
 	}
 

+ 8 - 1
codec/helper.go

@@ -117,6 +117,7 @@ import (
 
 const (
 	scratchByteArrayLen = 32
+	initCollectionCap   = 32 // 32 is defensive. 16 is preferred.
 
 	// Support encoding.(Binary|Text)(Unm|M)arshaler.
 	// This constant flag will enable or disable it.
@@ -147,6 +148,12 @@ const (
 
 	// if derefForIsEmptyValue, deref pointers and interfaces when checking isEmptyValue
 	derefForIsEmptyValue = false
+
+	// if resetSliceElemToZeroValue, then on decoding a slice, reset the element to a zero value first.
+	// Only concern is that, if the slice already contained some garbage, we will decode into that garbage.
+	// The chances of this are slim, so leave this "optimization".
+	// TODO: should this be true, to ensure that we always decode into a "zero" "empty" value?
+	resetSliceElemToZeroValue bool = false
 )
 
 var oneByteArr = [1]byte{0}
@@ -870,7 +877,7 @@ func panicToErr(err *error) {
 // 	panic(fmt.Errorf("%s: "+format, params2...))
 // }
 
-func isMutableKind(k reflect.Kind) (v bool) {
+func isImmutableKind(k reflect.Kind) (v bool) {
 	return false ||
 		k == reflect.Int ||
 		k == reflect.Int8 ||

+ 91 - 0
codec/helper_internal.go

@@ -149,3 +149,94 @@ func halfFloatToFloatBits(yy uint16) (d uint32) {
 	m = m << 13
 	return (s << 31) | (e << 23) | m
 }
+
+// growCap returns a new capacity for a slice, given the following:
+//   - oldCap: current capacity
+//   - unit: in-memory size of an element
+//   - num: number of elements to add
+func growCap(oldCap, unit, num int) (newCap int) {
+	// appendslice logic (if cap < 1024, *2, else *1.25):
+	//   leads to many copy calls, especially when copying bytes.
+	//   bytes.Buffer model (2*cap + n): much better for bytes.
+	// A smarter way is to take the byte-size of the appended element type into account.
+
+	// maintain 3 thresholds:
+	// t1: if cap <= t1, newcap = 2x
+	// t2: if cap <= t2, newcap = 1.75x
+	// t3: if cap <= t3, newcap = 1.5x
+	//     else          newcap = 1.25x
+	//
+	// t1, t2, t3 >= 1024 always.
+	// i.e. if unit size >= 16, then always do 2x or 1.25x (i.e. t1, t2, t3 are all the same)
+	//
+	// With this, appending for bytes increases by:
+	//    100% up to 4K
+	//     75% up to 8K
+	//     50% up to 16K
+	//     25% beyond that
+
+	// unit can be 0 e.g. for struct{}{}; handle that appropriately
+	var t1, t2, t3 int // thresholds
+	if unit <= 1 {
+		t1, t2, t3 = 4*1024, 8*1024, 16*1024
+	} else if unit < 16 {
+		t3 = 16 / unit * 1024
+		t1 = t3 * 1 / 4
+		t2 = t3 * 2 / 4
+	} else {
+		t1, t2, t3 = 1024, 1024, 1024
+	}
+
+	var x int // temporary variable
+
+	// x is multiplier here: one of 5, 6, 7 or 8; incr of 25%, 50%, 75% or 100% respectively
+	if oldCap <= t1 { // [0,t1]
+		x = 8
+	} else if oldCap > t3 { // (t3,infinity]
+		x = 5
+	} else if oldCap <= t2 { // (t1,t2]
+		x = 7
+	} else { // (t2,t3]
+		x = 6
+	}
+	newCap = x * oldCap / 4
+
+	if num > 0 {
+		newCap += num
+	}
+
+	// ensure newCap is a multiple of 64 (if it is > 64) or 16.
+	if newCap > 64 {
+		if x = newCap % 64; x != 0 {
+			x = newCap / 64
+			newCap = 64 * (x + 1)
+		}
+	} else {
+		if x = newCap % 16; x != 0 {
+			x = newCap / 16
+			newCap = 16 * (x + 1)
+		}
+	}
+	return
+}
+
+func expandSliceValue(s reflect.Value, num int) reflect.Value {
+	if num <= 0 {
+		return s
+	}
+	l0 := s.Len()
+	l1 := l0 + num // new slice length
+	if l1 < l0 {
+		panic("ExpandSlice: slice overflow")
+	}
+	c0 := s.Cap()
+	if l1 <= c0 {
+		return s.Slice(0, l1)
+	}
+	st := s.Type()
+	c1 := growCap(c0, int(st.Elem().Size()), num)
+	s2 := reflect.MakeSlice(st, l1, c1)
+	// println("expandslicevalue: cap-old: ", c0, ", cap-new: ", c1, ", len-new: ", l1)
+	reflect.Copy(s2, s)
+	return s2
+}
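
The growth policy in growCap above keys its multiplier off the element size; for unit == 1 (bytes) it works out to 2x up to 4K, 1.75x up to 8K, 1.5x up to 16K and 1.25x beyond, rounded up to a multiple of 64. A small worked sketch of just that byte case follows; growBytesCap is an illustrative re-statement for this page, not part of the package.

package main

import "fmt"

// growBytesCap restates the unit == 1 branch of the policy described above:
// pick a multiplier from the capacity band, then round up to a multiple of 64
// (or 16 for tiny capacities).
func growBytesCap(oldCap int) int {
	var x int
	switch {
	case oldCap <= 4*1024:
		x = 8 // 2.00x
	case oldCap <= 8*1024:
		x = 7 // 1.75x
	case oldCap <= 16*1024:
		x = 6 // 1.50x
	default:
		x = 5 // 1.25x
	}
	newCap := x * oldCap / 4
	if newCap > 64 {
		if newCap%64 != 0 {
			newCap = 64 * (newCap/64 + 1)
		}
	} else if newCap%16 != 0 {
		newCap = 16 * (newCap/16 + 1)
	}
	return newCap
}

func main() {
	for _, c := range []int{64, 4096, 8192, 16384, 32768} {
		fmt.Printf("cap %6d -> %6d\n", c, growBytesCap(c))
	}
	// cap     64 ->    128   (2.00x)
	// cap   4096 ->   8192   (2.00x)
	// cap   8192 ->  14336   (1.75x)
	// cap  16384 ->  24576   (1.50x)
	// cap  32768 ->  40960   (1.25x)
}
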

+ 6 - 0
codec/helper_unsafe.go

@@ -26,6 +26,9 @@ type unsafeBytes struct {
 // In unsafe mode, it doesn't incur allocation and copying caused by conversion.
 // In regular safe mode, it is an allocation and copy.
 func stringView(v []byte) string {
+	if len(v) == 0 {
+		return ""
+	}
 	x := unsafeString{uintptr(unsafe.Pointer(&v[0])), len(v)}
 	return *(*string)(unsafe.Pointer(&x))
 }
@@ -34,6 +37,9 @@ func stringView(v []byte) string {
 // In unsafe mode, it doesn't incur allocation and copying caused by conversion.
 // In regular safe mode, it is an allocation and copy.
 func bytesView(v string) []byte {
+	if len(v) == 0 {
+		return zeroByteSlice
+	}
 	x := unsafeBytes{uintptr(unsafe.Pointer(&v)), len(v), len(v)}
 	return *(*[]byte)(unsafe.Pointer(&x))
 }
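
The guards added to stringView and bytesView above matter because a zero-length slice has no element 0, so taking &v[0] would panic before the unsafe conversion even happens. Below is a minimal illustration of the guarded zero-copy conversion; it uses the standard library's unsafe.String (Go 1.20+) purely for demonstration, not the package's own header-struct approach.

package main

import (
	"fmt"
	"unsafe"
)

// stringViewSketch converts a byte slice to a string without copying.
// The length check mirrors the fix above: an empty slice must be handled
// before touching &v[0].
func stringViewSketch(v []byte) string {
	if len(v) == 0 {
		return ""
	}
	return unsafe.String(&v[0], len(v))
}

func main() {
	fmt.Println(stringViewSketch(nil) == "")     // true: guard path, no panic
	fmt.Println(stringViewSketch([]byte("abc"))) // abc: zero-copy view of the slice's bytes
}
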

+ 30 - 6
codec/json.go

@@ -39,6 +39,7 @@ import (
 	"fmt"
 	"reflect"
 	"strconv"
+	"sync"
 	"unicode/utf16"
 	"unicode/utf8"
 )
@@ -134,30 +135,49 @@ func (x *jsonStackElem) sep() (c byte) {
 		}
 	}
 	x.so = !x.so
-	if x.sr {
-		x.sr = false
-	}
+	// Note: Anything more, and this function doesn't inline. Keep it tight.
+	// if x.sr {
+	// 	x.sr = false
+	// }
 	return
 }
 
+const jsonStackPoolArrayLen = 32
+
+// pool used to prevent constant allocation of stacks.
+var jsonStackPool = sync.Pool{
+	New: func() interface{} {
+		return new([jsonStackPoolArrayLen]jsonStackElem)
+	},
+}
+
 // jsonStack contains the stack for tracking the state of the container (branch).
 // The same data structure is used during encode and decode, as it is similar functionality.
 type jsonStack struct {
 	s  []jsonStackElem // stack for map or array end tag. map=}, array=]
 	sc *jsonStackElem  // pointer to current (top) element on the stack.
+	sp *[jsonStackPoolArrayLen]jsonStackElem
 }
 
 func (j *jsonStack) start(c byte) {
+	if j.s == nil {
+		// j.s = make([]jsonStackElem, 0, 8)
+		j.sp = jsonStackPool.Get().(*[jsonStackPoolArrayLen]jsonStackElem)
+		j.s = j.sp[:0]
+	}
 	j.s = append(j.s, jsonStackElem{st: c})
 	j.sc = &(j.s[len(j.s)-1])
 }
 
 func (j *jsonStack) end() {
 	l := len(j.s) - 1 // length of new stack after pop'ing
-	j.s = j.s[:l]
 	if l == 0 {
+		jsonStackPool.Put(j.sp)
+		j.s = nil
+		j.sp = nil
 		j.sc = nil
 	} else {
+		j.s = j.s[:l]
 		j.sc = &(j.s[l-1])
 	}
 	//j.sc = &(j.s[len(j.s)-1])
@@ -481,6 +501,7 @@ type jsonDecDriver struct {
 	ct   valueType // container type. one of unset, array or map.
 	bstr [8]byte   // scratch used for string \UXXX parsing
 	b    [64]byte  // scratch
+	b2   [64]byte
 
 	wsSkipped bool // whitespace skipped
 
@@ -895,14 +916,17 @@ func (d *jsonDecDriver) DecodeBytes(bs []byte, isstring, zerocopy bool) (bsOut [
 	if c := d.s.sc.sep(); c != 0 {
 		d.expectChar(c)
 	}
-	// zerocopy doesn't matter for json, as the bytes must be parsed.
 	bs0 := d.appendStringAsBytes(d.b[:0])
+	// if isstring, then just return the bytes, even if it is using the scratch buffer.
+	// the bytes will be converted to a string as needed.
 	if isstring {
 		return bs0
 	}
 	slen := base64.StdEncoding.DecodedLen(len(bs0))
-	if cap(bs) >= slen {
+	if slen <= cap(bs) {
 		bsOut = bs[:slen]
+	} else if zerocopy && slen <= cap(d.b2) {
+		bsOut = d.b2[:slen]
 	} else {
 		bsOut = make([]byte, slen)
 	}
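
The jsonStack changes above are the sync.Pool part of the JSON work: the first start() borrows a fixed-size backing array from a pool and the final end() returns it, so tracking container state for a typical document no longer allocates. A reduced sketch of that lifecycle follows; the frame type, stackArrLen and the stack name are illustrative, and the current-element pointer bookkeeping is omitted.

package main

import (
	"fmt"
	"sync"
)

// frame stands in for the per-container state the JSON driver keeps.
type frame struct{ st byte }

const stackArrLen = 32 // illustrative size; deep nesting simply spills past the pooled array

var stackPool = sync.Pool{
	New: func() interface{} { return new([stackArrLen]frame) },
}

type stack struct {
	s  []frame             // live stack, initially backed by the pooled array
	sp *[stackArrLen]frame // pooled backing array, returned on the last end()
}

func (j *stack) start(c byte) {
	if j.s == nil {
		j.sp = stackPool.Get().(*[stackArrLen]frame)
		j.s = j.sp[:0]
	}
	j.s = append(j.s, frame{st: c})
}

func (j *stack) end() {
	if l := len(j.s) - 1; l == 0 {
		stackPool.Put(j.sp)
		j.s, j.sp = nil, nil
	} else {
		j.s = j.s[:l]
	}
}

func main() {
	var st stack
	st.start('{')
	st.start('[')
	st.end()
	fmt.Println(len(st.s)) // 1: still inside the object
	st.end()
	fmt.Println(st.s == nil) // true: backing array handed back to the pool
}
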

+ 15 - 3
codec/noop.go

@@ -57,8 +57,20 @@ func (h *noopDrv) newDecDriver(_ *Decoder) decDriver { return h }
 // --- encDriver
 
 // stack functions (for map and array)
-func (h *noopDrv) start(b bool) { h.mks = append(h.mks, b); h.mk = b }
-func (h *noopDrv) end()         { h.mks = h.mks[:len(h.mks)-1]; h.mk = h.mks[len(h.mks)-1] }
+func (h *noopDrv) start(b bool) {
+	// println("start", len(h.mks)+1)
+	h.mks = append(h.mks, b)
+	h.mk = b
+}
+func (h *noopDrv) end() {
+	// println("end: ", len(h.mks)-1)
+	h.mks = h.mks[:len(h.mks)-1]
+	if len(h.mks) > 0 {
+		h.mk = h.mks[len(h.mks)-1]
+	} else {
+		h.mk = false
+	}
+}
 
 func (h *noopDrv) EncodeBuiltin(rt uintptr, v interface{}) {}
 func (h *noopDrv) EncodeNil()                              {}
@@ -93,7 +105,7 @@ func (h *noopDrv) DecodeString() (s string)                   { return h.S[h.m(8
 
 func (h *noopDrv) DecodeBytes(bs []byte, isstring, zerocopy bool) []byte { return h.B[h.m(len(h.B))] }
 
-func (h *noopDrv) ReadEnd() { h.start(true) }
+func (h *noopDrv) ReadEnd() { h.end() }
 
 // toggle map/slice
 func (h *noopDrv) ReadMapStart() int   { h.start(true); return h.m(10) }

+ 4 - 4
codec/prebuild.sh

@@ -90,8 +90,8 @@ func fastpathEncodeTypeSwitchMap(iv interface{}, e *Encoder) bool { return false
 type fastpathE struct {
 	rtid uintptr
 	rt reflect.Type 
-	encfn func(encFnInfo, reflect.Value)
-	decfn func(decFnInfo, reflect.Value)
+	encfn func(*encFnInfo, reflect.Value)
+	decfn func(*decFnInfo, reflect.Value)
 }
 type fastpathA [0]fastpathE
 func (x fastpathA) index(rtid uintptr) int { return -1 }
@@ -142,9 +142,9 @@ _codegenerators() {
     then
         true && \
             echo "codecgen - !unsafe ... " && \
-            codecgen -rt codecgen -t 'x,codecgen,!unsafe' -o values_codecgen${zsfx} -d 1978 $zfin && \
+            codecgen  -rt codecgen -t 'x,codecgen,!unsafe' -o values_codecgen${zsfx} -d 1978 $zfin && \
             echo "codecgen - unsafe ... " && \
-            codecgen -u -rt codecgen -t 'x,codecgen,unsafe' -o values_codecgen_unsafe${zsfx} -d 1978 $zfin && \
+            codecgen  -u -rt codecgen -t 'x,codecgen,unsafe' -o values_codecgen_unsafe${zsfx} -d 1978 $zfin && \
             echo "msgp ... " && \
             msgp -tests=false -pkg=codec -o=values_msgp${zsfx} -file=$zfin && \
             echo "ffjson ... " && \

Some files were not shown because too many files have changed