Forráskód Böngészése

codec: refactor to afford bounds check elimination

See https://docs.google.com/document/d/1vdAEAjYdzjnPA9WDOQ1e4e05cYVMpqSxJYZT33Cqw2g

- use uints for slicing and indexing,
  as it eliminates the index < 0 checks
- use the `for j = 0; j < len(x); j++` loop pattern,
  which is recognized by the prove phase of Go's bounds check elimination
- Try to use constants within slices (instead of computing the index).
  This may involve making temporary slices, and working off that.
- Instead of json's readLit (for null, true, false),
  create dedicated functions that use constants to compare.
- eliminate bitset32 and bitset128, which do not elide
  bounds checks, because the pos (byte) can range from 0..255.
  Instead, use bitset256 where the pos is always in range, and bounds checks
  are eliminated.
- Use arithmetic whose value must be in the range of an array,
  e.g. for immutableKinds which is [32]reflect.Kind, take the parameter%32.
  The compiler knows that the value is in range 0..31, so no bounds checks are needed.
Ugorji Nwoke 7 éve
szülő
commit
e531a5003d

+ 2 - 2
codec/binc.go

@@ -534,9 +534,9 @@ func (d *bincDecDriver) decUint() (v uint64) {
 		d.r.readb(d.b[4:8])
 		v = uint64(bigen.Uint32(d.b[4:8]))
 	case 4, 5, 6:
-		lim := int(7 - d.vs)
+		lim := 7 - d.vs
 		d.r.readb(d.b[lim:8])
-		for i := 0; i < lim; i++ {
+		for i := uint8(0); i < lim; i++ {
 			d.b[i] = 0
 		}
 		v = uint64(bigen.Uint64(d.b[:8]))

+ 4 - 3
codec/cbor.go

@@ -268,16 +268,17 @@ func (e *cborEncDriver) encStringBytesS(bb byte, v string) {
 		} else {
 			e.w.writen1(cborBdIndefiniteString)
 		}
-		blen := len(v) / 4
+		var vlen uint = uint(len(v))
+		blen := vlen / 4
 		if blen == 0 {
 			blen = 64
 		} else if blen > 1024 {
 			blen = 1024
 		}
-		for i := 0; i < len(v); {
+		for i := uint(0); i < vlen; {
 			var v2 string
 			i2 := i + blen
-			if i2 < len(v) {
+			if i2 < vlen {
 				v2 = v[i:i2]
 			} else {
 				v2 = v[i:]

+ 20 - 18
codec/decode.go

@@ -967,11 +967,12 @@ func (z *bytesDecReader) readn1() (v uint8) {
 // }
 
 func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
-	i := z.c
-	if z.c == len(z.b) {
-		goto END
-		// panic(io.EOF)
-	}
+	i := uint(z.c)
+	// if i == len(z.b) {
+	// 	goto END
+	// 	// panic(io.EOF)
+	// }
+
 	// Replace loop with goto construct, so that this can be inlined
 	// for i := z.c; i < blen; i++ {
 	// 	if !accept.isset(z.b[i]) {
@@ -985,17 +986,17 @@ func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
 
 	// i := z.c
 LOOP:
-	if i < len(z.b) {
+	if i < uint(len(z.b)) {
 		token = z.b[i]
 		i++
 		if accept.isset(token) {
 			goto LOOP
 		}
 		// z.a -= (i - z.c)
-		z.c = i
+		z.c = int(i)
 		return
 	}
-END:
+	// END:
 	panic(io.EOF)
 	// // z.a = 0
 	// z.c = blen
@@ -1007,8 +1008,7 @@ func (z *bytesDecReader) readTo(_ []byte, accept *bitset256) (out []byte) {
 }
 
 func (z *bytesDecReader) readToNoInput(accept *bitset256) (out []byte) {
-	i := z.c
-	if i == len(z.b) {
+	if z.c == len(z.b) {
 		panic(io.EOF)
 	}
 
@@ -1044,8 +1044,9 @@ func (z *bytesDecReader) readToNoInput(accept *bitset256) (out []byte) {
 	// 	return
 
 	// c := i
+	i := uint(z.c)
 LOOP:
-	if i < len(z.b) {
+	if i < uint(len(z.b)) {
 		if accept.isset(z.b[i]) {
 			i++
 			goto LOOP
@@ -1054,7 +1055,7 @@ LOOP:
 
 	out = z.b[z.c:i]
 	// z.a -= (i - z.c)
-	z.c = i
+	z.c = int(i)
 	return // z.b[c:i]
 	// z.c, i = i, z.c
 	// return z.b[i:z.c]
@@ -1065,10 +1066,11 @@ func (z *bytesDecReader) readUntil(_ []byte, stop byte) (out []byte) {
 }
 
 func (z *bytesDecReader) readUntilNoInput(stop byte) (out []byte) {
-	i := z.c
-	if i == len(z.b) {
-		panic(io.EOF)
-	}
+	i := uint(z.c)
+	// if i == len(z.b) {
+	// 	panic(io.EOF)
+	// }
+
 	// Replace loop with goto construct, so that this can be inlined
 	// for i := z.c; i < blen; i++ {
 	// 	if z.b[i] == stop {
@@ -1080,12 +1082,12 @@ func (z *bytesDecReader) readUntilNoInput(stop byte) (out []byte) {
 	// 	}
 	// }
 LOOP:
-	if i < len(z.b) {
+	if i < uint(len(z.b)) {
 		if z.b[i] == stop {
 			i++
 			out = z.b[z.c:i]
 			// z.a -= (i - z.c)
-			z.c = i
+			z.c = int(i)
 			return
 		}
 		i++

+ 10 - 6
codec/encode.go

@@ -730,7 +730,9 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 	var poolv interface{}
 	var fkvs []sfiRv
 	// fmt.Printf(">>>>>>>>>>>>>> encode.kStruct: newlen: %d\n", newlen)
-	if newlen <= 8 {
+	if newlen < 0 { // bounds-check-elimination
+		// cannot happen // here for bounds-check-elimination
+	} else if newlen <= 8 {
 		spool, poolv = pool.sfiRv8()
 		fkvs = poolv.(*[8]sfiRv)[:newlen]
 	} else if newlen <= 16 {
@@ -749,10 +751,10 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		fkvs = make([]sfiRv, newlen)
 	}
 
-	newlen = 0
 	var kv sfiRv
 	recur := e.h.RecursiveEmptyCheck
 	sfn := structFieldNode{v: rv, update: false}
+	newlen = 0
 	for _, si := range tisfi {
 		// kv.r = si.field(rv, false)
 		kv.r = sfn.field(si)
@@ -774,6 +776,7 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		fkvs[newlen] = kv
 		newlen++
 	}
+	fkvs = fkvs[:newlen]
 
 	var mflen int
 	for k, v := range mf {
@@ -788,10 +791,11 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		mflen++
 	}
 
+	var j int
 	if toMap {
 		ee.WriteMapStart(newlen + mflen)
 		if elemsep {
-			for j := 0; j < newlen; j++ {
+			for j = 0; j < len(fkvs); j++ {
 				kv = fkvs[j]
 				ee.WriteMapElemKey()
 				// ee.EncodeStringEnc(cUTF8, kv.v)
@@ -800,7 +804,7 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 				e.encodeValue(kv.r, nil, true)
 			}
 		} else {
-			for j := 0; j < newlen; j++ {
+			for j = 0; j < len(fkvs); j++ {
 				kv = fkvs[j]
 				// ee.EncodeStringEnc(cUTF8, kv.v)
 				e.kStructFieldKey(fti.keyType, kv.v)
@@ -818,12 +822,12 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 	} else {
 		ee.WriteArrayStart(newlen)
 		if elemsep {
-			for j := 0; j < newlen; j++ {
+			for j = 0; j < len(fkvs); j++ {
 				ee.WriteArrayElem()
 				e.encodeValue(fkvs[j].r, nil, true)
 			}
 		} else {
-			for j := 0; j < newlen; j++ {
+			for j = 0; j < len(fkvs); j++ {
 				e.encodeValue(fkvs[j].r, nil, true)
 			}
 		}

+ 32 - 32
codec/fast-path.generated.go

@@ -17774,8 +17774,8 @@ func (_ fastpathT) DecSliceIntfV(v []interface{}, canChange bool, d *Decoder) (_
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 16)
@@ -17872,8 +17872,8 @@ func (_ fastpathT) DecSliceStringV(v []string, canChange bool, d *Decoder) (_ []
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 16)
@@ -17970,8 +17970,8 @@ func (_ fastpathT) DecSliceFloat32V(v []float32, canChange bool, d *Decoder) (_
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 4)
@@ -18068,8 +18068,8 @@ func (_ fastpathT) DecSliceFloat64V(v []float64, canChange bool, d *Decoder) (_
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -18166,8 +18166,8 @@ func (_ fastpathT) DecSliceUintV(v []uint, canChange bool, d *Decoder) (_ []uint
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -18264,8 +18264,8 @@ func (_ fastpathT) DecSliceUint8V(v []uint8, canChange bool, d *Decoder) (_ []ui
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 1)
@@ -18362,8 +18362,8 @@ func (_ fastpathT) DecSliceUint16V(v []uint16, canChange bool, d *Decoder) (_ []
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 2)
@@ -18460,8 +18460,8 @@ func (_ fastpathT) DecSliceUint32V(v []uint32, canChange bool, d *Decoder) (_ []
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 4)
@@ -18558,8 +18558,8 @@ func (_ fastpathT) DecSliceUint64V(v []uint64, canChange bool, d *Decoder) (_ []
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -18656,8 +18656,8 @@ func (_ fastpathT) DecSliceUintptrV(v []uintptr, canChange bool, d *Decoder) (_
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -18754,8 +18754,8 @@ func (_ fastpathT) DecSliceIntV(v []int, canChange bool, d *Decoder) (_ []int, c
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -18852,8 +18852,8 @@ func (_ fastpathT) DecSliceInt8V(v []int8, canChange bool, d *Decoder) (_ []int8
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 1)
@@ -18950,8 +18950,8 @@ func (_ fastpathT) DecSliceInt16V(v []int16, canChange bool, d *Decoder) (_ []in
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 2)
@@ -19048,8 +19048,8 @@ func (_ fastpathT) DecSliceInt32V(v []int32, canChange bool, d *Decoder) (_ []in
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 4)
@@ -19146,8 +19146,8 @@ func (_ fastpathT) DecSliceInt64V(v []int64, canChange bool, d *Decoder) (_ []in
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 8)
@@ -19244,8 +19244,8 @@ func (_ fastpathT) DecSliceBoolV(v []bool, canChange bool, d *Decoder) (_ []bool
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, 1)

+ 2 - 2
codec/fast-path.go.tmpl

@@ -440,8 +440,8 @@ func (_ fastpathT) {{ .MethodNamePfx "Dec" false }}V(v []{{ .Elem }}, canChange
 			changed = true
 		}
 	}
-	j := 0
-	for ; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
+	var j int
+	for j = 0; (hasLen && j < containerLenS) || !(hasLen || dd.CheckBreak()); j++ {
 		if j == 0 && len(v) == 0 && canChange {
 			if hasLen {
 				xlen = decInferLen(containerLenS, d.h.MaxInitLen, {{ .Size }})

+ 1 - 1
codec/gen-dec-array.go.tmpl

@@ -33,7 +33,7 @@ if {{var "l"}} == 0 {
 	} {{end}}
 	var {{var "j"}} int 
     // var {{var "dn"}} bool 
-	for ; ({{var "hl"}} && {{var "j"}} < {{var "l"}}) || !({{var "hl"}} || r.CheckBreak()); {{var "j"}}++ {
+	for {{var "j"}} = 0; ({{var "hl"}} && {{var "j"}} < {{var "l"}}) || !({{var "hl"}} || r.CheckBreak()); {{var "j"}}++ { // bounds-check-elimination
 		{{if not isArray}} if {{var "j"}} == 0 && {{var "v"}} == nil {
 			if {{var "hl"}} {
 				{{var "rl"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})

+ 1 - 1
codec/gen.generated.go

@@ -88,7 +88,7 @@ if {{var "l"}} == 0 {
 	} {{end}}
 	var {{var "j"}} int 
     // var {{var "dn"}} bool 
-	for ; ({{var "hl"}} && {{var "j"}} < {{var "l"}}) || !({{var "hl"}} || r.CheckBreak()); {{var "j"}}++ {
+	for {{var "j"}} = 0; ({{var "hl"}} && {{var "j"}} < {{var "l"}}) || !({{var "hl"}} || r.CheckBreak()); {{var "j"}}++ { // bounds-check-elimination
 		{{if not isArray}} if {{var "j"}} == 0 && {{var "v"}} == nil {
 			if {{var "hl"}} {
 				{{var "rl"}} = z.DecInferLen({{var "l"}}, z.DecBasicHandle().MaxInitLen, {{ .Size }})

+ 12 - 31
codec/helper.go

@@ -145,7 +145,7 @@ var (
 
 var codecgen bool
 
-var refBitset bitset32
+var refBitset bitset256
 var pool pooler
 var panicv panicHdl
 
@@ -1172,7 +1172,7 @@ func (ti *typeInfo) isFlag(f typeInfoFlag) bool {
 func (ti *typeInfo) indexForEncName(name []byte) (index int16) {
 	var sn []byte
 	if len(name)+2 <= 32 {
-		var buf [32]byte // should not escape
+		var buf [32]byte // should not escape to heap
 		sn = buf[:len(name)+2]
 	} else {
 		sn = make([]byte, len(name)+2)
@@ -1459,7 +1459,7 @@ LOOP:
 			si.encName = f.Name
 		}
 		si.encNameAsciiAlphaNum = true
-		for i := len(si.encName) - 1; i >= 0; i-- {
+		for i := len(si.encName) - 1; i >= 0; i-- { // bounds-check elimination
 			b := si.encName[i]
 			if (b >= '0' && b <= '9') || (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') {
 				continue
@@ -1695,7 +1695,9 @@ func panicValToErr(h errDecorator, v interface{}, err *error) {
 }
 
 func isImmutableKind(k reflect.Kind) (v bool) {
-	return immutableKindsSet[k]
+	// return immutableKindsSet[k]
+	// since we know reflect.Kind is in range 0..31, then use the k%32 == k constraint
+	return immutableKindsSet[k%reflect.Kind(len(immutableKindsSet))] // bounds-check-elimination
 }
 
 // ----
@@ -2292,7 +2294,12 @@ func (s *set) remove(v uintptr) (exists bool) {
 
 // bitset types are better than [256]bool, because they permit the whole
 // bitset array being on a single cache line and use less memory.
-
+//
+// Also, since pos is a byte (0-255), there's no bounds checks on indexing (cheap).
+//
+// We previously had bitset128 [16]byte, and bitset32 [4]byte, but those introduced
+// bounds checking, so we discarded them, and everyone uses bitset256.
+//
 // given x > 0 and n > 0 and x is exactly 2^n, then pos/x === pos>>n AND pos%x === pos&(x-1).
 // consequently, pos/32 === pos>>5, pos/16 === pos>>4, pos/8 === pos>>3, pos%8 == pos&7
 
@@ -2314,32 +2321,6 @@ func (x *bitset256) set(pos byte) {
 // 	x[pos>>3] &^= (1 << (pos & 7))
 // }
 
-type bitset128 [16]byte
-
-func (x *bitset128) isset(pos byte) bool {
-	return x[pos>>3]&(1<<(pos&7)) != 0
-}
-func (x *bitset128) set(pos byte) {
-	x[pos>>3] |= (1 << (pos & 7))
-}
-
-// func (x *bitset128) unset(pos byte) {
-// 	x[pos>>3] &^= (1 << (pos & 7))
-// }
-
-type bitset32 [4]byte
-
-func (x *bitset32) isset(pos byte) bool {
-	return x[pos>>3]&(1<<(pos&7)) != 0
-}
-func (x *bitset32) set(pos byte) {
-	x[pos>>3] |= (1 << (pos & 7))
-}
-
-// func (x *bitset32) unset(pos byte) {
-// 	x[pos>>3] &^= (1 << (pos & 7))
-// }
-
 // type bit2set256 [64]byte
 
 // func (x *bit2set256) set(pos byte, v1, v2 bool) {

+ 87 - 56
codec/json.go

@@ -50,6 +50,12 @@ const (
 	jsonLitNull = 14
 )
 
+var (
+	jsonLiteral4True  = jsonLiterals[jsonLitTrue+1 : jsonLitTrue+4]
+	jsonLiteral4False = jsonLiterals[jsonLitFalse+1 : jsonLitFalse+5]
+	jsonLiteral4Null  = jsonLiterals[jsonLitNull+1 : jsonLitNull+4]
+)
+
 const (
 	jsonU4Chk2 = '0'
 	jsonU4Chk1 = 'a' - 10
@@ -76,14 +82,15 @@ var (
 	// jsonTabs and jsonSpaces are used as caches for indents
 	jsonTabs, jsonSpaces [jsonSpacesOrTabsLen]byte
 
-	jsonCharHtmlSafeSet   bitset128
-	jsonCharSafeSet       bitset128
+	jsonCharHtmlSafeSet   bitset256
+	jsonCharSafeSet       bitset256
 	jsonCharWhitespaceSet bitset256
 	jsonNumSet            bitset256
 )
 
 func init() {
-	for i := 0; i < jsonSpacesOrTabsLen; i++ {
+	var i byte
+	for i = 0; i < jsonSpacesOrTabsLen; i++ {
 		jsonSpaces[i] = ' '
 		jsonTabs[i] = '\t'
 	}
@@ -91,7 +98,6 @@ func init() {
 	// populate the safe values as true: note: ASCII control characters are (0-31)
 	// jsonCharSafeSet:     all true except (0-31) " \
 	// jsonCharHtmlSafeSet: all true except (0-31) " \ < > &
-	var i byte
 	for i = 32; i < utf8.RuneSelf; i++ {
 		switch i {
 		case '"', '\\':
@@ -478,15 +484,15 @@ func (e *jsonEncDriver) EncodeStringBytes(c charEncoding, v []byte) {
 			return
 		}
 
-		slen := base64.StdEncoding.EncodedLen(len(v))
-		if cap(e.bs) >= slen+2 {
-			e.bs = e.bs[:slen+2]
+		slen := base64.StdEncoding.EncodedLen(len(v)) + 2
+		if cap(e.bs) >= slen {
+			e.bs = e.bs[:slen]
 		} else {
-			e.bs = make([]byte, slen+2)
+			e.bs = make([]byte, slen)
 		}
 		e.bs[0] = '"'
 		base64.StdEncoding.Encode(e.bs[1:], v)
-		e.bs[slen+1] = '"'
+		e.bs[slen-1] = '"'
 		e.ew.writeb(e.bs)
 	} else {
 		e.quoteStr(stringView(v))
@@ -504,15 +510,15 @@ func (e *jsonEncDriver) EncodeStringBytesRaw(v []byte) {
 		return
 	}
 
-	slen := base64.StdEncoding.EncodedLen(len(v))
-	if cap(e.bs) >= slen+2 {
-		e.bs = e.bs[:slen+2]
+	slen := base64.StdEncoding.EncodedLen(len(v)) + 2
+	if cap(e.bs) >= slen {
+		e.bs = e.bs[:slen]
 	} else {
-		e.bs = make([]byte, slen+2)
+		e.bs = make([]byte, slen)
 	}
 	e.bs[0] = '"'
 	base64.StdEncoding.Encode(e.bs[1:], v)
-	e.bs[slen+1] = '"'
+	e.bs[slen-1] = '"'
 	e.ew.writeb(e.bs)
 }
 
@@ -733,12 +739,36 @@ func (d *jsonDecDriver) ReadMapEnd() {
 	d.c = containerMapEnd
 }
 
-func (d *jsonDecDriver) readLit(length, fromIdx uint8) {
-	// length here is always less than 8 (literals are: null, true, false)
-	bs := d.r.readx(int(length))
+// func (d *jsonDecDriver) readLit(length, fromIdx uint8) {
+// 	// length here is always less than 8 (literals are: null, true, false)
+// 	bs := d.r.readx(int(length))
+// 	d.tok = 0
+// 	if jsonValidateSymbols && !bytes.Equal(bs, jsonLiterals[fromIdx:fromIdx+length]) {
+// 		d.d.errorf("expecting %s: got %s", jsonLiterals[fromIdx:fromIdx+length], bs)
+// 	}
+// }
+
+func (d *jsonDecDriver) readLit4True() {
+	bs := d.r.readx(3)
 	d.tok = 0
-	if jsonValidateSymbols && !bytes.Equal(bs, jsonLiterals[fromIdx:fromIdx+length]) {
-		d.d.errorf("expecting %s: got %s", jsonLiterals[fromIdx:fromIdx+length], bs)
+	if jsonValidateSymbols && !bytes.Equal(bs, jsonLiteral4True) {
+		d.d.errorf("expecting %s: got %s", jsonLiteral4True, bs)
+	}
+}
+
+func (d *jsonDecDriver) readLit4False() {
+	bs := d.r.readx(4)
+	d.tok = 0
+	if jsonValidateSymbols && !bytes.Equal(bs, jsonLiteral4False) {
+		d.d.errorf("expecting %s: got %s", jsonLiteral4False, bs)
+	}
+}
+
+func (d *jsonDecDriver) readLit4Null() {
+	bs := d.r.readx(3)
+	d.tok = 0
+	if jsonValidateSymbols && !bytes.Equal(bs, jsonLiteral4Null) {
+		d.d.errorf("expecting %s: got %s", jsonLiteral4Null, bs)
 	}
 }
 
@@ -749,7 +779,7 @@ func (d *jsonDecDriver) TryDecodeAsNil() bool {
 	// we shouldn't try to see if "null" was here, right?
 	// only the plain string: `null` denotes a nil (ie not quotes)
 	if d.tok == 'n' {
-		d.readLit(3, jsonLitNull+1) // (n)ull
+		d.readLit4Null()
 		return true
 	}
 	return false
@@ -765,10 +795,10 @@ func (d *jsonDecDriver) DecodeBool() (v bool) {
 	}
 	switch d.tok {
 	case 'f':
-		d.readLit(4, jsonLitFalse+1) // (f)alse
+		d.readLit4False()
 		// v = false
 	case 't':
-		d.readLit(3, jsonLitTrue+1) // (t)rue
+		d.readLit4True()
 		v = true
 	default:
 		d.d.errorf("decode bool: got first char %c", d.tok)
@@ -991,15 +1021,15 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 		// handle non-string scalar: null, true, false or a number
 		switch d.tok {
 		case 'n':
-			d.readLit(3, jsonLitNull+1) // (n)ull
+			d.readLit4Null()
 			d.bs = d.bs[:0]
 			d.fnull = true
 		case 'f':
-			d.readLit(4, jsonLitFalse+1) // (f)alse
+			d.readLit4False()
 			d.bs = d.bs[:5]
 			copy(d.bs, "false")
 		case 't':
-			d.readLit(3, jsonLitTrue+1) // (t)rue
+			d.readLit4True()
 			d.bs = d.bs[:4]
 			copy(d.bs, "true")
 		default:
@@ -1018,7 +1048,7 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 	d.tok = 0
 	r := d.r
 	var cs = r.readUntil(d.b2[:0], '"')
-	var cslen = len(cs)
+	var cslen = uint(len(cs))
 	var c uint8
 	v := d.bs[:0]
 	// append on each byte seen can be expensive, so we just
@@ -1027,11 +1057,12 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 	// and when we see a special byte
 	// e.g. end-of-slice, " or \,
 	// we will append the full range into the v slice before proceeding
-	for i, cursor := 0, 0; ; {
+	var i, cursor uint
+	for {
 		if i == cslen {
 			v = append(v, cs[cursor:]...)
 			cs = r.readUntil(d.b2[:0], '"')
-			cslen = len(cs)
+			cslen = uint(len(cs))
 			i, cursor = 0, 0
 		}
 		c = cs[i]
@@ -1062,14 +1093,13 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 		case 'u':
 			var r rune
 			var rr uint32
-			if len(cs) < i+4 { // may help reduce bounds-checking
+			if cslen < i+4 {
 				d.d.errorf("need at least 4 more bytes for unicode sequence")
 			}
-			// c = cs[i+4] // may help reduce bounds-checking
-			for j := 1; j < 5; j++ {
+			var j uint
+			for _, c = range cs[i+1 : i+5] { // bounds-check-elimination
 				// best to use explicit if-else
 				// - not a table, etc which involve memory loads, array lookup with bounds checks, etc
-				c = cs[i+j]
 				if c >= '0' && c <= '9' {
 					rr = rr*16 + uint32(c-jsonU4Chk2)
 				} else if c >= 'a' && c <= 'f' {
@@ -1085,30 +1115,31 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 			r = rune(rr)
 			i += 4
 			if utf16.IsSurrogate(r) {
-				if len(cs) >= i+6 && cs[i+2] == 'u' && cs[i+1] == '\\' {
-					i += 2
-					// c = cs[i+4] // may help reduce bounds-checking
-					var rr1 uint32
-					for j := 1; j < 5; j++ {
-						c = cs[i+j]
-						if c >= '0' && c <= '9' {
-							rr = rr*16 + uint32(c-jsonU4Chk2)
-						} else if c >= 'a' && c <= 'f' {
-							rr = rr*16 + uint32(c-jsonU4Chk1)
-						} else if c >= 'A' && c <= 'F' {
-							rr = rr*16 + uint32(c-jsonU4Chk0)
-						} else {
-							r = unicode.ReplacementChar
-							i += 4
-							goto encode_rune
+				if len(cs) >= int(i+6) {
+					var cx = cs[i+1 : i+7 : i+7][:6] // [:6] affords bounds-check-elimination
+					if cx[0] == '\\' && cx[1] == 'u' {
+						i += 2
+						var rr1 uint32
+						for j = 2; j < 6; j++ {
+							c = cx[j]
+							if c >= '0' && c <= '9' {
+								rr = rr*16 + uint32(c-jsonU4Chk2)
+							} else if c >= 'a' && c <= 'f' {
+								rr = rr*16 + uint32(c-jsonU4Chk1)
+							} else if c >= 'A' && c <= 'F' {
+								rr = rr*16 + uint32(c-jsonU4Chk0)
+							} else {
+								r = unicode.ReplacementChar
+								i += 4
+								goto encode_rune
+							}
 						}
+						r = utf16.DecodeRune(r, rune(rr1))
+						i += 4
+						goto encode_rune
 					}
-					r = utf16.DecodeRune(r, rune(rr1))
-					i += 4
-				} else {
-					r = unicode.ReplacementChar
-					goto encode_rune
 				}
+				r = unicode.ReplacementChar
 			}
 		encode_rune:
 			w2 := utf8.EncodeRune(d.bstr[:], r)
@@ -1188,14 +1219,14 @@ func (d *jsonDecDriver) DecodeNaked() {
 	}
 	switch d.tok {
 	case 'n':
-		d.readLit(3, jsonLitNull+1) // (n)ull
+		d.readLit4Null()
 		z.v = valueTypeNil
 	case 'f':
-		d.readLit(4, jsonLitFalse+1) // (f)alse
+		d.readLit4False()
 		z.v = valueTypeBool
 		z.b = false
 	case 't':
-		d.readLit(3, jsonLitTrue+1) // (t)rue
+		d.readLit4True()
 		z.v = valueTypeBool
 		z.b = true
 	case '{':

+ 3 - 3
codec/mammoth2_codecgen_generated_test.go

@@ -40557,7 +40557,7 @@ func (x codecSelfer19781) dectestMammoth2Basic(v *testMammoth2Basic, d *Decoder)
 
 		var yyj1 int
 		// var yydn1 bool
-		for ; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ {
+		for yyj1 = 0; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ { // bounds-check-elimination
 
 			yyh1.ElemContainerState(yyj1)
 
@@ -40707,7 +40707,7 @@ func (x codecSelfer19781) decSliceTestMammoth2(v *[]TestMammoth2, d *Decoder) {
 		}
 		var yyj1 int
 		// var yydn1 bool
-		for ; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ {
+		for yyj1 = 0; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ { // bounds-check-elimination
 			if yyj1 == 0 && yyv1 == nil {
 				if yyhl1 {
 					yyrl1 = z.DecInferLen(yyl1, z.DecBasicHandle().MaxInitLen, 4880)
@@ -40782,7 +40782,7 @@ func (x codecSelfer19781) decArray4int64(v *[4]int64, d *Decoder) {
 
 		var yyj1 int
 		// var yydn1 bool
-		for ; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ {
+		for yyj1 = 0; (yyhl1 && yyj1 < yyl1) || !(yyhl1 || r.CheckBreak()); yyj1++ { // bounds-check-elimination
 
 			yyh1.ElemContainerState(yyj1)