Explorar o código

codec: optimizations for json (hex strings, batch update) and loops (hoist len where possible)

json: optimizations for hex strings and batch append
- use a table to look up the jsonU4 representation instead of computing it
- when reading string bytes, try to append characters in batch
  up until a special character is seen (end-of-slice, ", \)
- inline evaluation of hex strings \uXXXX[\uXXXX] (eliminating function call overhead)

general: hoist len() when used inside loop if slice/map is a func parameter
- per go runtime: len() is only cached if the slice/map is a local variable.
  Otherwise, it is recomputed on each request, on the assumption that a
  different goroutine might have updated the slice.
  Consequently, we cache len ourselves when we know the runtime cannot safely cache it.

test: add testNumRepeatString parameter, to allow us to make bigger strings
- use testNumRepeatString to make strings in test larger
- include more '"' and \uXXXX characters to truly represent json strings
Ugorji Nwoke hai 8 anos
pai
achega
f76f59a92f
Modificáronse 8 ficheiros con 159 adicións e 78 borrados
  1. 11 8
      codec/decode.go
  2. 8 6
      codec/encode.go
  3. 3 2
      codec/helper.go
  4. 90 36
      codec/json.go
  5. 8 6
      codec/msgpack.go
  6. 3 0
      codec/shared_test.go
  7. 31 20
      codec/values_test.go
  8. 5 0
      codec/z_all_test.go

+ 11 - 8
codec/decode.go

@@ -547,7 +547,8 @@ func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
 	if z.a == 0 {
 		return
 	}
-	for i := z.c; i < len(z.b); i++ {
+	blen := len(z.b)
+	for i := z.c; i < blen; i++ {
 		if accept.isset(z.b[i]) {
 			continue
 		}
@@ -557,15 +558,16 @@ func (z *bytesDecReader) skip(accept *bitset256) (token byte) {
 		z.c = i
 		return
 	}
-	z.a, z.c = 0, len(z.b)
+	z.a, z.c = 0, blen
 	return
 }
 
-func (z *bytesDecReader) readTo(in []byte, accept *bitset256) (out []byte) {
+func (z *bytesDecReader) readTo(_ []byte, accept *bitset256) (out []byte) {
 	if z.a == 0 {
 		return
 	}
-	for i := z.c; i < len(z.b); i++ {
+	blen := len(z.b)
+	for i := z.c; i < blen; i++ {
 		if !accept.isset(z.b[i]) {
 			out = z.b[z.c:i]
 			z.a -= (i - z.c)
@@ -574,15 +576,16 @@ func (z *bytesDecReader) readTo(in []byte, accept *bitset256) (out []byte) {
 		}
 	}
 	out = z.b[z.c:]
-	z.a, z.c = 0, len(z.b)
+	z.a, z.c = 0, blen
 	return
 }
 
-func (z *bytesDecReader) readUntil(in []byte, stop byte) (out []byte) {
+func (z *bytesDecReader) readUntil(_ []byte, stop byte) (out []byte) {
 	if z.a == 0 {
 		panic(io.EOF)
 	}
-	for i := z.c; i < len(z.b); i++ {
+	blen := len(z.b)
+	for i := z.c; i < blen; i++ {
 		if z.b[i] == stop {
 			i++
 			out = z.b[z.c:i]
@@ -591,7 +594,7 @@ func (z *bytesDecReader) readUntil(in []byte, stop byte) (out []byte) {
 			return
 		}
 	}
-	z.a, z.c = 0, len(z.b)
+	z.a, z.c = 0, blen
 	panic(io.EOF)
 }
 

+ 8 - 6
codec/encode.go

@@ -257,23 +257,25 @@ type bytesEncWriter struct {
 }
 
 func (z *bytesEncWriter) writeb(s []byte) {
-	if len(s) == 0 {
+	slen := len(s)
+	if slen == 0 {
 		return
 	}
-	oc, a := z.growNoAlloc(len(s))
+	oc, a := z.growNoAlloc(slen)
 	if a {
-		z.growAlloc(len(s), oc)
+		z.growAlloc(slen, oc)
 	}
 	copy(z.b[oc:], s)
 }
 
 func (z *bytesEncWriter) writestr(s string) {
-	if len(s) == 0 {
+	slen := len(s)
+	if slen == 0 {
 		return
 	}
-	oc, a := z.growNoAlloc(len(s))
+	oc, a := z.growNoAlloc(slen)
 	if a {
-		z.growAlloc(len(s), oc)
+		z.growAlloc(slen, oc)
 	}
 	copy(z.b[oc:], s)
 }

+ 3 - 2
codec/helper.go

@@ -791,6 +791,7 @@ func (si *structFieldInfo) field(v reflect.Value, update bool) (rv2 reflect.Valu
 		}
 		v = v.Field(int(x))
 	}
+
 	return v, true
 }
 
@@ -1180,8 +1181,8 @@ func (x *TypeInfos) rget(rt reflect.Type, rtid uintptr, omitEmpty bool,
 	//       Typically, types have < 16 fields,
 	//       and iteration using equals is faster than maps there
 	flen := rt.NumField()
-	if flen > (1<<16 - 1) {
-		panic(fmt.Errorf("codec: types with more than %v fields are not supported - has %v fields", (1<<16 - 1), flen))
+	if flen > (1<<maxLevelsEmbedding - 1) {
+		panic(fmt.Errorf("codec: types with more than %v fields are not supported - has %v fields", (1<<maxLevelsEmbedding - 1), flen))
 	}
 LOOP:
 	for j, jlen := uint16(0), uint16(flen); j < jlen; j++ {

+ 90 - 36
codec/json.go

@@ -63,6 +63,8 @@ var (
 	jsonCharSafeSet       bitset128
 	jsonCharWhitespaceSet bitset256
 	jsonNumSet            bitset256
+
+	jsonU4Set [256]byte
 )
 
 const (
@@ -83,6 +85,8 @@ const (
 	jsonValidateSymbols = true
 
 	jsonSpacesOrTabsLen = 128
+
+	jsonU4SetErrVal = 128
 )
 
 func init() {
@@ -119,6 +123,19 @@ func init() {
 			jsonNumSet.set(i)
 		}
 	}
+	for j := range jsonU4Set {
+		switch i = byte(j); i {
+		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			jsonU4Set[i] = i - '0'
+		case 'a', 'b', 'c', 'd', 'e', 'f':
+			jsonU4Set[i] = i - 'a' + 10
+		case 'A', 'B', 'C', 'D', 'E', 'F':
+			jsonU4Set[i] = i - 'A' + 10
+		default:
+			jsonU4Set[i] = jsonU4SetErrVal
+		}
+	}
+	// jsonU4Set[255] = jsonU4SetErrVal
 }
 
 type jsonEncDriver struct {
@@ -319,8 +336,8 @@ func (e *jsonEncDriver) quoteStr(s string) {
 	const hex = "0123456789abcdef"
 	w := e.w
 	w.writen1('"')
-	start := 0
-	for i := 0; i < len(s); {
+	var start int
+	for i, slen := 0, len(s); i < slen; {
 		// encode all bytes < 0x20 (except \r, \n).
 		// also encode < > & to prevent security holes when served to some browsers.
 		if b := s[i]; b < utf8.RuneSelf {
@@ -701,23 +718,33 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 
 	d.tok = 0
 	r := d.r
-	var cs []byte
-	v := d.bs[:0]
+	var cs = r.readUntil(d.b2[:0], '"')
+	var cslen = len(cs)
 	var c uint8
-	for i := 0; ; i++ {
-		if i == len(cs) {
+	v := d.bs[:0]
+	// append on each byte seen can be expensive, so we just
+	// keep track of where we last read a contiguous set of
+	// non-special bytes (using cursor variable),
+	// and when we see a special byte
+	// e.g. end-of-slice, " or \,
+	// we will append the full range into the v slice before proceeding
+	for i, cursor := 0, 0; ; {
+		if i == cslen {
+			v = append(v, cs[cursor:]...)
 			cs = r.readUntil(d.b2[:0], '"')
-			i = 0
+			cslen = len(cs)
+			i, cursor = 0, 0
 		}
 		c = cs[i]
 		if c == '"' {
+			v = append(v, cs[cursor:i]...)
 			break
 		}
 		if c != '\\' {
-			v = append(v, c)
+			i++
 			continue
 		}
-		// cs[i] == '\\'
+		v = append(v, cs[cursor:i]...)
 		i++
 		c = cs[i]
 		switch c {
@@ -734,44 +761,65 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 		case 't':
 			v = append(v, '\t')
 		case 'u':
-			rr := d.jsonU4Arr([4]byte{cs[i+1], cs[i+2], cs[i+3], cs[i+4]})
+			var r rune
+			var rr uint32
+			c = cs[i+4] // may help reduce bounds-checking
+			for j := 1; j < 5; j++ {
+				c = jsonU4Set[cs[i+j]]
+				if c == jsonU4SetErrVal {
+					d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
+				}
+				rr = rr*16 + uint32(c)
+			}
+			r = rune(rr)
 			i += 4
-			if utf16.IsSurrogate(rr) {
-				if !(cs[i+1] == '\\' && cs[i+2] == 'u') {
+			if utf16.IsSurrogate(r) {
+				if !(cs[i+2] == 'u' && cs[i+i] == '\\') {
 					d.d.errorf(`json: unquoteStr: invalid unicode sequence. Expecting \u`)
 					return
 				}
 				i += 2
-				rr = utf16.DecodeRune(rr, d.jsonU4Arr([4]byte{cs[i+1], cs[i+2], cs[i+3], cs[i+4]}))
+				c = cs[i+4] // may help reduce bounds-checking
+				var rr1 uint32
+				for j := 1; j < 5; j++ {
+					c = jsonU4Set[cs[i+j]]
+					if c == jsonU4SetErrVal {
+						d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
+					}
+					rr1 = rr1*16 + uint32(c)
+				}
+				r = utf16.DecodeRune(r, rune(rr1))
 				i += 4
 			}
-			w2 := utf8.EncodeRune(d.bstr[:], rr)
+			w2 := utf8.EncodeRune(d.bstr[:], r)
 			v = append(v, d.bstr[:w2]...)
 		default:
 			d.d.errorf("json: unsupported escaped value: %c", c)
 		}
+		i++
+		cursor = i
 	}
 	d.bs = v
 }
 
-func (d *jsonDecDriver) jsonU4Arr(bs [4]byte) (r rune) {
-	// u, _ := strconv.ParseUint(string(d.bstr[:4]), 16, 64)
-	var u uint32
-	for _, v := range bs {
-		if '0' <= v && v <= '9' {
-			v = v - '0'
-		} else if 'a' <= v && v <= 'z' {
-			v = v - 'a' + 10
-		} else if 'A' <= v && v <= 'Z' {
-			v = v - 'A' + 10
-		} else {
-			d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, v)
-			return 0
-		}
-		u = u*16 + uint32(v)
-	}
-	return rune(u)
-}
+// func (d *jsonDecDriver) jsonU4Arr(bs [4]byte) (r rune) {
+// 	// u, _ := strconv.ParseUint(string(d.bstr[:4]), 16, 64)
+// 	var u uint32
+// 	for _, v := range bs {
+// 		if '0' <= v && v <= '9' {
+// 			v = v - '0'
+// 		} else if 'a' <= v && v <= 'f' {
+// 			v = v - 'a' + 10
+// 		} else if 'A' <= v && v <= 'f' {
+// 			v = v - 'A' + 10
+// 		} else {
+// 			// d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, v)
+// 			return utf8.RuneError
+// 		}
+// 		u = u*16 + uint32(v)
+// 	}
+// 	return rune(u)
+// }
 
 func (d *jsonDecDriver) DecodeNaked() {
 	z := d.d.n
@@ -805,10 +853,7 @@ func (d *jsonDecDriver) DecodeNaked() {
 		if len(bs) == 0 {
 			d.d.errorf("json: decode number from empty string")
 			return
-		} else if d.h.PreferFloat ||
-			bytes.IndexByte(bs, '.') != -1 ||
-			bytes.IndexByte(bs, 'e') != -1 ||
-			bytes.IndexByte(bs, 'E') != -1 {
+		} else if d.h.PreferFloat || jsonIsFloatBytes(bs) { // bytes.IndexByte(bs, '.') != -1 ||...
 			// } else if d.h.PreferFloat || bytes.ContainsAny(bs, ".eE") {
 			z.v = valueTypeFloat
 			z.f, err = strconv.ParseFloat(stringView(bs), 64)
@@ -943,6 +988,15 @@ func (d *jsonDecDriver) reset() {
 	// d.n.reset()
 }
 
+func jsonIsFloatBytes(bs []byte) bool {
+	for _, v := range bs {
+		if v == '.' || v == 'e' || v == 'E' {
+			return true
+		}
+	}
+	return false
+}
+
 var jsonEncodeTerminate = []byte{' '}
 
 func (h *JsonHandle) rpcEncodeTerminate() []byte {

+ 8 - 6
codec/msgpack.go

@@ -222,12 +222,13 @@ func (e *msgpackEncDriver) EncodeMapStart(length int) {
 }
 
 func (e *msgpackEncDriver) EncodeString(c charEncoding, s string) {
+	slen := len(s)
 	if c == c_RAW && e.h.WriteExt {
-		e.writeContainerLen(msgpackContainerBin, len(s))
+		e.writeContainerLen(msgpackContainerBin, slen)
 	} else {
-		e.writeContainerLen(msgpackContainerStr, len(s))
+		e.writeContainerLen(msgpackContainerStr, slen)
 	}
-	if len(s) > 0 {
+	if slen > 0 {
 		e.w.writestr(s)
 	}
 }
@@ -237,12 +238,13 @@ func (e *msgpackEncDriver) EncodeSymbol(v string) {
 }
 
 func (e *msgpackEncDriver) EncodeStringBytes(c charEncoding, bs []byte) {
+	slen := len(bs)
 	if c == c_RAW && e.h.WriteExt {
-		e.writeContainerLen(msgpackContainerBin, len(bs))
+		e.writeContainerLen(msgpackContainerBin, slen)
 	} else {
-		e.writeContainerLen(msgpackContainerStr, len(bs))
+		e.writeContainerLen(msgpackContainerStr, slen)
 	}
-	if len(bs) > 0 {
+	if slen > 0 {
 		e.w.writeb(bs)
 	}
 }

+ 3 - 0
codec/shared_test.go

@@ -111,6 +111,8 @@ var (
 
 	testJsonHTMLCharsAsIs bool
 	testJsonPreferFloat   bool
+
+	testNumRepeatString int
 )
 
 // variables that are not flags, but which can configure the handles
@@ -155,6 +157,7 @@ func testInitFlags() {
 	flag.BoolVar(&testSkipIntf, "tf", false, "Skip Interfaces")
 	flag.BoolVar(&testUseReset, "tr", false, "Use Reset")
 	flag.IntVar(&testJsonIndent, "td", 0, "Use JSON Indent")
+	flag.IntVar(&testNumRepeatString, "trs", 10, "Create string variables by repeating a string N times")
 	flag.IntVar(&testMaxInitLen, "tx", 0, "Max Init Len")
 	flag.BoolVar(&testUseMust, "tm", true, "Use Must(En|De)code")
 	flag.BoolVar(&testCheckCircRef, "tl", false, "Use Check Circular Ref")

+ 31 - 20
codec/values_test.go

@@ -11,6 +11,7 @@ package codec
 
 import (
 	"math"
+	"strings"
 	"time"
 )
 
@@ -210,17 +211,21 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 
 	var a = AnonInTestStruc{
 		// There's more leeway in altering this.
-		AS:    "A-String",
+		AS:    strRpt("A-String"),
 		AI64:  -64646464,
 		AI16:  1616,
 		AUi64: 64646464,
 		// (U+1D11E)G-clef character may be represented in json as "\uD834\uDD1E".
 		// single reverse solidus character may be represented in json as "\u005C".
 		// include these in ASslice below.
-		ASslice: []string{"Aone", "Atwo", "Athree",
-			"Afour.reverse_solidus.\u005c", "Afive.Gclef.\U0001d11E"},
+		ASslice: []string{
+			strRpt("Aone"),
+			strRpt("Atwo"),
+			strRpt("Athree"),
+			strRpt("Afour.reverse_solidus.\u005c"),
+			strRpt("Afive.Gclef.\U0001d11E\"ugorji\"done.")},
 		AI64slice: []int64{1, -22, 333, -4444, 55555, -666666},
-		AMSU16:    map[string]uint16{"1": 1, "22": 2, "333": 3, "4444": 4},
+		AMSU16:    map[string]uint16{strRpt("1"): 1, strRpt("22"): 2, strRpt("333"): 3, strRpt("4444"): 4},
 		AF64slice: []float64{
 			11.11e-11, -11.11e+11,
 			2.222E+12, -2.222E-12,
@@ -236,7 +241,7 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 	}
 
 	*ts = testStrucCommon{
-		S: "some string",
+		S: strRpt(`some really really cool names that are nigerian and american like "ugorji melody nwoke" - get it? `),
 
 		// set the numbers close to the limits
 		I8:   math.MaxInt8 * 2 / 3,  // 8,
@@ -259,7 +264,7 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 		B:  true,
 		By: 5,
 
-		Sslice:    []string{"one", "two", "three"},
+		Sslice:    []string{strRpt("one"), strRpt("two"), strRpt("three")},
 		I64slice:  []int64{1111, 2222, 3333},
 		I16slice:  []int16{44, 55, 66},
 		Ui64slice: []uint64{12121212, 34343434, 56565656},
@@ -268,14 +273,15 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 		Byslice:   []byte{13, 14, 15},
 
 		Msi64: map[string]int64{
-			"one": 1,
-			"two": 2,
+			strRpt("one"):       1,
+			strRpt("two"):       2,
+			strRpt("\"three\""): 3,
 		},
 
 		Ui64array: [4]uint64{4, 16, 64, 256},
 
 		WrapSliceInt64:  []uint64{4, 16, 64, 256},
-		WrapSliceString: []string{"4", "16", "64", "256"},
+		WrapSliceString: []string{strRpt("4"), strRpt("16"), strRpt("64"), strRpt("256")},
 
 		// DecodeNaked bombs here, because the stringUint64T is decoded as a map,
 		// and a map cannot be the key type of a map.
@@ -287,7 +293,7 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 
 		// make Simplef same as top-level
 		Simplef: testSimpleFields{
-			S: "some string",
+			S: strRpt(`some really really cool names that are nigerian and american like "ugorji melody nwoke" - get it? `),
 
 			// set the numbers close to the limits
 			I8:   math.MaxInt8 * 2 / 3,  // 8,
@@ -310,7 +316,7 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 			B:  true,
 			By: 5,
 
-			Sslice:    []string{"one", "two", "three"},
+			Sslice:    []string{strRpt("one"), strRpt("two"), strRpt("three")},
 			I64slice:  []int64{1111, 2222, 3333},
 			I16slice:  []int16{44, 55, 66},
 			Ui64slice: []uint64{12121212, 34343434, 56565656},
@@ -319,14 +325,15 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 			Byslice:   []byte{13, 14, 15},
 
 			Msi64: map[string]int64{
-				"one": 1,
-				"two": 2,
+				strRpt("one"):       1,
+				strRpt("two"):       2,
+				strRpt("\"three\""): 3,
 			},
 
 			Ui64array: [4]uint64{4, 16, 64, 256},
 
 			WrapSliceInt64:  []uint64{4, 16, 64, 256},
-			WrapSliceString: []string{"4", "16", "64", "256"},
+			WrapSliceString: []string{strRpt("4"), strRpt("16"), strRpt("64"), strRpt("256")},
 		},
 
 		AnonInTestStruc: a,
@@ -337,10 +344,10 @@ func populateTestStrucCommon(ts *testStrucCommon, bench, useInterface, useString
 
 	if useInterface {
 		ts.AnonInTestStrucIntf = &AnonInTestStrucIntf{
-			Islice: []interface{}{"true", true, "no", false, uint64(288), float64(0.4)},
+			Islice: []interface{}{strRpt("true"), true, strRpt("no"), false, uint64(288), float64(0.4)},
 			Ms: map[string]interface{}{
-				"true":     "true",
-				"int64(9)": false,
+				strRpt("true"):     strRpt("true"),
+				strRpt("int64(9)"): false,
 			},
 			T: testStrucTime,
 		}
@@ -373,9 +380,13 @@ func newTestStruc(depth int, bench, useInterface, useStringKeyOnly bool) (ts *Te
 		if ts.Mts == nil {
 			ts.Mts = make(map[string]TestStruc)
 		}
-		ts.Mtsptr["0"] = newTestStruc(depth, bench, useInterface, useStringKeyOnly)
-		ts.Mts["0"] = *(ts.Mtsptr["0"])
-		ts.Its = append(ts.Its, ts.Mtsptr["0"])
+		ts.Mtsptr[strRpt("0")] = newTestStruc(depth, bench, useInterface, useStringKeyOnly)
+		ts.Mts[strRpt("0")] = *(ts.Mtsptr[strRpt("0")])
+		ts.Its = append(ts.Its, ts.Mtsptr[strRpt("0")])
 	}
 	return
 }
+
+func strRpt(s string) string {
+	return strings.Repeat(s, testNumRepeatString)
+}

+ 5 - 0
codec/z_all_test.go

@@ -51,6 +51,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testMaxInitLen = 0
 	testJsonIndent = 0
 	testUseIoWrapper = false
+	testNumRepeatString = 10
 	testReinit()
 	t.Run("optionsFalse", f)
 
@@ -86,6 +87,10 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testReinit()
 	t.Run("optionsTrue-deepstruct", f)
 
+	testNumRepeatString = 40
+	testReinit()
+	t.Run("optionsTrue-largestrings", f)
+
 	// The following here MUST be tested individually, as they create
 	// side effects i.e. the decoded value is different.
 	// testDecodeOptions.MapValueReset = true // ok - no side effects