Browse Source

codec: handle []byte encoded as array (as opposed to native mode per format)

Generally, formats (e.g. cbor, msgpack) have a defined way of encoding
binary data ([]byte). However, they also support encoding any array of values.

It is possible to encode a []int8 of non-negative numbers, and then decode
that into a []uint8. In this mode, the encoded stream does not use the
format's native representation for bytes; it is just an array of small
non-negative numbers.

We supported this before in a non-performant way (by delegating to
fastpathTV.DecSliceUint8V). Now, we support it better by having each driver's
DecodeBytes read the sequence of numbers directly.

While here, stop generating fast-path methods for []uint8 and []uintptr, and
for maps with uintptr keys or values. []uint8 is already handled specially,
and uintptr is uncommon in use, so it was dropped to manage binary size.

Finally, expand the values in generated tests so we exercise more float types.
Ugorji Nwoke 6 years ago
parent
commit
e05b287349

+ 10 - 2
codec/binc.go

@@ -782,8 +782,16 @@ func (d *bincDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	// check if an "array" of uint8's (see ContainerType for how to infer if an array)
 	if d.vd == bincVdArray {
-		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
-		return
+		if zerocopy && len(bs) == 0 {
+			bs = d.d.b[:]
+		}
+		// bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
+		slen := d.ReadArrayStart()
+		bs = usableByteSlice(bs, slen)
+		for i := 0; i < slen; i++ {
+			bs[i] = uint8(chkOvf.UintV(d.DecodeUint64(), 8))
+		}
+		return bs
 	}
 	var clen int
 	if d.vd == bincVdString || d.vd == bincVdByteArray {

+ 36 - 11
codec/cbor.go

@@ -340,15 +340,15 @@ func (d *cborDecDriver) TryDecodeAsNil() bool {
 	return false
 }
 
-func (d *cborDecDriver) CheckBreak() bool {
+func (d *cborDecDriver) CheckBreak() (v bool) {
 	if !d.bdRead {
 		d.readNextBd()
 	}
 	if d.bd == cborBdBreak {
 		d.bdRead = false
-		return true
+		v = true
 	}
-	return false
+	return
 }
 
 func (d *cborDecDriver) decUint() (ui uint64) {
@@ -475,11 +475,9 @@ func (d *cborDecDriver) decLen() int {
 
 func (d *cborDecDriver) decAppendIndefiniteBytes(bs []byte) []byte {
 	d.bdRead = false
-	for {
-		if d.CheckBreak() {
-			break
-		}
-		if major := d.bd >> 5; major != cborMajorBytes && major != cborMajorText {
+	for !d.CheckBreak() {
+		major := d.bd >> 5
+		if major != cborMajorBytes && major != cborMajorText {
 			d.d.errorf("expect bytes/string major type in indefinite string/bytes;"+
 				" got major %v from descriptor %x/%x", major, d.bd, cbordesc(d.bd))
 			return nil
@@ -521,9 +519,36 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		return d.decAppendIndefiniteBytes(bs[:0])
 	}
 	// check if an "array" of uint8's (see ContainerType for how to infer if an array)
-	if d.bd == cborBdIndefiniteArray || (d.bd >= cborBaseArray && d.bd < cborBaseMap) {
-		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
-		return
+	// if d.bd == cborBdIndefiniteArray || (d.bd >= cborBaseArray && d.bd < cborBaseMap) {
+	// 	bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
+	// 	return
+	// }
+	if d.bd == cborBdIndefiniteArray {
+		d.bdRead = false
+		if zerocopy && len(bs) == 0 {
+			bs = d.d.b[:]
+		}
+		if bs == nil {
+			bs = []byte{}
+		} else {
+			bs = bs[:0]
+		}
+		for !d.CheckBreak() {
+			bs = append(bs, uint8(chkOvf.UintV(d.DecodeUint64(), 8)))
+		}
+		return bs
+	}
+	if d.bd >= cborBaseArray && d.bd < cborBaseMap {
+		d.bdRead = false
+		if zerocopy && len(bs) == 0 {
+			bs = d.d.b[:]
+		}
+		slen := d.decLen()
+		bs = usableByteSlice(bs, slen)
+		for i := 0; i < slen; i++ {
+			bs[i] = uint8(chkOvf.UintV(d.DecodeUint64(), 8))
+		}
+		return bs
 	}
 	clen := d.decLen()
 	d.bdRead = false

+ 41 - 3
codec/codec_test.go

@@ -2482,9 +2482,6 @@ func doTestMultipleEncDec(t *testing.T, name string, h Handle) {
 
 func doTestSelfExt(t *testing.T, name string, h Handle) {
 	testOnce.Do(testInitAll)
-	// encode a string multiple times.
-	// decode it multiple times.
-	// ensure we get the value each time
 	var ts TestSelfExtImpl
 	ts.S = "ugorji"
 	ts.I = 5678
@@ -2496,6 +2493,27 @@ func doTestSelfExt(t *testing.T, name string, h Handle) {
 	testDeepEqualErr(&ts, &ts2, t, name)
 }
 
+func doTestBytesEncodedAsArray(t *testing.T, name string, h Handle) {
+	testOnce.Do(testInitAll)
+	// Need to test edge case where bytes are encoded as an array
+	// (not using optimized []byte native format)
+
+	// encode []int8 (or int32 or any numeric type) with all positive numbers
+	// decode it into []uint8
+	var in = make([]int32, 128)
+	var un = make([]uint8, 128)
+	for i := range in {
+		in[i] = int32(i)
+		un[i] = uint8(i)
+	}
+	var out []byte
+	bs := testMarshalErr(&in, h, t, name)
+	testUnmarshalErr(&out, bs, h, t, name)
+	// xdebugf("in:  %v", in)
+	// xdebug2f("out: %v\n", out)
+	testDeepEqualErr(un, out, t, name)
+}
+
 // -----------------
 
 func TestJsonDecodeNonStringScalarInStringContext(t *testing.T) {
@@ -3309,6 +3327,26 @@ func TestSimpleSelfExt(t *testing.T) {
 	doTestSelfExt(t, "simple", testSimpleH)
 }
 
+func TestJsonBytesEncodedAsArray(t *testing.T) {
+	doTestBytesEncodedAsArray(t, "json", testJsonH)
+}
+
+func TestCborBytesEncodedAsArray(t *testing.T) {
+	doTestBytesEncodedAsArray(t, "cbor", testCborH)
+}
+
+func TestMsgpackBytesEncodedAsArray(t *testing.T) {
+	doTestBytesEncodedAsArray(t, "msgpack", testMsgpackH)
+}
+
+func TestBincBytesEncodedAsArray(t *testing.T) {
+	doTestBytesEncodedAsArray(t, "binc", testBincH)
+}
+
+func TestSimpleBytesEncodedAsArray(t *testing.T) {
+	doTestBytesEncodedAsArray(t, "simple", testSimpleH)
+}
+
 func TestMultipleEncDec(t *testing.T) {
 	doTestMultipleEncDec(t, "json", testJsonH)
 }

+ 1 - 0
codec/encode.go

@@ -536,6 +536,7 @@ func (e *Encoder) kSlice(f *codecFnInfo, rv reflect.Value) {
 	}
 	mbs := f.ti.mbs
 	rtelem := f.ti.elem
+
 	// if a slice, array or chan of bytes, treat specially
 	if !mbs && uint8TypId == rt2id(rtelem) { // NOT rtelem.Kind() == reflect.Uint8
 		e.kSliceBytes(rv, f.seq)

File diff suppressed because it is too large
+ 16 - 650
codec/fast-path.generated.go


+ 5 - 5
codec/fast-path.not.go

@@ -34,11 +34,11 @@ type fastpathA [0]fastpathE
 
 func (x fastpathA) index(rtid uintptr) int { return -1 }
 
-func (_ fastpathT) DecSliceUint8V(v []uint8, canChange bool, d *Decoder) (_ []uint8, changed bool) {
-	fn := d.h.fn(uint8SliceTyp, true, true)
-	d.kSlice(&fn.i, reflect.ValueOf(&v).Elem())
-	return v, true
-}
+// func (_ fastpathT) DecSliceUint8V(v []uint8, canChange bool, d *Decoder) (_ []uint8, changed bool) {
+// 	fn := d.h.fn(uint8SliceTyp, true, true)
+// 	d.kSlice(&fn.i, reflect.ValueOf(&v).Elem())
+// 	return v, true
+// }
 
 var fastpathAV fastpathA
 var fastpathTV fastpathT

+ 13 - 13
codec/float.go

@@ -102,12 +102,13 @@ func parseFloatErr(b []byte) error {
 }
 
 func parseFloat32_custom(b []byte) (f float32, err error) {
-	mantissa, exp, neg, _, bad, ok := readFloat(b, fi32)
+	mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi32)
+	_ = trunc
 	if bad {
 		return 0, parseFloatErr(b)
 	}
-	// defer parseFloatDebug(b, 32, &trunc, exp, trunc, ok)
 	if ok {
+		// parseFloatDebug(b, 32, false, exp, trunc, ok)
 		f = float32(mantissa)
 		if neg {
 			f = -f
@@ -130,11 +131,13 @@ func parseFloat32_custom(b []byte) (f float32, err error) {
 		return
 	}
 FALLBACK:
+	// parseFloatDebug(b, 32, true, exp, trunc, ok)
 	return parseFloat32_strconv(b)
 }
 
 func parseFloat64_custom(b []byte) (f float64, err error) {
-	mantissa, exp, neg, _, bad, ok := readFloat(b, fi64)
+	mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi64)
+	_ = trunc
 	if bad {
 		return 0, parseFloatErr(b)
 	}
@@ -301,13 +304,10 @@ L:
 
 // fMul10ShiftU64
 
-// func parseFloatDebug(b []byte, bitsize int, strconv *bool, exp int16, trunc, ok bool) {
-// 	if false && bitsize == 64 {
-// 		return
-// 	}
-// 	if *strconv {
-// 		xdebugf("parseFloat%d: delegating: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
-// 	} else {
-// 		xdebug2f("parseFloat%d: attempting: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
-// 	}
-// }
+func parseFloatDebug(b []byte, bitsize int, strconv bool, exp int8, trunc, ok bool) {
+	if strconv {
+		xdebugf("parseFloat%d: delegating: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
+	} else {
+		xdebug2f("parseFloat%d: attempting: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
+	}
+}

+ 30 - 5
codec/gen.go

@@ -1998,11 +1998,14 @@ func genInternalZeroValue(s string) string {
 }
 
 var genInternalNonZeroValueIdx [6]uint64
-var genInternalNonZeroValueStrs = [2][6]string{
-	{`"string-is-an-interface"`, "true", `"some-string"`, `[]byte("some-string")`, "11.1", "33"},
-	{`"string-is-an-interface-2"`, "true", `"some-string-2"`, `[]byte("some-string-2")`, "22.2", "44"},
+var genInternalNonZeroValueStrs = [...][6]string{
+	{`"string-is-an-interface-1"`, "true", `"some-string-1"`, `[]byte("some-string-1")`, "11.1", "111"},
+	{`"string-is-an-interface-2"`, "false", `"some-string-2"`, `[]byte("some-string-2")`, "22.2", "77"},
+	{`"string-is-an-interface-3"`, "true", `"some-string-3"`, `[]byte("some-string-3")`, "33.3e3", "127"},
 }
 
+// Note: the last value in each row must be in the range 0-127 (as it may be put into an int8, uint8, etc)
+
 func genInternalNonZeroValue(s string) string {
 	var i int
 	switch s {
@@ -2020,7 +2023,9 @@ func genInternalNonZeroValue(s string) string {
 		i = 5
 	}
 	genInternalNonZeroValueIdx[i]++
-	return genInternalNonZeroValueStrs[genInternalNonZeroValueIdx[i]%2][i] // return string, to remove ambiguity
+	idx := genInternalNonZeroValueIdx[i]
+	slen := uint64(len(genInternalNonZeroValueStrs))
+	return genInternalNonZeroValueStrs[idx%slen][i] // return string, to remove ambiguity
 }
 
 func genInternalEncCommandAsString(s string, vname string) string {
@@ -2185,6 +2190,26 @@ func genInternalInit() {
 	mapvaltypes = types[:]
 
 	if genFastpathTrimTypes {
+		slicetypes = []string{
+			"interface{}",
+			"string",
+			"[]byte",
+			"float32",
+			"float64",
+			"uint",
+			// "uint8", // no need for fastpath of []uint8, as it is handled specially
+			"uint16",
+			"uint32",
+			"uint64",
+			// "uintptr",
+			"int",
+			"int8",
+			"int16",
+			"int32",
+			"int64",
+			"bool",
+		}
+
 		mapkeytypes = []string{
 			//"interface{}",
 			"string",
@@ -2214,7 +2239,7 @@ func genInternalInit() {
 			//"uint16",
 			//"uint32",
 			"uint64",
-			"uintptr",
+			// "uintptr",
 			"int",
 			//"int8",
 			//"int16",

+ 12 - 0
codec/helper.go

@@ -2056,6 +2056,17 @@ func isImmutableKind(k reflect.Kind) (v bool) {
 	return immutableKindsSet[k%reflect.Kind(len(immutableKindsSet))] // bounds-check-elimination
 }
 
+func usableByteSlice(bs []byte, slen int) []byte {
+	if cap(bs) >= slen {
+		if bs == nil {
+			return []byte{}
+		} else {
+			return bs[:slen]
+		}
+	}
+	return make([]byte, slen)
+}
+
 // ----
 
 type codecFnInfo struct {
@@ -2196,6 +2207,7 @@ func noFrac64(f float64) (v bool) {
 	}
 	return
 }
+
 func noFrac32(f float32) (v bool) {
 	x := math.Float32bits(f)
 	e := uint32(x>>23)&0xFF - 127 // uint(x>>shift)&mask - bias

+ 23 - 2
codec/json.go

@@ -967,8 +967,29 @@ func (d *jsonDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	// check if an "array" of uint8's (see ContainerType for how to infer if an array)
 	if d.tok == '[' {
-		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
-		return
+		// bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
+		if zerocopy && len(bs) == 0 {
+			bs = d.d.b[:]
+		}
+		if bs == nil {
+			bs = []byte{}
+		} else {
+			bs = bs[:0]
+		}
+		d.tok = 0
+		bs = append(bs, uint8(d.DecodeUint64()))
+		d.tok = d.r.skip(&jsonCharWhitespaceSet)
+		for d.tok != ']' {
+			if d.tok != ',' {
+				d.d.errorf("read array element - expect char '%c' but got char '%c'", ',', d.tok)
+			}
+			d.tok = 0
+			bs = append(bs, uint8(chkOvf.UintV(d.DecodeUint64(), 8)))
+			d.tok = d.r.skip(&jsonCharWhitespaceSet)
+		}
+		d.tok = 0
+		// xdebug2f("bytes from array: returning: %v", bs)
+		return bs
 	}
 	d.appendStringAsBytes()
 	// base64 encodes []byte{} as "", and we encode nil []byte as null.

File diff suppressed because it is too large
+ 349 - 437
codec/mammoth2_codecgen_generated_test.go


+ 0 - 16
codec/mammoth2_generated_test.go

@@ -75,16 +75,12 @@ type TestMammoth2 struct {
 	FptrSliceFloat64 *[]float64
 	FSliceUint       []uint
 	FptrSliceUint    *[]uint
-	FSliceUint8      []uint8
-	FptrSliceUint8   *[]uint8
 	FSliceUint16     []uint16
 	FptrSliceUint16  *[]uint16
 	FSliceUint32     []uint32
 	FptrSliceUint32  *[]uint32
 	FSliceUint64     []uint64
 	FptrSliceUint64  *[]uint64
-	FSliceUintptr    []uintptr
-	FptrSliceUintptr *[]uintptr
 	FSliceInt        []int
 	FptrSliceInt     *[]int
 	FSliceInt8       []int8
@@ -110,8 +106,6 @@ type TestMammoth2 struct {
 	FptrMapStringUint8   *map[string]uint8
 	FMapStringUint64     map[string]uint64
 	FptrMapStringUint64  *map[string]uint64
-	FMapStringUintptr    map[string]uintptr
-	FptrMapStringUintptr *map[string]uintptr
 	FMapStringInt        map[string]int
 	FptrMapStringInt     *map[string]int
 	FMapStringInt64      map[string]int64
@@ -134,8 +128,6 @@ type TestMammoth2 struct {
 	FptrMapUintUint8     *map[uint]uint8
 	FMapUintUint64       map[uint]uint64
 	FptrMapUintUint64    *map[uint]uint64
-	FMapUintUintptr      map[uint]uintptr
-	FptrMapUintUintptr   *map[uint]uintptr
 	FMapUintInt          map[uint]int
 	FptrMapUintInt       *map[uint]int
 	FMapUintInt64        map[uint]int64
@@ -158,8 +150,6 @@ type TestMammoth2 struct {
 	FptrMapUint8Uint8    *map[uint8]uint8
 	FMapUint8Uint64      map[uint8]uint64
 	FptrMapUint8Uint64   *map[uint8]uint64
-	FMapUint8Uintptr     map[uint8]uintptr
-	FptrMapUint8Uintptr  *map[uint8]uintptr
 	FMapUint8Int         map[uint8]int
 	FptrMapUint8Int      *map[uint8]int
 	FMapUint8Int64       map[uint8]int64
@@ -182,8 +172,6 @@ type TestMammoth2 struct {
 	FptrMapUint64Uint8   *map[uint64]uint8
 	FMapUint64Uint64     map[uint64]uint64
 	FptrMapUint64Uint64  *map[uint64]uint64
-	FMapUint64Uintptr    map[uint64]uintptr
-	FptrMapUint64Uintptr *map[uint64]uintptr
 	FMapUint64Int        map[uint64]int
 	FptrMapUint64Int     *map[uint64]int
 	FMapUint64Int64      map[uint64]int64
@@ -206,8 +194,6 @@ type TestMammoth2 struct {
 	FptrMapIntUint8      *map[int]uint8
 	FMapIntUint64        map[int]uint64
 	FptrMapIntUint64     *map[int]uint64
-	FMapIntUintptr       map[int]uintptr
-	FptrMapIntUintptr    *map[int]uintptr
 	FMapIntInt           map[int]int
 	FptrMapIntInt        *map[int]int
 	FMapIntInt64         map[int]int64
@@ -230,8 +216,6 @@ type TestMammoth2 struct {
 	FptrMapInt64Uint8    *map[int64]uint8
 	FMapInt64Uint64      map[int64]uint64
 	FptrMapInt64Uint64   *map[int64]uint64
-	FMapInt64Uintptr     map[int64]uintptr
-	FptrMapInt64Uintptr  *map[int64]uintptr
 	FMapInt64Int         map[int64]int
 	FptrMapInt64Int      *map[int64]int
 	FMapInt64Int64       map[int64]int64

File diff suppressed because it is too large
+ 216 - 282
codec/mammoth_generated_test.go


+ 7 - 2
codec/msgpack.go

@@ -686,8 +686,13 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 		if zerocopy && len(bs) == 0 {
 			bs = d.d.b[:]
 		}
-		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
-		return
+		// bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
+		slen := d.ReadArrayStart()
+		bs = usableByteSlice(bs, slen)
+		for i := 0; i < slen; i++ {
+			bs[i] = uint8(chkOvf.UintV(d.DecodeUint64(), 8))
+		}
+		return bs
 	} else {
 		d.d.errorf("invalid byte descriptor for decoding bytes, got: 0x%x", d.bd)
 		return

+ 7 - 2
codec/simple.go

@@ -419,8 +419,13 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		if len(bs) == 0 && zerocopy {
 			bs = d.d.b[:]
 		}
-		bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
-		return
+		// bsOut, _ = fastpathTV.DecSliceUint8V(bs, true, d.d)
+		slen := d.ReadArrayStart()
+		bs = usableByteSlice(bs, slen)
+		for i := 0; i < slen; i++ {
+			bs[i] = uint8(chkOvf.UintV(d.DecodeUint64(), 8))
+		}
+		return bs
 	}
 
 	clen := d.decLen()

+ 5 - 0
codec/z_all_test.go

@@ -172,6 +172,7 @@ func testJsonGroup(t *testing.T) {
 	t.Run("TestJsonMissingFields", TestJsonMissingFields)
 	t.Run("TestJsonMaxDepth", TestJsonMaxDepth)
 	t.Run("TestJsonSelfExt", TestJsonSelfExt)
+	t.Run("TestJsonBytesEncodedAsArray", TestJsonBytesEncodedAsArray)
 
 	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
 }
@@ -202,6 +203,7 @@ func testBincGroup(t *testing.T) {
 	t.Run("TestBincMissingFields", TestBincMissingFields)
 	t.Run("TestBincMaxDepth", TestBincMaxDepth)
 	t.Run("TestBincSelfExt", TestBincSelfExt)
+	t.Run("TestBincBytesEncodedAsArray", TestBincBytesEncodedAsArray)
 }
 
 func testCborGroup(t *testing.T) {
@@ -231,6 +233,7 @@ func testCborGroup(t *testing.T) {
 	t.Run("TestCborMissingFields", TestCborMissingFields)
 	t.Run("TestCborMaxDepth", TestCborMaxDepth)
 	t.Run("TestCborSelfExt", TestCborSelfExt)
+	t.Run("TestCborBytesEncodedAsArray", TestCborBytesEncodedAsArray)
 
 	t.Run("TestCborHalfFloat", TestCborHalfFloat)
 }
@@ -260,6 +263,7 @@ func testMsgpackGroup(t *testing.T) {
 	t.Run("TestMsgpackMissingFields", TestMsgpackMissingFields)
 	t.Run("TestMsgpackMaxDepth", TestMsgpackMaxDepth)
 	t.Run("TestMsgpackSelfExt", TestMsgpackSelfExt)
+	t.Run("TestMsgpackBytesEncodedAsArray", TestMsgpackBytesEncodedAsArray)
 
 	t.Run("TestMsgpackDecodeMapAndExtSizeMismatch", TestMsgpackDecodeMapAndExtSizeMismatch)
 }
@@ -288,6 +292,7 @@ func testSimpleGroup(t *testing.T) {
 	t.Run("TestSimpleMissingFields", TestSimpleMissingFields)
 	t.Run("TestSimpleMaxDepth", TestSimpleMaxDepth)
 	t.Run("TestSimpleSelfExt", TestSimpleSelfExt)
+	t.Run("TestSimpleBytesEncodedAsArray", TestSimpleBytesEncodedAsArray)
 }
 
 func testSimpleMammothGroup(t *testing.T) {

Some files were not shown because too many files changed in this diff