فهرست منبع

codec: support ZeroCopy to allow decoded output to point into the input bytes

ZeroCopy controls whether decoded values point into the
input bytes passed into a NewDecoderBytes/ResetBytes(...) call.

To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
then a []byte in the output result may just be a slice of (point into)
the input bytes.

This optimization prevents unnecessary copying.

However, it is made optional, as the caller MUST ensure that the input bytes
are not modified after Decode() returns.

Fixes #312
Ugorji Nwoke 6 سال پیش
والد
کامیت
42bc974514
8فایلهای تغییر یافته به همراه112 افزوده شده و 24 حذف شده
  1. 2 0
      codec/bench/z_all_bench_test.go
  2. 5 6
      codec/binc.go
  3. 5 6
      codec/cbor.go
  4. 61 0
      codec/codec_test.go
  5. 23 0
      codec/decode.go
  6. 5 6
      codec/msgpack.go
  7. 5 6
      codec/simple.go
  8. 6 0
      codec/z_all_test.go

+ 2 - 0
codec/bench/z_all_bench_test.go

@@ -42,6 +42,8 @@ func benchmarkGroupReset() {
 
 
 	testDepth = benchmarkGroupSave.testDepth
 	testDepth = benchmarkGroupSave.testDepth
 	testMapStringKeyOnly = benchmarkGroupSave.testMapStringKeyOnly
 	testMapStringKeyOnly = benchmarkGroupSave.testMapStringKeyOnly
+
+	testDecodeOptions.ZeroCopy = true
 }
 }
 
 
 func benchmarkOneFn(fns []func(*testing.B)) func(*testing.B) {
 func benchmarkOneFn(fns []func(*testing.B)) func(*testing.B) {

+ 5 - 6
codec/binc.go

@@ -829,12 +829,11 @@ func (d *bincDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		return
 		return
 	}
 	}
 	d.bdRead = false
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }
 }

+ 5 - 6
codec/cbor.go

@@ -663,12 +663,11 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	}
 	clen := d.decLen()
 	clen := d.decLen()
 	d.bdRead = false
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }
 }

+ 61 - 0
codec/codec_test.go

@@ -3133,6 +3133,47 @@ func doTestPreferArrayOverSlice(t *testing.T, h Handle) {
 	testDeepEqualErr(s2, v, t, t.Name())
 	testDeepEqualErr(s2, v, t, t.Name())
 }
 }
 
 
+func doTestZeroCopyBytes(t *testing.T, h Handle) {
+	testOnce.Do(testInitAll)
+	// jsonhandle and cborhandle with indefiniteLength do not support inline bytes, so skip them.
+	if _, ok := h.(*JsonHandle); ok { // if h == testJsonH {
+		t.Skip()
+	}
+	if ch, ok := h.(*CborHandle); ok && ch.IndefiniteLength {
+		t.Skip()
+	}
+
+	bh := basicHandle(h)
+	zc := bh.ZeroCopy
+	defer func() {
+		bh.ZeroCopy = zc
+	}()
+	bh.ZeroCopy = true
+
+	s := []byte("hello")
+	var v []byte
+	bs := testMarshalErr(s, h, t, t.Name())
+
+	// Note: this test only works for decoding from []byte, so cannot use testUnmarshalErr
+	NewDecoderBytes(bs, h).MustDecode(&v)
+	// testUnmarshalErr(&v, bs, h, t, t.Name())
+
+	// validate that v (the decoded value) points into the bs stream
+	for i := range bs {
+		if &bs[i] == &v[0] {
+			return
+		}
+	}
+
+	// if not match, then a failure happened.
+	if len(bs) > 0 && len(v) > 0 {
+		t.Logf("%s: ZeroCopy=true, but decoded (%p) is not slice of input: (%p)", h.Name(), &v[0], &bs[0])
+	} else {
+		t.Logf("%s: ZeroCopy=true, but decoded OR input slice is empty: %v, %v", h.Name(), v, bs)
+	}
+	t.FailNow()
+}
+
 func TestBufioDecReader(t *testing.T) {
 func TestBufioDecReader(t *testing.T) {
 	testOnce.Do(testInitAll)
 	testOnce.Do(testInitAll)
 	doTestBufioDecReader(t, 13)
 	doTestBufioDecReader(t, 13)
@@ -4087,6 +4128,26 @@ func TestSimplePreferArrayOverSlice(t *testing.T) {
 	doTestPreferArrayOverSlice(t, testSimpleH)
 	doTestPreferArrayOverSlice(t, testSimpleH)
 }
 }
 
 
+func TestJsonZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testJsonH)
+}
+
+func TestCborZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testCborH)
+}
+
+func TestMsgpackZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testMsgpackH)
+}
+
+func TestBincZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testBincH)
+}
+
+func TestSimpleZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testSimpleH)
+}
+
 // --------
 // --------
 
 
 func TestMultipleEncDec(t *testing.T) {
 func TestMultipleEncDec(t *testing.T) {

+ 23 - 0
codec/decode.go

@@ -269,6 +269,19 @@ type DecodeOptions struct {
 	// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
 	// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
 	// By default, they are decoded as []byte, but can be decoded as string (if configured).
 	// By default, they are decoded as []byte, but can be decoded as string (if configured).
 	RawToString bool
 	RawToString bool
+
+	// ZeroCopy controls whether decoded values point into the
+	// input bytes passed into a NewDecoderBytes/ResetBytes(...) call.
+	//
+	// To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
+	// then a []byte in the output result may just be a slice of (point into)
+	// the input bytes.
+	//
+	// This optimization prevents unnecessary copying.
+	//
+	// However, it is made optional, as the caller MUST ensure that the input parameter
+	// is not modified after the Decode() happens.
+	ZeroCopy bool
 }
 }
 
 
 // ----------------------------------------
 // ----------------------------------------
@@ -1819,6 +1832,16 @@ func (d *Decoder) sideDecode(v interface{}, bs []byte) {
 	NewDecoderBytes(bs, d.hh).decodeValue(rv, d.h.fnNoExt(rv.Type()))
 	NewDecoderBytes(bs, d.hh).decodeValue(rv, d.h.fnNoExt(rv.Type()))
 }
 }
 
 
+// func (d *Decoder) bytesInline(clen int, bs []byte, zerocopy bool) []byte {
+// 	if d.bytes && (zerocopy || d.h.ZeroCopy) {
+// 		return d.decRd.rb.readx(uint(clen))
+// 	}
+// 	if zerocopy && len(bs) == 0 {
+// 		bs = d.b[:]
+// 	}
+// 	return decByteSlice(d.r(), clen, d.h.MaxInitLen, bs)
+// }
+
 // --------------------------------------------------
 // --------------------------------------------------
 
 
 // decSliceHelper assists when decoding into a slice, from a map or an array in the stream.
 // decSliceHelper assists when decoding into a slice, from a map or an array in the stream.

+ 5 - 6
codec/msgpack.go

@@ -762,12 +762,11 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 	}
 	}
 
 
 	d.bdRead = false
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }
 }

+ 5 - 6
codec/simple.go

@@ -442,12 +442,11 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 
 
 	clen := d.decLen()
 	clen := d.decLen()
 	d.bdRead = false
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }
 }

+ 6 - 0
codec/z_all_test.go

@@ -60,6 +60,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testUseReset = true
 	testUseReset = true
 
 
 	// xdebugf("with StructToArray=true")
 	// xdebugf("with StructToArray=true")
+	testDecodeOptions.ZeroCopy = true
 	testDecodeOptions.InternString = true
 	testDecodeOptions.InternString = true
 	testDecodeOptions.MapValueReset = true
 	testDecodeOptions.MapValueReset = true
 	// testDecodeOptions.SignedInteger = true
 	// testDecodeOptions.SignedInteger = true
@@ -180,6 +181,7 @@ func testJsonGroup(t *testing.T) {
 	t.Run("TestJsonRawToStringToRawEtc", TestJsonRawToStringToRawEtc)
 	t.Run("TestJsonRawToStringToRawEtc", TestJsonRawToStringToRawEtc)
 	t.Run("TestJsonStructKeyType", TestJsonStructKeyType)
 	t.Run("TestJsonStructKeyType", TestJsonStructKeyType)
 	t.Run("TestJsonPreferArrayOverSlice", TestJsonPreferArrayOverSlice)
 	t.Run("TestJsonPreferArrayOverSlice", TestJsonPreferArrayOverSlice)
+	t.Run("TestJsonZeroCopyBytes", TestJsonZeroCopyBytes)
 
 
 	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
 	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
 }
 }
@@ -216,6 +218,7 @@ func testBincGroup(t *testing.T) {
 	t.Run("TestBincRawToStringToRawEtc", TestBincRawToStringToRawEtc)
 	t.Run("TestBincRawToStringToRawEtc", TestBincRawToStringToRawEtc)
 	t.Run("TestBincStructKeyType", TestBincStructKeyType)
 	t.Run("TestBincStructKeyType", TestBincStructKeyType)
 	t.Run("TestBincPreferArrayOverSlice", TestBincPreferArrayOverSlice)
 	t.Run("TestBincPreferArrayOverSlice", TestBincPreferArrayOverSlice)
+	t.Run("TestBincZeroCopyBytes", TestBincZeroCopyBytes)
 }
 }
 
 
 func testCborGroup(t *testing.T) {
 func testCborGroup(t *testing.T) {
@@ -250,6 +253,7 @@ func testCborGroup(t *testing.T) {
 	t.Run("TestCborRawToStringToRawEtc", TestCborRawToStringToRawEtc)
 	t.Run("TestCborRawToStringToRawEtc", TestCborRawToStringToRawEtc)
 	t.Run("TestCborStructKeyType", TestCborStructKeyType)
 	t.Run("TestCborStructKeyType", TestCborStructKeyType)
 	t.Run("TestCborPreferArrayOverSlice", TestCborPreferArrayOverSlice)
 	t.Run("TestCborPreferArrayOverSlice", TestCborPreferArrayOverSlice)
+	t.Run("TestCborZeroCopyBytes", TestCborZeroCopyBytes)
 
 
 	t.Run("TestCborHalfFloat", TestCborHalfFloat)
 	t.Run("TestCborHalfFloat", TestCborHalfFloat)
 	t.Run("TestCborSkipTags", TestCborSkipTags)
 	t.Run("TestCborSkipTags", TestCborSkipTags)
@@ -286,6 +290,7 @@ func testMsgpackGroup(t *testing.T) {
 	t.Run("TestMsgpackRawToStringToRawEtc", TestMsgpackRawToStringToRawEtc)
 	t.Run("TestMsgpackRawToStringToRawEtc", TestMsgpackRawToStringToRawEtc)
 	t.Run("TestMsgpackStructKeyType", TestMsgpackStructKeyType)
 	t.Run("TestMsgpackStructKeyType", TestMsgpackStructKeyType)
 	t.Run("TestMsgpackPreferArrayOverSlice", TestMsgpackPreferArrayOverSlice)
 	t.Run("TestMsgpackPreferArrayOverSlice", TestMsgpackPreferArrayOverSlice)
+	t.Run("TestMsgpackZeroCopyBytes", TestMsgpackZeroCopyBytes)
 
 
 	t.Run("TestMsgpackDecodeMapAndExtSizeMismatch", TestMsgpackDecodeMapAndExtSizeMismatch)
 	t.Run("TestMsgpackDecodeMapAndExtSizeMismatch", TestMsgpackDecodeMapAndExtSizeMismatch)
 }
 }
@@ -320,6 +325,7 @@ func testSimpleGroup(t *testing.T) {
 	t.Run("TestSimpleRawToStringToRawEtc", TestSimpleRawToStringToRawEtc)
 	t.Run("TestSimpleRawToStringToRawEtc", TestSimpleRawToStringToRawEtc)
 	t.Run("TestSimpleStructKeyType", TestSimpleStructKeyType)
 	t.Run("TestSimpleStructKeyType", TestSimpleStructKeyType)
 	t.Run("TestSimplePreferArrayOverSlice", TestSimplePreferArrayOverSlice)
 	t.Run("TestSimplePreferArrayOverSlice", TestSimplePreferArrayOverSlice)
+	t.Run("TestSimpleZeroCopyBytes", TestSimpleZeroCopyBytes)
 }
 }
 
 
 func testSimpleMammothGroup(t *testing.T) {
 func testSimpleMammothGroup(t *testing.T) {