瀏覽代碼

codec: support ZeroCopy to allow decoded output to point into input bytes

ZeroCopy controls whether decoded values point into the
input bytes passed into a NewDecoderBytes/ResetBytes(...) call.

To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
then a []byte in the output result may just be a slice of (point into)
the input bytes.

This optimization prevents unnecessary copying.

However, it is made optional, as the caller MUST ensure that the input parameter
is not modified after the Decode() happens.

Fixes #312
Ugorji Nwoke 6 年之前
父節點
當前提交
42bc974514
共有 8 個文件被更改,包括 112 次插入24 次删除
  1. 2 0
      codec/bench/z_all_bench_test.go
  2. 5 6
      codec/binc.go
  3. 5 6
      codec/cbor.go
  4. 61 0
      codec/codec_test.go
  5. 23 0
      codec/decode.go
  6. 5 6
      codec/msgpack.go
  7. 5 6
      codec/simple.go
  8. 6 0
      codec/z_all_test.go

+ 2 - 0
codec/bench/z_all_bench_test.go

@@ -42,6 +42,8 @@ func benchmarkGroupReset() {
 
 	testDepth = benchmarkGroupSave.testDepth
 	testMapStringKeyOnly = benchmarkGroupSave.testMapStringKeyOnly
+
+	testDecodeOptions.ZeroCopy = true
 }
 
 func benchmarkOneFn(fns []func(*testing.B)) func(*testing.B) {

+ 5 - 6
codec/binc.go

@@ -829,12 +829,11 @@ func (d *bincDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		return
 	}
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }

+ 5 - 6
codec/cbor.go

@@ -663,12 +663,11 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	clen := d.decLen()
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }

+ 61 - 0
codec/codec_test.go

@@ -3133,6 +3133,47 @@ func doTestPreferArrayOverSlice(t *testing.T, h Handle) {
 	testDeepEqualErr(s2, v, t, t.Name())
 }
 
+func doTestZeroCopyBytes(t *testing.T, h Handle) {
+	testOnce.Do(testInitAll)
+	// JsonHandle, and CborHandle with IndefiniteLength set, do not support inline bytes, so skip them.
+	if _, ok := h.(*JsonHandle); ok { // if h == testJsonH {
+		t.Skip()
+	}
+	if ch, ok := h.(*CborHandle); ok && ch.IndefiniteLength {
+		t.Skip()
+	}
+
+	bh := basicHandle(h)
+	zc := bh.ZeroCopy // remember the handle's current setting so the deferred func can restore it
+	defer func() {
+		bh.ZeroCopy = zc
+	}()
+	bh.ZeroCopy = true
+
+	s := []byte("hello")
+	var v []byte
+	bs := testMarshalErr(s, h, t, t.Name())
+
+	// Note: this test only works for decoding from []byte, so cannot use testUnmarshalErr
+	NewDecoderBytes(bs, h).MustDecode(&v)
+	// testUnmarshalErr(&v, bs, h, t, t.Name())
+
+	// validate that v points into the bs stream, i.e. &v[0] is the address of some bs[i]
+	for i := range bs {
+		if &bs[i] == &v[0] { // NOTE(review): v[0] is indexed before the len(v) check below; an empty v would panic here
+			return
+		}
+	}
+
+	// no element of bs shares its address with v[0], so the decoded bytes are not a slice of the input: fail.
+	if len(bs) > 0 && len(v) > 0 {
+		t.Logf("%s: ZeroCopy=true, but decoded (%p) is not slice of input: (%p)", h.Name(), &v[0], &bs[0])
+	} else {
+		t.Logf("%s: ZeroCopy=true, but decoded OR input slice is empty: %v, %v", h.Name(), v, bs)
+	}
+	t.FailNow()
+}
+
 func TestBufioDecReader(t *testing.T) {
 	testOnce.Do(testInitAll)
 	doTestBufioDecReader(t, 13)
@@ -4087,6 +4128,26 @@ func TestSimplePreferArrayOverSlice(t *testing.T) {
 	doTestPreferArrayOverSlice(t, testSimpleH)
 }
 
+func TestJsonZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testJsonH)
+}
+
+func TestCborZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testCborH)
+}
+
+func TestMsgpackZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testMsgpackH)
+}
+
+func TestBincZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testBincH)
+}
+
+func TestSimpleZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testSimpleH)
+}
+
 // --------
 
 func TestMultipleEncDec(t *testing.T) {

+ 23 - 0
codec/decode.go

@@ -269,6 +269,19 @@ type DecodeOptions struct {
 	// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
 	// By default, they are decoded as []byte, but can be decoded as string (if configured).
 	RawToString bool
+
+	// ZeroCopy controls whether decoded values point into the
+	// input bytes passed into a NewDecoderBytes/ResetBytes(...) call.
+	//
+	// To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
+	// then a []byte in the output result may just be a slice of (point into)
+	// the input bytes.
+	//
+	// This optimization prevents unnecessary copying.
+	//
+	// However, it is made optional, as the caller MUST ensure that the input parameter
+	// is not modified after the Decode() happens.
+	ZeroCopy bool
 }
 
 // ----------------------------------------
@@ -1819,6 +1832,16 @@ func (d *Decoder) sideDecode(v interface{}, bs []byte) {
 	NewDecoderBytes(bs, d.hh).decodeValue(rv, d.h.fnNoExt(rv.Type()))
 }
 
+// func (d *Decoder) bytesInline(clen int, bs []byte, zerocopy bool) []byte {
+// 	if d.bytes && (zerocopy || d.h.ZeroCopy) {
+// 		return d.decRd.rb.readx(uint(clen))
+// 	}
+// 	if zerocopy && len(bs) == 0 {
+// 		bs = d.b[:]
+// 	}
+// 	return decByteSlice(d.r(), clen, d.h.MaxInitLen, bs)
+// }
+
 // --------------------------------------------------
 
 // decSliceHelper assists when decoding into a slice, from a map or an array in the stream.

+ 5 - 6
codec/msgpack.go

@@ -762,12 +762,11 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 	}
 
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }

+ 5 - 6
codec/simple.go

@@ -442,12 +442,11 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 
 	clen := d.decLen()
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }

+ 6 - 0
codec/z_all_test.go

@@ -60,6 +60,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testUseReset = true
 
 	// xdebugf("with StructToArray=true")
+	testDecodeOptions.ZeroCopy = true
 	testDecodeOptions.InternString = true
 	testDecodeOptions.MapValueReset = true
 	// testDecodeOptions.SignedInteger = true
@@ -180,6 +181,7 @@ func testJsonGroup(t *testing.T) {
 	t.Run("TestJsonRawToStringToRawEtc", TestJsonRawToStringToRawEtc)
 	t.Run("TestJsonStructKeyType", TestJsonStructKeyType)
 	t.Run("TestJsonPreferArrayOverSlice", TestJsonPreferArrayOverSlice)
+	t.Run("TestJsonZeroCopyBytes", TestJsonZeroCopyBytes)
 
 	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
 }
@@ -216,6 +218,7 @@ func testBincGroup(t *testing.T) {
 	t.Run("TestBincRawToStringToRawEtc", TestBincRawToStringToRawEtc)
 	t.Run("TestBincStructKeyType", TestBincStructKeyType)
 	t.Run("TestBincPreferArrayOverSlice", TestBincPreferArrayOverSlice)
+	t.Run("TestBincZeroCopyBytes", TestBincZeroCopyBytes)
 }
 
 func testCborGroup(t *testing.T) {
@@ -250,6 +253,7 @@ func testCborGroup(t *testing.T) {
 	t.Run("TestCborRawToStringToRawEtc", TestCborRawToStringToRawEtc)
 	t.Run("TestCborStructKeyType", TestCborStructKeyType)
 	t.Run("TestCborPreferArrayOverSlice", TestCborPreferArrayOverSlice)
+	t.Run("TestCborZeroCopyBytes", TestCborZeroCopyBytes)
 
 	t.Run("TestCborHalfFloat", TestCborHalfFloat)
 	t.Run("TestCborSkipTags", TestCborSkipTags)
@@ -286,6 +290,7 @@ func testMsgpackGroup(t *testing.T) {
 	t.Run("TestMsgpackRawToStringToRawEtc", TestMsgpackRawToStringToRawEtc)
 	t.Run("TestMsgpackStructKeyType", TestMsgpackStructKeyType)
 	t.Run("TestMsgpackPreferArrayOverSlice", TestMsgpackPreferArrayOverSlice)
+	t.Run("TestMsgpackZeroCopyBytes", TestMsgpackZeroCopyBytes)
 
 	t.Run("TestMsgpackDecodeMapAndExtSizeMismatch", TestMsgpackDecodeMapAndExtSizeMismatch)
 }
@@ -320,6 +325,7 @@ func testSimpleGroup(t *testing.T) {
 	t.Run("TestSimpleRawToStringToRawEtc", TestSimpleRawToStringToRawEtc)
 	t.Run("TestSimpleStructKeyType", TestSimpleStructKeyType)
 	t.Run("TestSimplePreferArrayOverSlice", TestSimplePreferArrayOverSlice)
+	t.Run("TestSimpleZeroCopyBytes", TestSimpleZeroCopyBytes)
 }
 
 func testSimpleMammothGroup(t *testing.T) {