Browse Source

codec: support ZeroCopy to allow decoded output to point into input bytes

ZeroCopy controls whether decoded values point into the
input bytes passed into a NewDecoderBytes/ResetBytes(...) call.

To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
then a []byte in the output result may just be a slice of (point into)
the input bytes.

This optimization prevents unnecessary copying.

However, it is made optional, as the caller MUST ensure that the input parameter
is not modified after the Decode() happens.

Fixes #312
Ugorji Nwoke 6 years ago
parent
commit
42bc974514
8 changed files with 112 additions and 24 deletions
  1. 2 0
      codec/bench/z_all_bench_test.go
  2. 5 6
      codec/binc.go
  3. 5 6
      codec/cbor.go
  4. 61 0
      codec/codec_test.go
  5. 23 0
      codec/decode.go
  6. 5 6
      codec/msgpack.go
  7. 5 6
      codec/simple.go
  8. 6 0
      codec/z_all_test.go

+ 2 - 0
codec/bench/z_all_bench_test.go

@@ -42,6 +42,8 @@ func benchmarkGroupReset() {
 
 	testDepth = benchmarkGroupSave.testDepth
 	testMapStringKeyOnly = benchmarkGroupSave.testMapStringKeyOnly
+
+	testDecodeOptions.ZeroCopy = true
 }
 
 func benchmarkOneFn(fns []func(*testing.B)) func(*testing.B) {

+ 5 - 6
codec/binc.go

@@ -829,12 +829,11 @@ func (d *bincDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 		return
 	}
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }

+ 5 - 6
codec/cbor.go

@@ -663,12 +663,11 @@ func (d *cborDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 	}
 	clen := d.decLen()
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }

+ 61 - 0
codec/codec_test.go

@@ -3133,6 +3133,47 @@ func doTestPreferArrayOverSlice(t *testing.T, h Handle) {
 	testDeepEqualErr(s2, v, t, t.Name())
 }
 
+func doTestZeroCopyBytes(t *testing.T, h Handle) {
+	testOnce.Do(testInitAll)
+	// jsonhandle and cborhandle with indefiniteLength do not support inline bytes, so skip them.
+	if _, ok := h.(*JsonHandle); ok { // if h == testJsonH {
+		t.Skip()
+	}
+	if ch, ok := h.(*CborHandle); ok && ch.IndefiniteLength {
+		t.Skip()
+	}
+
+	bh := basicHandle(h)
+	zc := bh.ZeroCopy
+	defer func() {
+		bh.ZeroCopy = zc
+	}()
+	bh.ZeroCopy = true
+
+	s := []byte("hello")
+	var v []byte
+	bs := testMarshalErr(s, h, t, t.Name())
+
+	// Note: this test only works for decoding from []byte, so cannot use testUnmarshalErr
+	NewDecoderBytes(bs, h).MustDecode(&v)
+	// testUnmarshalErr(&v, bs, h, t, t.Name())
+
+	// validate that v (the decoded output) points into the bs stream
+	for i := range bs {
+		if &bs[i] == &v[0] {
+			return
+		}
+	}
+
+	// if not match, then a failure happened.
+	if len(bs) > 0 && len(v) > 0 {
+		t.Logf("%s: ZeroCopy=true, but decoded (%p) is not slice of input: (%p)", h.Name(), &v[0], &bs[0])
+	} else {
+		t.Logf("%s: ZeroCopy=true, but decoded OR input slice is empty: %v, %v", h.Name(), v, bs)
+	}
+	t.FailNow()
+}
+
 func TestBufioDecReader(t *testing.T) {
 	testOnce.Do(testInitAll)
 	doTestBufioDecReader(t, 13)
@@ -4087,6 +4128,26 @@ func TestSimplePreferArrayOverSlice(t *testing.T) {
 	doTestPreferArrayOverSlice(t, testSimpleH)
 }
 
+func TestJsonZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testJsonH)
+}
+
+func TestCborZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testCborH)
+}
+
+func TestMsgpackZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testMsgpackH)
+}
+
+func TestBincZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testBincH)
+}
+
+func TestSimpleZeroCopyBytes(t *testing.T) {
+	doTestZeroCopyBytes(t, testSimpleH)
+}
+
 // --------
 
 func TestMultipleEncDec(t *testing.T) {

+ 23 - 0
codec/decode.go

@@ -269,6 +269,19 @@ type DecodeOptions struct {
 	// RawToString controls how raw bytes in a stream are decoded into a nil interface{}.
 	// By default, they are decoded as []byte, but can be decoded as string (if configured).
 	RawToString bool
+
+	// ZeroCopy controls whether decoded values point into the
+	// input bytes passed into a NewDecoderBytes/ResetBytes(...) call.
+	//
+	// To illustrate, if ZeroCopy is set and decoding from a []byte (not an io.Reader),
+	// then a []byte in the output result may just be a slice of (point into)
+	// the input bytes.
+	//
+	// This optimization prevents unnecessary copying.
+	//
+	// However, it is made optional, as the caller MUST ensure that the input parameter
+	// is not modified after the Decode() happens.
+	ZeroCopy bool
 }
 
 // ----------------------------------------
@@ -1819,6 +1832,16 @@ func (d *Decoder) sideDecode(v interface{}, bs []byte) {
 	NewDecoderBytes(bs, d.hh).decodeValue(rv, d.h.fnNoExt(rv.Type()))
 }
 
+// func (d *Decoder) bytesInline(clen int, bs []byte, zerocopy bool) []byte {
+// 	if d.bytes && (zerocopy || d.h.ZeroCopy) {
+// 		return d.decRd.rb.readx(uint(clen))
+// 	}
+// 	if zerocopy && len(bs) == 0 {
+// 		bs = d.b[:]
+// 	}
+// 	return decByteSlice(d.r(), clen, d.h.MaxInitLen, bs)
+// }
+
 // --------------------------------------------------
 
 // decSliceHelper assists when decoding into a slice, from a map or an array in the stream.

+ 5 - 6
codec/msgpack.go

@@ -762,12 +762,11 @@ func (d *msgpackDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte)
 	}
 
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.h.MaxInitLen, bs)
 }

+ 5 - 6
codec/simple.go

@@ -442,12 +442,11 @@ func (d *simpleDecDriver) DecodeBytes(bs []byte, zerocopy bool) (bsOut []byte) {
 
 	clen := d.decLen()
 	d.bdRead = false
-	if zerocopy {
-		if d.d.bytes {
-			return d.d.decRd.rb.readx(uint(clen))
-		} else if len(bs) == 0 {
-			bs = d.d.b[:]
-		}
+	if d.d.bytes && (zerocopy || d.h.ZeroCopy) {
+		return d.d.decRd.rb.readx(uint(clen))
+	}
+	if zerocopy && len(bs) == 0 {
+		bs = d.d.b[:]
 	}
 	return decByteSlice(d.d.r(), clen, d.d.h.MaxInitLen, bs)
 }

+ 6 - 0
codec/z_all_test.go

@@ -60,6 +60,7 @@ func testSuite(t *testing.T, f func(t *testing.T)) {
 	testUseReset = true
 
 	// xdebugf("with StructToArray=true")
+	testDecodeOptions.ZeroCopy = true
 	testDecodeOptions.InternString = true
 	testDecodeOptions.MapValueReset = true
 	// testDecodeOptions.SignedInteger = true
@@ -180,6 +181,7 @@ func testJsonGroup(t *testing.T) {
 	t.Run("TestJsonRawToStringToRawEtc", TestJsonRawToStringToRawEtc)
 	t.Run("TestJsonStructKeyType", TestJsonStructKeyType)
 	t.Run("TestJsonPreferArrayOverSlice", TestJsonPreferArrayOverSlice)
+	t.Run("TestJsonZeroCopyBytes", TestJsonZeroCopyBytes)
 
 	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
 }
@@ -216,6 +218,7 @@ func testBincGroup(t *testing.T) {
 	t.Run("TestBincRawToStringToRawEtc", TestBincRawToStringToRawEtc)
 	t.Run("TestBincStructKeyType", TestBincStructKeyType)
 	t.Run("TestBincPreferArrayOverSlice", TestBincPreferArrayOverSlice)
+	t.Run("TestBincZeroCopyBytes", TestBincZeroCopyBytes)
 }
 
 func testCborGroup(t *testing.T) {
@@ -250,6 +253,7 @@ func testCborGroup(t *testing.T) {
 	t.Run("TestCborRawToStringToRawEtc", TestCborRawToStringToRawEtc)
 	t.Run("TestCborStructKeyType", TestCborStructKeyType)
 	t.Run("TestCborPreferArrayOverSlice", TestCborPreferArrayOverSlice)
+	t.Run("TestCborZeroCopyBytes", TestCborZeroCopyBytes)
 
 	t.Run("TestCborHalfFloat", TestCborHalfFloat)
 	t.Run("TestCborSkipTags", TestCborSkipTags)
@@ -286,6 +290,7 @@ func testMsgpackGroup(t *testing.T) {
 	t.Run("TestMsgpackRawToStringToRawEtc", TestMsgpackRawToStringToRawEtc)
 	t.Run("TestMsgpackStructKeyType", TestMsgpackStructKeyType)
 	t.Run("TestMsgpackPreferArrayOverSlice", TestMsgpackPreferArrayOverSlice)
+	t.Run("TestMsgpackZeroCopyBytes", TestMsgpackZeroCopyBytes)
 
 	t.Run("TestMsgpackDecodeMapAndExtSizeMismatch", TestMsgpackDecodeMapAndExtSizeMismatch)
 }
@@ -320,6 +325,7 @@ func testSimpleGroup(t *testing.T) {
 	t.Run("TestSimpleRawToStringToRawEtc", TestSimpleRawToStringToRawEtc)
 	t.Run("TestSimpleStructKeyType", TestSimpleStructKeyType)
 	t.Run("TestSimplePreferArrayOverSlice", TestSimplePreferArrayOverSlice)
+	t.Run("TestSimpleZeroCopyBytes", TestSimpleZeroCopyBytes)
 }
 
 func testSimpleMammothGroup(t *testing.T) {