Forráskód Böngészése

codec: support Raw, a raw encoded value.

This supports the use-case where you have a pre-formatted value to be encoded,
or you want to read a pre-formatted value without decoding it explicitly into something.

One use-case during decode is delayed decoding: capture a value's raw bytes now, and decode them later.

Fixes #169
Ugorji Nwoke 9 éve
szülő
commit
98ef79d6c6

+ 7 - 0
codec/binc.go

@@ -348,6 +348,13 @@ func (d *bincDecDriver) readNextBd() {
 	d.bdRead = true
 }
 
+func (d *bincDecDriver) uncacheRead() { // undoes a pending cached descriptor read, if there is one
+	if d.bdRead { // bdRead is set to true at the end of readNextBd
+		d.r.unreadn1() // presumably steps the reader back by one byte — confirm against the reader implementation
+		d.bdRead = false
+	}
+}
+
 func (d *bincDecDriver) ContainerType() (vt valueType) {
 	if d.vd == bincVdSpecial && d.vs == bincSpNil {
 		return valueTypeNil

+ 7 - 0
codec/cbor.go

@@ -188,6 +188,13 @@ func (d *cborDecDriver) readNextBd() {
 	d.bdRead = true
 }
 
+func (d *cborDecDriver) uncacheRead() { // undoes a pending cached descriptor read, if there is one
+	if d.bdRead { // bdRead is set to true at the end of readNextBd
+		d.r.unreadn1() // presumably steps the reader back by one byte — confirm against the reader implementation
+		d.bdRead = false
+	}
+}
+
 func (d *cborDecDriver) ContainerType() (vt valueType) {
 	if d.bd == cborBdNil {
 		return valueTypeNil

+ 58 - 0
codec/codec_test.go

@@ -172,6 +172,11 @@ func (r *TestRpcInt) Echo123(args []string, res *string) error {
 	return nil
 }
 
+type TestRawValue struct { // fixture used by doTestRawValue to round-trip a Raw field
+	R Raw // holds already-encoded bytes (doTestRawValue stores the encoding of I here)
+	I int
+}
+
 type testUnixNanoTimeExt struct {
 	// keep timestamp here, so that do not incur interface-conversion costs
 	ts int64
@@ -1132,6 +1137,42 @@ func doTestJsonLargeInteger(t *testing.T, v interface{}, ias uint8) {
 	}
 }
 
+func doTestRawValue(t *testing.T, name string, h Handle) { // round-trips a Raw field through handle h; name is not used in the body
+	bh := h.getBasicHandle()
+	if !bh.Raw { // turn the Raw option on for the duration of this test only
+		bh.Raw = true
+		defer func() { bh.Raw = false }() // put the handle back the way it was on exit
+	}
+
+	var i, i2 int
+	var v, v2 TestRawValue
+	var bs, bs2 []byte
+
+	i = 1234 //1234567890
+	v = TestRawValue{I: i}
+	e := NewEncoderBytes(&bs, h)
+	e.MustEncode(v.I) // bs receives the encoding of the integer on its own
+	logT(t, ">>> raw: %v\n", bs)
+
+	v.R = Raw(bs) // embed those pre-encoded bytes as the Raw field
+	e.ResetBytes(&bs2)
+	e.MustEncode(v) // encode the whole struct, Raw field included, into bs2
+
+	logT(t, ">>> bs2: %v\n", bs2)
+	d := NewDecoderBytes(bs2, h)
+	d.MustDecode(&v2)
+	d.ResetBytes(v2.R) // delayed decoding: point the decoder at the captured raw bytes
+	logT(t, ">>> v2.R: %v\n", ([]byte)(v2.R))
+	d.MustDecode(&i2)
+
+	logT(t, ">>> Encoded %v, decoded %v\n", i, i2)
+	// logT(t, "Encoded %v, decoded %v", i, i2)
+	if i != i2 { // the integer recovered from the raw bytes must equal the original
+		logT(t, "Error: encoded %v, decoded %v", i, i2)
+		t.FailNow()
+	}
+}
+
 // Comprehensive testing that generates data encoded from python handle (cbor, msgpack),
 // and validates that our code can read and write it out accordingly.
 // We keep this unexported here, and put actual test in ext_dep_test.go.
@@ -1371,6 +1412,23 @@ func TestJsonStdEncIntf(t *testing.T) {
 	doTestStdEncIntf(t, "json", testJsonH)
 }
 
+// ----- Raw: run doTestRawValue against each handle ---------
+func TestJsonRaw(t *testing.T) {
+	doTestRawValue(t, "json", testJsonH)
+}
+func TestBincRaw(t *testing.T) {
+	doTestRawValue(t, "binc", testBincH)
+}
+func TestMsgpackRaw(t *testing.T) {
+	doTestRawValue(t, "msgpack", testMsgpackH)
+}
+func TestSimpleRaw(t *testing.T) {
+	doTestRawValue(t, "simple", testSimpleH)
+}
+func TestCborRaw(t *testing.T) {
+	doTestRawValue(t, "cbor", testCborH)
+}
+
 // ----- ALL (framework based) -----
 
 func TestAllEncCircularRef(t *testing.T) {

+ 25 - 3
codec/decode.go

@@ -91,10 +91,12 @@ type decDriver interface {
 	uncacheRead()
 }
 
-type decNoSeparator struct{}
+type decNoSeparator struct {
+}
+
+func (_ decNoSeparator) ReadEnd() {}
 
-func (_ decNoSeparator) ReadEnd()     {}
-func (_ decNoSeparator) uncacheRead() {}
+// func (_ decNoSeparator) uncacheRead() {}
 
 type DecodeOptions struct {
 	// MapType specifies type to use during schema-less decoding of a map in the stream.
@@ -433,6 +435,10 @@ func (f *decFnInfo) rawExt(rv reflect.Value) {
 	f.d.d.DecodeExt(rv.Addr().Interface(), 0, nil)
 }
 
+func (f *decFnInfo) raw(rv reflect.Value) { // decode function for Raw: stores the bytes returned by Decoder.raw into rv
+	rv.SetBytes(f.d.raw())
+}
+
 func (f *decFnInfo) ext(rv reflect.Value) {
 	f.d.d.DecodeExt(rv.Addr().Interface(), f.xfTag, f.xfFn)
 }
@@ -1507,6 +1513,8 @@ func (d *Decoder) decode(iv interface{}) {
 			*v = 0
 		case *[]uint8:
 			*v = nil
+		case *Raw:
+			*v = nil
 		case reflect.Value:
 			if v.Kind() != reflect.Ptr || v.IsNil() {
 				d.errNotValidPtrValue(v)
@@ -1576,6 +1584,9 @@ func (d *Decoder) decode(iv interface{}) {
 	case *[]uint8:
 		*v = d.d.DecodeBytes(*v, false, false)
 
+	case *Raw:
+		*v = d.raw()
+
 	case *interface{}:
 		d.decodeValueNotNil(reflect.ValueOf(iv).Elem(), nil)
 
@@ -1697,6 +1708,8 @@ func (d *Decoder) getDecFn(rt reflect.Type, checkFastpath, checkCodecSelfer bool
 		fn.f = (*decFnInfo).selferUnmarshal
 	} else if rtid == rawExtTypId {
 		fn.f = (*decFnInfo).rawExt
+	} else if rtid == rawTypId {
+		fn.f = (*decFnInfo).raw
 	} else if d.d.IsBuiltinType(rtid) {
 		fn.f = (*decFnInfo).builtin
 	} else if xfFn := d.h.getExt(rtid); xfFn != nil {
@@ -1873,6 +1886,15 @@ func (d *Decoder) nextValueBytes() []byte {
 	return d.r.stopTrack()
 }
 
+func (d *Decoder) raw() []byte { // returns a freshly allocated copy of the bytes from nextValueBytes
+	// The result must not be a view into the decoder's bytes,
+	// so always copy into a new slice before returning.
+	bs := d.nextValueBytes()
+	bs2 := make([]byte, len(bs))
+	copy(bs2, bs)
+	return bs2
+}
+
 // --------------------------------------------------
 
 // decSliceHelper assists when decoding into a slice, from a map or an array in the stream.

+ 27 - 1
codec/encode.go

@@ -125,6 +125,13 @@ type EncodeOptions struct {
 	// Note that this may make OmitEmpty more expensive, as it incurs a lot more reflect calls.
 	RecursiveEmptyCheck bool
 
+	// Raw controls whether we encode Raw values.
+	// This is a "dangerous" option and must be explicitly set.
+	// If set, we blindly encode Raw values as-is, without checking
+	// if they are a correct representation of a value in that format.
+	// If unset, we error out.
+	Raw bool
+
 	// AsSymbols defines what should be encoded as symbols.
 	//
 	// Encoding as symbols can reduce the encoded size significantly.
@@ -288,6 +295,10 @@ func (f *encFnInfo) builtin(rv reflect.Value) {
 	f.e.e.EncodeBuiltin(f.ti.rtid, rv.Interface())
 }
 
+func (f *encFnInfo) raw(rv reflect.Value) { // encode function for Raw: unwraps rv and hands it to Encoder.raw
+	f.e.raw(rv.Interface().(Raw))
+}
+
 func (f *encFnInfo) rawExt(rv reflect.Value) {
 	// rev := rv.Interface().(RawExt)
 	// f.e.e.EncodeRawExt(&rev, f.e)
@@ -1073,7 +1084,8 @@ func (e *Encoder) encode(iv interface{}) {
 		e.e.EncodeNil()
 	case Selfer:
 		v.CodecEncodeSelf(e)
-
+	case Raw:
+		e.raw(v)
 	case reflect.Value:
 		e.encodeValue(v, nil)
 
@@ -1258,6 +1270,8 @@ func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCo
 
 	if checkCodecSelfer && ti.cs {
 		fn.f = (*encFnInfo).selferMarshal
+	} else if rtid == rawTypId {
+		fn.f = (*encFnInfo).raw
 	} else if rtid == rawExtTypId {
 		fn.f = (*encFnInfo).rawExt
 	} else if e.e.IsBuiltinType(rtid) {
@@ -1362,6 +1376,18 @@ func (e *Encoder) asis(v []byte) {
 	}
 }
 
+func (e *Encoder) raw(vv Raw) { // writes vv to the output as-is; only allowed when the handle's Raw option is set
+	v := []byte(vv)
+	if !e.h.Raw { // the Raw option was not explicitly enabled
+		e.errorf("Raw values cannot be encoded: %v", v) // errorf panics, so the write below is never reached
+	}
+	if e.as == nil {
+		e.w.writeb(v) // no as-is encoder set: write the bytes straight to the writer
+	} else {
+		e.as.EncodeAsis(v)
+	}
+}
+
 func (e *Encoder) errorf(format string, params ...interface{}) {
 	err := fmt.Errorf(format, params...)
 	panic(err)

+ 10 - 0
codec/gen-helper.generated.go

@@ -83,6 +83,11 @@ func (f genHelperEncoder) EncBinaryMarshal(iv encoding.BinaryMarshaler) {
 	f.e.marshal(bs, fnerr, false, c_RAW)
 }
 
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperEncoder) EncRaw(iv Raw) { // forwards iv to Encoder.raw
+	f.e.raw(iv)
+}
+
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
 func (f genHelperEncoder) TimeRtidIfBinc() uintptr {
 	if _, ok := f.e.hh.(*BincHandle); ok {
@@ -191,6 +196,11 @@ func (f genHelperDecoder) DecBinaryUnmarshal(bm encoding.BinaryUnmarshaler) {
 	}
 }
 
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperDecoder) DecRaw() []byte { // returns the result of Decoder.raw
+	return f.d.raw()
+}
+
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
 func (f genHelperDecoder) TimeRtidIfBinc() uintptr {
 	if _, ok := f.d.hh.(*BincHandle); ok {

+ 8 - 0
codec/gen-helper.go.tmpl

@@ -79,6 +79,10 @@ func (f genHelperEncoder) EncBinaryMarshal(iv encoding.BinaryMarshaler) {
 	f.e.marshal(bs, fnerr, false, c_RAW)
 }
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperEncoder) EncRaw(iv Raw) { // forwards iv to Encoder.raw
+	f.e.raw(iv)
+}
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
 func (f genHelperEncoder) TimeRtidIfBinc() uintptr {
 	if _, ok := f.e.hh.(*BincHandle); ok {
 		return timeTypId 
@@ -172,6 +176,10 @@ func (f genHelperDecoder) DecBinaryUnmarshal(bm encoding.BinaryUnmarshaler) {
 	}
 }
 // FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
+func (f genHelperDecoder) DecRaw() []byte { // returns the result of Decoder.raw
+	return f.d.raw()
+}
+// FOR USE BY CODECGEN ONLY. IT *WILL* CHANGE WITHOUT NOTICE. *DO NOT USE*
 func (f genHelperDecoder) TimeRtidIfBinc() uintptr {
 	if _, ok := f.d.hh.(*BincHandle); ok {
 		return timeTypId 

+ 11 - 2
codec/gen.go

@@ -27,6 +27,7 @@ import (
 // ---------------------------------------------------
 // codecgen supports the full cycle of reflection-based codec:
 //    - RawExt
+//    - Raw
 //    - Builtins
 //    - Extensions
 //    - (Binary|Text|JSON)(Unm|M)arshal
@@ -701,13 +702,17 @@ func (x *genRunner) enc(varname string, t reflect.Type) {
 	}
 
 	// check if
-	//   - type is RawExt
+	//   - type is RawExt, Raw
 	//   - the type implements (Text|JSON|Binary)(Unm|M)arshal
 	x.linef("%sm%s := z.EncBinary()", genTempVarPfx, mi)
 	x.linef("_ = %sm%s", genTempVarPfx, mi)
 	x.line("if false {")           //start if block
 	defer func() { x.line("}") }() //end if block
 
+	if t == rawTyp {
+		x.linef("} else { z.EncRaw(%v)", varname)
+		return
+	}
 	if t == rawExtTyp {
 		x.linef("} else { r.EncodeRawExt(%v, e)", varname)
 		return
@@ -1131,7 +1136,7 @@ func (x *genRunner) dec(varname string, t reflect.Type) {
 	}
 
 	// check if
-	//   - type is RawExt
+	//   - type is Raw, RawExt
 	//   - the type implements (Text|JSON|Binary)(Unm|M)arshal
 	mi := x.varsfx()
 	x.linef("%sm%s := z.DecBinary()", genTempVarPfx, mi)
@@ -1139,6 +1144,10 @@ func (x *genRunner) dec(varname string, t reflect.Type) {
 	x.line("if false {")           //start if block
 	defer func() { x.line("}") }() //end if block
 
+	if t == rawTyp {
+		x.linef("} else { *%v = z.DecRaw()", varname)
+		return
+	}
 	if t == rawExtTyp {
 		x.linef("} else { r.DecodeExt(%v, 0, nil)", varname)
 		return

+ 8 - 1
codec/helper.go

@@ -266,6 +266,7 @@ var (
 	stringTyp     = reflect.TypeOf("")
 	timeTyp       = reflect.TypeOf(time.Time{})
 	rawExtTyp     = reflect.TypeOf(RawExt{})
+	rawTyp        = reflect.TypeOf(Raw{})
 	uint8SliceTyp = reflect.TypeOf([]uint8(nil))
 
 	mapBySliceTyp = reflect.TypeOf((*MapBySlice)(nil)).Elem()
@@ -283,6 +284,7 @@ var (
 
 	uint8SliceTypId = reflect.ValueOf(uint8SliceTyp).Pointer()
 	rawExtTypId     = reflect.ValueOf(rawExtTyp).Pointer()
+	rawTypId        = reflect.ValueOf(rawTyp).Pointer()
 	intfTypId       = reflect.ValueOf(intfTyp).Pointer()
 	timeTypId       = reflect.ValueOf(timeTyp).Pointer()
 	stringTypId     = reflect.ValueOf(stringTyp).Pointer()
@@ -363,6 +365,11 @@ type Handle interface {
 	isBinary() bool
 }
 
+// Raw represents raw formatted bytes.
+// During encode its bytes are written "blindly", as-is; during decode it receives the raw bytes of the value.
+// Note: encoding a Raw is dangerous, so it is gated behind the EncodeOptions.Raw flag, which must be explicitly set.
+type Raw []byte
+
 // RawExt represents raw unprocessed extension data.
 // Some codecs will decode extension data as a *RawExt if there is no registered extension for the tag.
 //
@@ -373,7 +380,7 @@ type RawExt struct {
 	// Data is used by codecs (e.g. binc, msgpack, simple) which do custom serialization of the types
 	Data []byte
 	// Value represents the extension, if Data is nil.
-	// Value is used by codecs (e.g. cbor) which use the format to do custom serialization of the types.
+	// Value is used by codecs (e.g. cbor, json) which use the format to do custom serialization of the types.
 	Value interface{}
 }
 

+ 7 - 0
codec/msgpack.go

@@ -561,6 +561,13 @@ func (d *msgpackDecDriver) readNextBd() {
 	d.bdRead = true
 }
 
+func (d *msgpackDecDriver) uncacheRead() { // undoes a pending cached descriptor read, if there is one
+	if d.bdRead { // bdRead is set to true at the end of readNextBd
+		d.r.unreadn1() // presumably steps the reader back by one byte — confirm against the reader implementation
+		d.bdRead = false
+	}
+}
+
 func (d *msgpackDecDriver) ContainerType() (vt valueType) {
 	bd := d.bd
 	if bd == mpNil {

+ 7 - 0
codec/simple.go

@@ -166,6 +166,13 @@ func (d *simpleDecDriver) readNextBd() {
 	d.bdRead = true
 }
 
+func (d *simpleDecDriver) uncacheRead() { // undoes a pending cached descriptor read, if there is one
+	if d.bdRead { // bdRead is set to true at the end of readNextBd
+		d.r.unreadn1() // presumably steps the reader back by one byte — confirm against the reader implementation
+		d.bdRead = false
+	}
+}
+
 func (d *simpleDecDriver) ContainerType() (vt valueType) {
 	if d.bd == simpleVdNil {
 		return valueTypeNil