Browse Source

codec: support ability to preserve missing fields

Given multiple versions of a type, where some fields have been removed,
we want the encoded stream to remain canonical across those versions.
This means that a value which was previously encoded to a stream of bytes
can be recovered into a different version of the type.

This is supported by implementing the MissingFielder interface.

Note that the MissingFielder interface is completely ignored during codecgen.

Updates #258
Ugorji Nwoke 7 years ago
parent
commit
e5646b99d2
5 changed files with 128 additions and 11 deletions
  1. 7 0
      codec/codecgen.go
  2. 23 3
      codec/decode.go
  3. 57 2
      codec/encode.go
  4. 2 0
      codec/gen.go
  5. 39 6
      codec/helper.go

+ 7 - 0
codec/codecgen.go

@@ -0,0 +1,7 @@
+// +build codecgen generated
+
+package codec
+
+// init marks this build as a codecgen build by setting the package-level
+// codecgen flag. The build constraint above means this file, and so this init,
+// is only compiled in when the codecgen or generated tag is set.
+func init() {
+	codecgen = true
+}

+ 23 - 3
codec/decode.go

@@ -1167,6 +1167,12 @@ func (d *Decoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 	elemsep := d.esep
 	elemsep := d.esep
 	sfn := structFieldNode{v: rv, update: true}
 	sfn := structFieldNode{v: rv, update: true}
 	ctyp := dd.ContainerType()
 	ctyp := dd.ContainerType()
+	var mf MissingFielder
+	if fti.mf {
+		mf = rv2i(rv).(MissingFielder)
+	} else if fti.mfp {
+		mf = rv2i(rv.Addr()).(MissingFielder)
+	}
 	if ctyp == valueTypeMap {
 	if ctyp == valueTypeMap {
 		containerLen := dd.ReadMapStart()
 		containerLen := dd.ReadMapStart()
 		if containerLen == 0 {
 		if containerLen == 0 {
@@ -1192,6 +1198,12 @@ func (d *Decoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 				} else {
 				} else {
 					d.decodeValue(sfn.field(si), nil, true)
 					d.decodeValue(sfn.field(si), nil, true)
 				}
 				}
+			} else if mf != nil {
+				var f interface{}
+				d.decode(&f)
+				if !mf.CodecMissingField(rvkencname, f) && d.h.ErrorIfNoField {
+					d.errorf("no matching struct field found when decoding stream map with key " + stringView(rvkencname))
+				}
 			} else {
 			} else {
 				d.structFieldNotFound(-1, stringView(rvkencname))
 				d.structFieldNotFound(-1, stringView(rvkencname))
 			}
 			}
@@ -1207,8 +1219,13 @@ func (d *Decoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		// Not much gain from doing it two ways for array.
 		// Not much gain from doing it two ways for array.
 		// Arrays are not used as much for structs.
 		// Arrays are not used as much for structs.
 		hasLen := containerLen >= 0
 		hasLen := containerLen >= 0
+		var checkbreak bool
 		for j, si := range fti.sfiSrc {
 		for j, si := range fti.sfiSrc {
-			if (hasLen && j == containerLen) || (!hasLen && dd.CheckBreak()) {
+			if hasLen && j == containerLen {
+				break
+			}
+			if !hasLen && dd.CheckBreak() {
+				checkbreak = true
 				break
 				break
 			}
 			}
 			if elemsep {
 			if elemsep {
@@ -1220,9 +1237,12 @@ func (d *Decoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 				d.decodeValue(sfn.field(si), nil, true)
 				d.decodeValue(sfn.field(si), nil, true)
 			}
 			}
 		}
 		}
-		if containerLen > len(fti.sfiSrc) {
+		if (hasLen && containerLen > len(fti.sfiSrc)) || (!hasLen && !checkbreak) {
 			// read remaining values and throw away
 			// read remaining values and throw away
-			for j := len(fti.sfiSrc); j < containerLen; j++ {
+			for j := len(fti.sfiSrc); ; j++ {
+				if (hasLen && j == containerLen) || (!hasLen && dd.CheckBreak()) {
+					break
+				}
 				if elemsep {
 				if elemsep {
 					dd.ReadArrayElem()
 					dd.ReadArrayElem()
 				}
 				}

+ 57 - 2
codec/encode.go

@@ -541,16 +541,49 @@ func (e *Encoder) kStructFieldKey(keyType valueType, s *structFieldInfo) {
 	}
 	}
 }
 }
 
 
+// kStructFieldKeyName writes encName as a struct map key in the form selected by keyType:
+// as a UTF-8 string, or as the int, uint or float64 value parsed from encName.
+// For any other keyType, nothing is written.
+func (e *Encoder) kStructFieldKeyName(keyType valueType, encName string) {
+	// Use plain ifs with early returns, not switch (which compiles to binary-search).
+	// Since keyType is typically valueTypeString, branch prediction is pretty good.
+	if keyType == valueTypeString {
+		e.e.EncodeString(cUTF8, encName)
+		return
+	}
+	// NOTE(review): must presumably turns a strconv parse error into a panic — confirm.
+	var conv must
+	if keyType == valueTypeInt {
+		e.e.EncodeInt(conv.Int(strconv.ParseInt(encName, 10, 64)))
+		return
+	}
+	if keyType == valueTypeUint {
+		e.e.EncodeUint(conv.Uint(strconv.ParseUint(encName, 10, 64)))
+		return
+	}
+	if keyType == valueTypeFloat {
+		e.e.EncodeFloat64(conv.Float(strconv.ParseFloat(encName, 64)))
+	}
+}
+
 func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 	fti := f.ti
 	fti := f.ti
 	elemsep := e.esep
 	elemsep := e.esep
 	tisfi := fti.sfiSrc
 	tisfi := fti.sfiSrc
+	var newlen int
 	toMap := !(fti.toArray || e.h.StructToArray)
 	toMap := !(fti.toArray || e.h.StructToArray)
+	var mf []MissingFieldPair
+	if f.ti.mf {
+		mf = rv2i(rv).(MissingFielder).CodecMissingFields()
+		toMap = true
+		newlen += len(mf)
+	} else if f.ti.mfp {
+		if rv.CanAddr() {
+			mf = rv2i(rv.Addr()).(MissingFielder).CodecMissingFields()
+		} else {
+			// make a new addressable value of same one, and use it
+			rv2 := reflect.New(rv.Type())
+			rv2.Elem().Set(rv)
+			mf = rv2i(rv2).(MissingFielder).CodecMissingFields()
+		}
+		toMap = true
+		newlen += len(mf)
+	}
 	// if toMap, use the sorted array. If toArray, use unsorted array (to match sequence in struct)
 	// if toMap, use the sorted array. If toArray, use unsorted array (to match sequence in struct)
 	if toMap {
 	if toMap {
 		tisfi = fti.sfiSort
 		tisfi = fti.sfiSort
 	}
 	}
-	newlen := len(fti.sfiSort)
+	newlen += len(tisfi)
 	ee := e.e
 	ee := e.e
 
 
 	// Use sync.Pool to reduce allocating slices unnecessarily.
 	// Use sync.Pool to reduce allocating slices unnecessarily.
@@ -614,8 +647,20 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		newlen++
 		newlen++
 	}
 	}
 
 
+	var mflen int
+	for i := range mf {
+		if mf[i].Field == "" {
+			continue
+		}
+		if fti.infoFieldOmitempty && isEmptyValue(reflect.ValueOf(mf[i].Value), e.h.TypeInfos, recur, recur) {
+			mf[i].Field = ""
+			continue
+		}
+		mflen++
+	}
+
 	if toMap {
 	if toMap {
-		ee.WriteMapStart(newlen)
+		ee.WriteMapStart(newlen + mflen)
 		if elemsep {
 		if elemsep {
 			for j := 0; j < newlen; j++ {
 			for j := 0; j < newlen; j++ {
 				kv = fkvs[j]
 				kv = fkvs[j]
@@ -633,6 +678,16 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 				e.encodeValue(kv.r, nil, true)
 				e.encodeValue(kv.r, nil, true)
 			}
 			}
 		}
 		}
+		// now, add the others
+		for i := range mf {
+			if mf[i].Field == "" {
+				continue
+			}
+			ee.WriteMapElemKey()
+			e.kStructFieldKeyName(fti.keyType, mf[i].Field)
+			ee.WriteMapElemValue()
+			e.encode(mf[i].Value)
+		}
 		ee.WriteMapEnd()
 		ee.WriteMapEnd()
 	} else {
 	} else {
 		ee.WriteArrayStart(newlen)
 		ee.WriteArrayStart(newlen)

+ 2 - 0
codec/gen.go

@@ -41,6 +41,8 @@ import (
 // However, codecgen doesn't support the following:
 // However, codecgen doesn't support the following:
 //   - Canonical option. (codecgen IGNORES it currently)
 //   - Canonical option. (codecgen IGNORES it currently)
 //     This is just because it has not been implemented.
 //     This is just because it has not been implemented.
+//   - MissingFielder implementation.
+//     If a type implements MissingFielder, it is completely ignored by codecgen.
 //
 //
 // During encode/decode, Selfer takes precedence.
 // During encode/decode, Selfer takes precedence.
 // A type implementing Selfer will know how to encode/decode itself statically.
 // A type implementing Selfer will know how to encode/decode itself statically.

+ 39 - 6
codec/helper.go

@@ -143,6 +143,8 @@ var (
 	zeroByteSlice = oneByteArr[:0:0]
 	zeroByteSlice = oneByteArr[:0:0]
 )
 )
 
 
+var codecgen bool // set to true by init in codec/codecgen.go, which is built only under the codecgen or generated tag
+
 var refBitset bitset32
 var refBitset bitset32
 var pool pooler
 var pool pooler
 var panicv panicHdl
 var panicv panicHdl
@@ -327,8 +329,9 @@ var (
 	jsonMarshalerTyp   = reflect.TypeOf((*jsonMarshaler)(nil)).Elem()
 	jsonMarshalerTyp   = reflect.TypeOf((*jsonMarshaler)(nil)).Elem()
 	jsonUnmarshalerTyp = reflect.TypeOf((*jsonUnmarshaler)(nil)).Elem()
 	jsonUnmarshalerTyp = reflect.TypeOf((*jsonUnmarshaler)(nil)).Elem()
 
 
-	selferTyp = reflect.TypeOf((*Selfer)(nil)).Elem()
-	iszeroTyp = reflect.TypeOf((*isZeroer)(nil)).Elem()
+	selferTyp         = reflect.TypeOf((*Selfer)(nil)).Elem()
+	missingFielderTyp = reflect.TypeOf((*MissingFielder)(nil)).Elem()
+	iszeroTyp         = reflect.TypeOf((*isZeroer)(nil)).Elem()
 
 
 	uint8TypId      = rt2id(uint8Typ)
 	uint8TypId      = rt2id(uint8Typ)
 	uint8SliceTypId = rt2id(uint8SliceTyp)
 	uint8SliceTypId = rt2id(uint8SliceTyp)
@@ -400,6 +403,31 @@ type Selfer interface {
 	CodecDecodeSelf(*Decoder)
 	CodecDecodeSelf(*Decoder)
 }
 }
 
 
+// MissingFieldPair is a convenience value composed of the field name and the value of the field.
+type MissingFieldPair struct {
+	Field string      // name used for the map key on encode; the encoder skips entries whose Field is empty
+	Value interface{} // value encoded for Field
+}
+
+// MissingFielder defines the interface allowing structs to internally decode or encode
+// values which do not map to struct fields.
+//
+// We expect that this interface is bound to a pointer type (so the mutation function works).
+//
+// A use-case is if a version of a type unexports a field, but you want compatibility between
+// both versions during encoding and decoding.
+//
+// Note that the interface is completely ignored during codecgen.
+type MissingFielder interface {
+	// CodecMissingField is called while decoding a struct from a stream map, with a key
+	// that matches no struct field and the value decoded for that key.
+	// It returns true if the missing field was set on the struct.
+	CodecMissingField(field []byte, value interface{}) bool
+
+	// CodecMissingFields returns the pairs, which are not struct fields, to encode with the struct; the encoder may set Field to "" on returned entries.
+	CodecMissingFields() []MissingFieldPair
+}
+
 // MapBySlice is a tag interface that denotes wrapped slice should encode as a map in the stream.
 // MapBySlice is a tag interface that denotes wrapped slice should encode as a map in the stream.
 // The slice contains a sequence of key-value pairs.
 // The slice contains a sequence of key-value pairs.
 // This affords storing a map in a specific sequence in the stream.
 // This affords storing a map in a specific sequence in the stream.
@@ -1066,12 +1094,15 @@ type typeInfo struct {
 	jup bool // *T is a jsonUnmarshaler
 	jup bool // *T is a jsonUnmarshaler
 	cs  bool // T is a Selfer
 	cs  bool // T is a Selfer
 	csp bool // *T is a Selfer
 	csp bool // *T is a Selfer
+	mf  bool // T is a MissingFielder
+	mfp bool // *T is a MissingFielder
 
 
 	// other flags, with individual bits representing if set.
 	// other flags, with individual bits representing if set.
-	flags typeInfoFlag
+	flags              typeInfoFlag
+	infoFieldOmitempty bool
 
 
-	// _ [2]byte   // padding
-	_ [3]uint64 // padding
+	_ [6]byte   // padding
+	_ [2]uint64 // padding
 }
 }
 
 
 func (ti *typeInfo) isFlag(f typeInfoFlag) bool {
 func (ti *typeInfo) isFlag(f typeInfoFlag) bool {
@@ -1191,6 +1222,7 @@ func (x *TypeInfos) get(rtid uintptr, rt reflect.Type) (pti *typeInfo) {
 	ti.jm, ti.jmp = implIntf(rt, jsonMarshalerTyp)
 	ti.jm, ti.jmp = implIntf(rt, jsonMarshalerTyp)
 	ti.ju, ti.jup = implIntf(rt, jsonUnmarshalerTyp)
 	ti.ju, ti.jup = implIntf(rt, jsonUnmarshalerTyp)
 	ti.cs, ti.csp = implIntf(rt, selferTyp)
 	ti.cs, ti.csp = implIntf(rt, selferTyp)
+	ti.mf, ti.mfp = implIntf(rt, missingFielderTyp)
 
 
 	b1, b2 := implIntf(rt, iszeroTyp)
 	b1, b2 := implIntf(rt, iszeroTyp)
 	if b1 {
 	if b1 {
@@ -1208,6 +1240,7 @@ func (x *TypeInfos) get(rtid uintptr, rt reflect.Type) (pti *typeInfo) {
 		var omitEmpty bool
 		var omitEmpty bool
 		if f, ok := rt.FieldByName(structInfoFieldName); ok {
 		if f, ok := rt.FieldByName(structInfoFieldName); ok {
 			ti.toArray, omitEmpty, ti.keyType = parseStructInfo(x.structTag(f.Tag))
 			ti.toArray, omitEmpty, ti.keyType = parseStructInfo(x.structTag(f.Tag))
+			ti.infoFieldOmitempty = omitEmpty
 		} else {
 		} else {
 			ti.keyType = valueTypeString
 			ti.keyType = valueTypeString
 		}
 		}
@@ -1852,7 +1885,7 @@ func (c *codecFner) get(rt reflect.Type, checkFastpath, checkCodecSelfer bool) (
 				}
 				}
 				// fn.fd = (*Decoder).kArray
 				// fn.fd = (*Decoder).kArray
 			case reflect.Struct:
 			case reflect.Struct:
-				if ti.anyOmitEmpty {
+				if ti.anyOmitEmpty || ti.mf || ti.mfp {
 					fn.fe = (*Encoder).kStruct
 					fn.fe = (*Encoder).kStruct
 				} else {
 				} else {
 					fn.fe = (*Encoder).kStructNoOmitempty
 					fn.fe = (*Encoder).kStructNoOmitempty