Browse Source

codec: clarify and fix EncodeOptions:StringToRaw inconsistencies

Clarify and fix implementation such that we always encode UTF-8 strings as such
(including encoding.TextMarshaler, time.Format calls, struct field names, etc)
but otherwise, consult the flag StringToRaw to determine whether to encode a
string as a raw []byte or as a UTF-8 value.

During reflection path, we now consult the flag to encode appropriately.

Fixes #286
Ugorji Nwoke 6 years ago
parent
commit
95c34d148d
5 changed files with 56 additions and 50 deletions
  1. 42 24
      codec/encode.go
  2. 1 12
      codec/gen-helper.generated.go
  3. 1 12
      codec/gen-helper.go.tmpl
  4. 6 1
      codec/helper_not_unsafe.go
  5. 6 1
      codec/helper_unsafe.go

+ 42 - 24
codec/encode.go

@@ -168,9 +168,17 @@ type EncodeOptions struct {
 	// If unset, we error out.
 	Raw bool
 
-	// StringToRaw controls how strings are encoded -
-	// by default, they are encoded as UTF-8,
+	// StringToRaw controls how strings are encoded.
+	//
+	// As a go string is just an (immutable) sequence of bytes,
+	// it can be encoded either as raw bytes or as a UTF string.
+	//
+	// By default, strings are encoded as UTF-8.
 	// but can be treated as []byte during an encode.
+	//
+	// Note that things which we know (by definition) to be UTF-8
+	// are ALWAYS encoded as UTF-8 strings.
+	// These include encoding.TextMarshaler, time.Format calls, struct field names, etc.
 	StringToRaw bool
 
 	// // AsSymbols defines what should be encoded as symbols.
@@ -647,14 +655,12 @@ func (e *Encoder) kStructNoOmitempty(f *codecFnInfo, rv reflect.Value) {
 		if e.esep {
 			for _, si := range tisfi {
 				ee.WriteMapElemKey()
-				// ee.EncodeStringEnc(cUTF8, si.encName)
 				e.kStructFieldKey(fti.keyType, si.encNameAsciiAlphaNum, si.encName)
 				ee.WriteMapElemValue()
 				e.encodeValue(sfn.field(si), nil, true)
 			}
 		} else {
 			for _, si := range tisfi {
-				// ee.EncodeStringEnc(cUTF8, si.encName)
 				e.kStructFieldKey(fti.keyType, si.encNameAsciiAlphaNum, si.encName)
 				e.encodeValue(sfn.field(si), nil, true)
 			}
@@ -677,24 +683,7 @@ func (e *Encoder) kStructNoOmitempty(f *codecFnInfo, rv reflect.Value) {
 }
 
 func (e *Encoder) kStructFieldKey(keyType valueType, encNameAsciiAlphaNum bool, encName string) {
-	var m must
-	// use if-else-if, not switch (which compiles to binary-search)
-	// since keyType is typically valueTypeString, branch prediction is pretty good.
-	if keyType == valueTypeString {
-		if e.js && encNameAsciiAlphaNum { // keyType == valueTypeString
-			e.w.writen1('"')
-			e.w.writestr(encName)
-			e.w.writen1('"')
-		} else { // keyType == valueTypeString
-			e.e.EncodeStringEnc(cUTF8, encName)
-		}
-	} else if keyType == valueTypeInt {
-		e.e.EncodeInt(m.Int(strconv.ParseInt(encName, 10, 64)))
-	} else if keyType == valueTypeUint {
-		e.e.EncodeUint(m.Uint(strconv.ParseUint(encName, 10, 64)))
-	} else if keyType == valueTypeFloat {
-		e.e.EncodeFloat64(m.Float(strconv.ParseFloat(encName, 64)))
-	}
+	encStructFieldKey(encName, e.e, e.w, keyType, encNameAsciiAlphaNum, e.js)
 }
 
 func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
@@ -791,7 +780,6 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 			for j = 0; j < len(fkvs); j++ {
 				kv = fkvs[j]
 				ee.WriteMapElemKey()
-				// ee.EncodeStringEnc(cUTF8, kv.v)
 				e.kStructFieldKey(fti.keyType, kv.v.encNameAsciiAlphaNum, kv.v.encName)
 				ee.WriteMapElemValue()
 				e.encodeValue(kv.r, nil, true)
@@ -799,7 +787,6 @@ func (e *Encoder) kStruct(f *codecFnInfo, rv reflect.Value) {
 		} else {
 			for j = 0; j < len(fkvs); j++ {
 				kv = fkvs[j]
-				// ee.EncodeStringEnc(cUTF8, kv.v)
 				e.kStructFieldKey(fti.keyType, kv.v.encNameAsciiAlphaNum, kv.v.encName)
 				e.encodeValue(kv.r, nil, true)
 			}
@@ -1781,6 +1768,37 @@ func (e *Encoder) wrapErr(v interface{}, err *error) {
 	*err = encodeError{codecError{name: e.hh.Name(), err: v}}
 }
 
+func encStructFieldKey(encName string, ee encDriver, w *encWriterSwitch,
+	keyType valueType, encNameAsciiAlphaNum bool, js bool) {
+	var m must
+	// use if-else-if, not switch (which compiles to binary-search)
+	// since keyType is typically valueTypeString, branch prediction is pretty good.
+	if keyType == valueTypeString {
+		if js && encNameAsciiAlphaNum { // keyType == valueTypeString
+			// w.writen1('"')
+			// w.writestr(encName)
+			// w.writen1('"')
+			// ----
+			// w.writestr(`"` + encName + `"`)
+			// ----
+			// do concat myself, so it is faster than the generic string concat
+			b := make([]byte, len(encName)+2)
+			copy(b[1:], encName)
+			b[0] = '"'
+			b[len(b)-1] = '"'
+			w.writeb(b)
+		} else { // keyType == valueTypeString
+			ee.EncodeStringEnc(cUTF8, encName)
+		}
+	} else if keyType == valueTypeInt {
+		ee.EncodeInt(m.Int(strconv.ParseInt(encName, 10, 64)))
+	} else if keyType == valueTypeUint {
+		ee.EncodeUint(m.Uint(strconv.ParseUint(encName, 10, 64)))
+	} else if keyType == valueTypeFloat {
+		ee.EncodeFloat64(m.Float(strconv.ParseFloat(encName, 64)))
+	}
+}
+
 // func encStringAsRawBytesMaybe(ee encDriver, s string, stringToRaw bool) {
 // 	if stringToRaw {
 // 		ee.EncodeStringBytesRaw(bytesView(s))

+ 1 - 12
codec/gen-helper.generated.go

@@ -10,7 +10,6 @@ package codec
 import (
 	"encoding"
 	"reflect"
-	"strconv"
 )
 
 // GenVersion is the current version of codecgen.
@@ -50,17 +49,7 @@ type genHelperEncDriver struct {
 
 func (x genHelperEncDriver) EncodeBuiltin(rt uintptr, v interface{}) {}
 func (x genHelperEncDriver) EncStructFieldKey(keyType valueType, s string) {
-	var m must
-	if keyType == valueTypeString {
-		x.encDriver.EncodeStringEnc(cUTF8, s)
-	} else if keyType == valueTypeInt {
-		x.encDriver.EncodeInt(m.Int(strconv.ParseInt(s, 10, 64)))
-	} else if keyType == valueTypeUint {
-		x.encDriver.EncodeUint(m.Uint(strconv.ParseUint(s, 10, 64)))
-	} else if keyType == valueTypeFloat {
-		x.encDriver.EncodeFloat64(m.Float(strconv.ParseFloat(s, 64)))
-	}
-	// encStructFieldKey(x.encDriver, keyType, s)
+	encStructFieldKey(s, x.encDriver, nil, keyType, false, false)
 }
 func (x genHelperEncDriver) EncodeSymbol(s string) {
 	x.encDriver.EncodeStringEnc(cUTF8, s)

+ 1 - 12
codec/gen-helper.go.tmpl

@@ -10,7 +10,6 @@ package codec
 import (
 	"encoding"
 	"reflect"
-	"strconv"
 )
 
 // GenVersion is the current version of codecgen.
@@ -50,17 +49,7 @@ type genHelperEncDriver struct {
 
 func (x genHelperEncDriver) EncodeBuiltin(rt uintptr, v interface{}) {}
 func (x genHelperEncDriver) EncStructFieldKey(keyType valueType, s string) {
-	var m must
-	if keyType == valueTypeString {
-		x.encDriver.EncodeStringEnc(cUTF8, s)
-	} else if keyType == valueTypeInt {
-		x.encDriver.EncodeInt(m.Int(strconv.ParseInt(s, 10, 64)))
-	} else if keyType == valueTypeUint {
-		x.encDriver.EncodeUint(m.Uint(strconv.ParseUint(s, 10, 64)))
-	} else if keyType == valueTypeFloat {
-		x.encDriver.EncodeFloat64(m.Float(strconv.ParseFloat(s, 64)))
-	} 
-	// encStructFieldKey(x.encDriver, keyType, s)
+	encStructFieldKey(s, x.encDriver, nil, keyType, false, false)
 }
 func (x genHelperEncDriver) EncodeSymbol(s string) {
 	x.encDriver.EncodeStringEnc(cUTF8, s)

+ 6 - 1
codec/helper_not_unsafe.go

@@ -248,7 +248,12 @@ func (e *Encoder) kTime(f *codecFnInfo, rv reflect.Value) {
 }
 
 func (e *Encoder) kString(f *codecFnInfo, rv reflect.Value) {
-	e.e.EncodeStringEnc(cUTF8, rv.String())
+	s := rv.String()
+	if e.h.StringToRaw {
+		e.e.EncodeStringBytesRaw(bytesView(s))
+	} else {
+		e.e.EncodeStringEnc(cUTF8, s)
+	}
 }
 
 func (e *Encoder) kFloat64(f *codecFnInfo, rv reflect.Value) {

+ 6 - 1
codec/helper_unsafe.go

@@ -408,7 +408,12 @@ func (e *Encoder) kTime(f *codecFnInfo, rv reflect.Value) {
 
 func (e *Encoder) kString(f *codecFnInfo, rv reflect.Value) {
 	v := (*unsafeReflectValue)(unsafe.Pointer(&rv))
-	e.e.EncodeStringEnc(cUTF8, *(*string)(v.ptr))
+	s := *(*string)(v.ptr)
+	if e.h.StringToRaw {
+		e.e.EncodeStringBytesRaw(bytesView(s))
+	} else {
+		e.e.EncodeStringEnc(cUTF8, s)
+	}
 }
 
 func (e *Encoder) kFloat64(f *codecFnInfo, rv reflect.Value) {