Browse Source

codec: Canonical mode must never be used for fastpath of maps

It is possible for fastpath to be triggered outside a top-level call to encode.
For those times, where we directly call encodeI or getEncFn, we must ensure
that the fastpath code is not used for maps if Canonical is set to true.

Fixes #93
Ugorji Nwoke 10 years ago
parent
commit
8295e69258
2 changed files with 64 additions and 4 deletions
  1. 58 0
      codec/codec_test.go
  2. 6 4
      codec/encode.go

+ 58 - 0
codec/codec_test.go

@@ -860,6 +860,60 @@ func testCodecRpcOne(t *testing.T, rr Rpc, h Handle, doRequest bool, exitSleepMs
 	return
 	return
 }
 }
 
 
+func doTestMapEncodeForCanonical(t *testing.T, name string, h Handle) {
+	v1 := map[string]interface{}{
+		"a": 1,
+		"b": "hello",
+		"c": map[string]interface{}{
+			"c/a": 1,
+			"c/b": "world",
+			"c/c": []int{1, 2, 3, 4},
+			"c/d": map[string]interface{}{
+				"c/d/a": "fdisajfoidsajfopdjsaopfjdsapofda",
+				"c/d/b": "fdsafjdposakfodpsakfopdsakfpodsakfpodksaopfkdsopafkdopsa",
+				"c/d/c": "poir02  ir30qif4p03qir0pogjfpoaerfgjp ofke[padfk[ewapf kdp[afep[aw",
+				"c/d/d": "fdsopafkd[sa f-32qor-=4qeof -afo-erfo r-eafo 4e-  o r4-qwo ag",
+				"c/d/e": "kfep[a sfkr0[paf[a foe-[wq  ewpfao-q ro3-q ro-4qof4-qor 3-e orfkropzjbvoisdb",
+				"c/d/f": "",
+			},
+			"c/e": map[int]string{
+				1:     "1",
+				22:    "22",
+				333:   "333",
+				4444:  "4444",
+				55555: "55555",
+			},
+			"c/f": map[string]int{
+				"1":     1,
+				"22":    22,
+				"333":   333,
+				"4444":  4444,
+				"55555": 55555,
+			},
+		},
+	}
+	var v2 map[string]interface{}
+	var b1, b2 []byte
+
+	// encode v1 into b1, decode b1 into v2, encode v2 into b2, compare b1 and b2
+
+	bh := h.getBasicHandle()
+	canonical0 := bh.Canonical
+	bh.Canonical = true
+	defer func() { bh.Canonical = canonical0 }()
+
+	e1 := NewEncoderBytes(&b1, h)
+	e1.MustEncode(v1)
+	d1 := NewDecoderBytes(b1, h)
+	d1.MustDecode(&v2)
+	e2 := NewEncoderBytes(&b2, h)
+	e2.MustEncode(v2)
+	if !bytes.Equal(b1, b2) {
+		logT(t, "Unequal bytes: %v VS %v", b1, b2)
+		t.FailNow()
+	}
+}
+
 // Comprehensive testing that generates data encoded from python handle (cbor, msgpack),
 // Comprehensive testing that generates data encoded from python handle (cbor, msgpack),
 // and validates that our code can read and write it out accordingly.
 // and validates that our code can read and write it out accordingly.
 // We keep this unexported here, and put actual test in ext_dep_test.go.
 // We keep this unexported here, and put actual test in ext_dep_test.go.
@@ -1048,6 +1102,10 @@ func TestCborCodecsEmbeddedPointer(t *testing.T) {
 	testCodecEmbeddedPointer(t, testCborH)
 	testCodecEmbeddedPointer(t, testCborH)
 }
 }
 
 
+func TestCborMapEncodeForCanonical(t *testing.T) {
+	doTestMapEncodeForCanonical(t, "cbor", testCborH)
+}
+
 func TestJsonCodecsTable(t *testing.T) {
 func TestJsonCodecsTable(t *testing.T) {
 	testCodecTableOne(t, testJsonH)
 	testCodecTableOne(t, testJsonH)
 }
 }

+ 6 - 4
codec/encode.go

@@ -113,8 +113,9 @@ type EncodeOptions struct {
 	// Canonical representation means that encoding a value will always result in the same
 	// Canonical representation means that encoding a value will always result in the same
 	// sequence of bytes.
 	// sequence of bytes.
 	//
 	//
-	// This mostly will apply to maps. In this case, codec will do more work to encode the
-	// map keys out of band, and then sort them, before writing out the map to the stream.
+	// This only affects maps, as the iteration order for maps is random.
+	// In this case, the map keys will first be encoded into []byte, and then sorted,
+	// before writing the sorted keys and the corresponding map values to the stream.
 	Canonical bool
 	Canonical bool
 
 
 	// AsSymbols defines what should be encoded as symbols.
 	// AsSymbols defines what should be encoded as symbols.
@@ -722,7 +723,7 @@ func (f encFnInfo) kMap(rv reflect.Value) {
 	if e.h.Canonical {
 	if e.h.Canonical {
 		// first encode each key to a []byte first, then sort them, then record
 		// first encode each key to a []byte first, then sort them, then record
 		// println(">>>>>>>> CANONICAL <<<<<<<<")
 		// println(">>>>>>>> CANONICAL <<<<<<<<")
-		var mksv []byte // temporary byte slice for the encoding
+		var mksv []byte = make([]byte, 0, len(mks)*16) // temporary byte slice for the encoding
 		e2 := NewEncoderBytes(&mksv, e.hh)
 		e2 := NewEncoderBytes(&mksv, e.hh)
 		mksbv := make([]encStructFieldBytesV, len(mks))
 		mksbv := make([]encStructFieldBytesV, len(mks))
 		for i, k := range mks {
 		for i, k := range mks {
@@ -1110,7 +1111,8 @@ func (e *Encoder) getEncFn(rtid uintptr, rt reflect.Type, checkFastpath, checkCo
 		fn.f = (encFnInfo).textMarshal
 		fn.f = (encFnInfo).textMarshal
 	} else {
 	} else {
 		rk := rt.Kind()
 		rk := rt.Kind()
-		if fastpathEnabled && checkFastpath && (rk == reflect.Map || rk == reflect.Slice) {
+		// if fastpathEnabled && checkFastpath && (rk == reflect.Map || rk == reflect.Slice) {
+		if fastpathEnabled && checkFastpath && (rk == reflect.Slice || (rk == reflect.Map && !e.h.Canonical)) {
 			if rt.PkgPath() == "" {
 			if rt.PkgPath() == "" {
 				if idx := fastpathAV.index(rtid); idx != -1 {
 				if idx := fastpathAV.index(rtid); idx != -1 {
 					fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}
 					fi.encFnInfoX = &encFnInfoX{e: e, ti: ti}