8 years ago · cbd720c66c
--- a/codec/codec_test.go
+++ b/codec/codec_test.go
@@ -2020,6 +2020,29 @@ func TestJsonLargeInteger(t *testing.T) {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// TestJsonInvalidUnicode checks that unpaired surrogates and malformed \u escapes in JSON strings decode to U+FFFD.
				+func TestJsonInvalidUnicode(t *testing.T) {
				+	var m = map[string]string{
				+		`"\udc49\u0430abc"`: "\uFFFDabc",
				+		`"\udc49\u0430"`:    "\uFFFD",
				+		`"\udc49abc"`:       "\uFFFDabc",
				+		`"\udc49"`:          "\uFFFD",
				+		`"\udZ49\u0430abc"`: "\uFFFD\u0430abc",
				+		`"\udcG9\u0430"`:    "\uFFFD\u0430",
				+		`"\uHc49abc"`:       "\uFFFDabc",
				+		`"\uKc49"`:          "\uFFFD",
				+	}
				+	for k, v := range m {
				+		var s string
				+		testUnmarshalErr(&s, []byte(k), testJsonH, t, "-")
				+		if s != v {
				+			// Report the input too: map order is random and several inputs share an expected value.
				+			logT(t, "input %s: not equal: %q, %q", k, v, s)
				+			failT(t)
				+		}
				+	}
				+}
			
 
				+
			
 
				 // ----------
			
 
				 
			
 
				 func TestBincCodecsTable(t *testing.T) {
			
--- a/codec/json.go
+++ b/codec/json.go
@@ -36,6 +36,7 @@ import (
 
				 	"encoding/base64"
			
 
				 	"reflect"
			
 
				 	"strconv"
			
 
				+	"unicode"
			
 
				 	"unicode/utf16"
			
 
				 	"unicode/utf8"
			
 
				 )
			
@@ -88,14 +89,6 @@ var (
 
				 )
			
 
				 
			
 
				 const (
			
 
				-	// jsonUnreadAfterDecNum controls whether we unread after decoding a number.
			
 
				-	//
			
 
				-	// instead of unreading, just update d.tok (iff it's not a whitespace char)
			
 
				-	// However, doing this means that we may HOLD onto some data which belongs to another stream.
			
 
				-	// Thus, it is safest to unread the data when done.
			
 
				-	// keep behind a constant flag for now.
			
 
				-	jsonUnreadAfterDecNum = true
			
 
				-
			
 
				 	// If !jsonValidateSymbols, decoding will be faster, by skipping some checks:
			
 
				 	//   - If we see first character of null, false or true,
			
 
				 	//     do not validate subsequent characters.
			
@@ -882,34 +875,45 @@ func (d *jsonDecDriver) appendStringAsBytes() {
 
				 		case 'u':
			
 
				 			var r rune
			
 
				 			var rr uint32
			
 
				-			c = cs[i+4] // may help reduce bounds-checking
			
 
				+			if len(cs) < i+4 { // may help reduce bounds-checking
			
 
				+				d.d.errorf(`json: need at least 4 more bytes for unicode sequence`)
			
 
				+			}
			
 
				+			// c = cs[i+4] // may help reduce bounds-checking
			
 
				 			for j := 1; j < 5; j++ {
			
 
				 				c = jsonU4Set[cs[i+j]]
			
 
				 				if c == jsonU4SetErrVal {
			
 
				-					d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
			
 
				+					// d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
			
 
				+					r = unicode.ReplacementChar
			
 
				+					i += 4
			
 
				+					goto encode_rune
			
 
				 				}
			
 
				 				rr = rr*16 + uint32(c)
			
 
				 			}
			
 
				 			r = rune(rr)
			
 
				 			i += 4
			
 
				 			if utf16.IsSurrogate(r) {
			
 
				-				if !(cs[i+2] == 'u' && cs[i+i] == '\\') {
			
 
				-					d.d.errorf(`json: unquoteStr: invalid unicode sequence. Expecting \u`)
			
 
				-					return
			
 
				-				}
			
 
				-				i += 2
			
 
				-				c = cs[i+4] // may help reduce bounds-checking
			
 
				-				var rr1 uint32
			
 
				-				for j := 1; j < 5; j++ {
			
 
				-					c = jsonU4Set[cs[i+j]]
			
 
				-					if c == jsonU4SetErrVal {
			
 
				-						d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
			
 
				+				if len(cs) >= i+6 && cs[i+2] == 'u' && cs[i+1] == '\\' {
			
 
				+					i += 2
			
 
				+					// c = cs[i+4] // may help reduce bounds-checking
			
 
				+					var rr1 uint32
			
 
				+					for j := 1; j < 5; j++ {
			
 
				+						c = jsonU4Set[cs[i+j]]
			
 
				+						if c == jsonU4SetErrVal {
			
 
				+							// d.d.errorf(`json: unquoteStr: invalid hex char in \u unicode sequence: %q`, c)
			
 
				+							r = unicode.ReplacementChar
			
 
				+							i += 4
			
 
				+							goto encode_rune
			
 
				+						}
			
 
				+						rr1 = rr1*16 + uint32(c)
			
 
				 					}
			
 
				-					rr1 = rr1*16 + uint32(c)
			
 
				+					r = utf16.DecodeRune(r, rune(rr1))
			
 
				+					i += 4
			
 
				+				} else {
			
 
				+					r = unicode.ReplacementChar
			
 
				+					goto encode_rune
			
 
				 				}
			
 
				-				r = utf16.DecodeRune(r, rune(rr1))
			
 
				-				i += 4
			
 
				 			}
			
 
				+		encode_rune:
			
 
				 			w2 := utf8.EncodeRune(d.bstr[:], r)
			
 
				 			v = append(v, d.bstr[:w2]...)
			
 
				 		default:
			
@@ -1032,6 +1036,10 @@ func (d *jsonDecDriver) DecodeNaked() {
 
				 // reading multiple values from a stream containing json and non-json content.
			
 
				 // For example, a user can read a json value, then a cbor value, then a msgpack value,
			
 
				 // all from the same stream in sequence.
			
 
				+//
			
 
				+// Note that, when decoding quoted strings, invalid UTF-8 or invalid UTF-16 surrogate pairs
			
 
				+// are not treated as an error.
			
 
				+// Instead, they are replaced by the Unicode replacement character U+FFFD.
			
 
				 type JsonHandle struct {
			
 
				 	textEncodingType
			
 
				 	BasicHandle
			
--- a/codec/z_all_test.go
+++ b/codec/z_all_test.go
@@ -201,6 +201,7 @@ func testCodecGroup(t *testing.T) {
 
				 	t.Run("TestBincMammothMapsAndSlices", TestBincMammothMapsAndSlices)
			
 
				 	t.Run("TestSimpleMammothMapsAndSlices", TestSimpleMammothMapsAndSlices)
			
 
				 
			
 
				+	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
			
 
				 	// <tear-down code>
			
 
				 }
			
 
				 
			
@@ -224,6 +225,7 @@ func testJsonGroup(t *testing.T) {
 
				 	t.Run("TestJsonEmbeddedFieldPrecedence", TestJsonEmbeddedFieldPrecedence)
			
 
				 	t.Run("TestJsonLargeContainerLen", TestJsonLargeContainerLen)
			
 
				 	t.Run("TestJsonMammothMapsAndSlices", TestJsonMammothMapsAndSlices)
			
 
				+	t.Run("TestJsonInvalidUnicode", TestJsonInvalidUnicode)
			
 
				 }
			
 
				 
			
 
				 func testBincGroup(t *testing.T) {