Bladeren bron

#69 fix unicode support

Tao Wen 8 jaren geleden
bovenliggende
commit
caaa04195e
3 gewijzigde bestanden met toevoegingen van 83 en 30 verwijderingen
  1. 1 0
      feature_config.go
  2. 74 30
      feature_stream_string.go
  3. 8 0
      jsoniter_string_test.go

+ 1 - 0
feature_config.go

@@ -49,6 +49,7 @@ var ConfigCompatibleWithStandardLibrary = Config{
 }.Froze()
 
 var ConfigFastest = Config{
+	EscapeHtml:  false,
 	MarshalFloatWith6Digits: true,
 }.Froze()
 

+ 74 - 30
feature_stream_string.go

@@ -246,7 +246,74 @@ func (stream *Stream) WriteStringWithHtmlEscaped(s string) {
 		return
 	}
 	stream.n = n
-	writeStringSlowPath(stream, htmlSafeSet, i, s, valLen)
+	writeStringSlowPathWithHtmlEscaped(stream, i, s, valLen)
+}
+
+// writeStringSlowPathWithHtmlEscaped writes s[i:valLen] to stream as the tail
+// of a JSON string literal and then writes the closing double quote.
+//
+// Bytes below utf8.RuneSelf are copied through when htmlSafeSet marks them as
+// safe; every other such byte is escaped, either with a two-byte escape
+// (\\, \", \n, \r, \t) or as \u00XX. A byte that does not start a valid UTF-8
+// sequence is replaced with \ufffd, and U+2028 / U+2029 are always written as
+// \u2028 / \u2029. All other multi-byte runes are copied through unchanged.
+//
+// NOTE(review): presumably the caller has already emitted the opening quote
+// and s[:i], and valLen equals len(s) - confirm against
+// WriteStringWithHtmlEscaped.
+func writeStringSlowPathWithHtmlEscaped(stream *Stream, i int, s string, valLen int) {
+	// start marks the beginning of the pending run of bytes that can be
+	// copied to the stream verbatim.
+	start := i
+	// The loop deliberately has no post statement: every branch advances i by
+	// exactly the number of bytes it consumed. Stepping i again in the loop
+	// header would skip the byte that follows an escaped character (leaving
+	// it unescaped) and would land in the middle of multi-byte runes.
+	for i < valLen {
+		if b := s[i]; b < utf8.RuneSelf {
+			if htmlSafeSet[b] {
+				i++
+				continue
+			}
+			// Flush the verbatim run that precedes the byte to escape.
+			if start < i {
+				stream.WriteRaw(s[start:i])
+			}
+			switch b {
+			case '\\', '"':
+				stream.writeTwoBytes('\\', b)
+			case '\n':
+				stream.writeTwoBytes('\\', 'n')
+			case '\r':
+				stream.writeTwoBytes('\\', 'r')
+			case '\t':
+				stream.writeTwoBytes('\\', 't')
+			default:
+				// This encodes bytes < 0x20 except for \t, \n and \r.
+				// Because htmlSafeSet is consulted above, it also escapes
+				// <, >, and &, which can lead to security holes when
+				// user-controlled strings are rendered into JSON
+				// and served to some browsers.
+				stream.WriteRaw(`\u00`)
+				stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
+			}
+			i++
+			start = i
+			continue
+		}
+		c, size := utf8.DecodeRuneInString(s[i:])
+		if c == utf8.RuneError && size == 1 {
+			// Invalid UTF-8: emit the replacement character instead of
+			// copying the bad byte into the output.
+			if start < i {
+				stream.WriteRaw(s[start:i])
+			}
+			stream.WriteRaw(`\ufffd`)
+			i++
+			start = i
+			continue
+		}
+		// U+2028 is LINE SEPARATOR.
+		// U+2029 is PARAGRAPH SEPARATOR.
+		// They are both technically valid characters in JSON strings,
+		// but don't work in JSONP, which has to be evaluated as JavaScript,
+		// and can lead to security holes there. It is valid JSON to
+		// escape them, so we do so unconditionally.
+		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+		if c == '\u2028' || c == '\u2029' {
+			if start < i {
+				stream.WriteRaw(s[start:i])
+			}
+			stream.WriteRaw(`\u202`)
+			stream.writeByte(hex[c&0xF])
+			i += size
+			start = i
+			continue
+		}
+		// Any other valid rune stays part of the pending verbatim run.
+		i += size
+	}
+	// Flush whatever verbatim run is still pending, then close the literal.
+	if start < len(s) {
+		stream.WriteRaw(s[start:])
+	}
+	stream.writeByte('"')
 }
 
 func (stream *Stream) WriteString(s string) {
@@ -278,10 +345,10 @@ func (stream *Stream) WriteString(s string) {
 		return
 	}
 	stream.n = n
-	writeStringSlowPath(stream, safeSet, i, s, valLen)
+	writeStringSlowPath(stream, i, s, valLen)
 }
 
-func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s string, valLen int) {
+func writeStringSlowPath(stream *Stream, i int, s string, valLen int) {
 	start := i
 	// for the remaining parts, we process them char by char
 	for ; i < valLen; i++ {
@@ -315,34 +382,11 @@ func writeStringSlowPath(stream *Stream, safeSet [utf8.RuneSelf]bool, i int, s s
 			start = i
 			continue
 		}
-		c, size := utf8.DecodeRuneInString(s[i:])
-		if c == utf8.RuneError && size == 1 {
-			if start < i {
-				stream.WriteRaw(s[start:i])
-			}
-			stream.WriteRaw(`\ufffd`)
-			i += size
-			start = i
-			continue
-		}
-		// U+2028 is LINE SEPARATOR.
-		// U+2029 is PARAGRAPH SEPARATOR.
-		// They are both technically valid characters in JSON strings,
-		// but don't work in JSONP, which has to be evaluated as JavaScript,
-		// and can lead to security holes there. It is valid JSON to
-		// escape them, so we do so unconditionally.
-		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
-		if c == '\u2028' || c == '\u2029' {
-			if start < i {
-				stream.WriteRaw(s[start:i])
-			}
-			stream.WriteRaw(`\u202`)
-			stream.writeByte(hex[c&0xF])
-			i += size
-			start = i
-			continue
+		if start < i {
+			stream.WriteRaw(s[start:i])
 		}
-		i += size
+		start = i
+		continue
 	}
 	if start < len(s) {
 		stream.WriteRaw(s[start:])

+ 8 - 0
jsoniter_string_test.go

@@ -136,6 +136,14 @@ func Test_string_encode_with_std_without_html_escape(t *testing.T) {
 	}
 }
 
+// Test_unicode checks that a map value containing non-ASCII text is marshaled
+// verbatim, both through the package-level MarshalToString and through a
+// Config whose EscapeHtml field is false. The second result of each call is
+// asserted to be nil rather than discarded.
+func Test_unicode(t *testing.T) {
+	should := require.New(t)
+	input := map[string]interface{}{"a": "数字山谷"}
+	output, err := MarshalToString(input)
+	should.Nil(err)
+	should.Equal(`{"a":"数字山谷"}`, output)
+	output, err = Config{EscapeHtml: false}.Froze().MarshalToString(input)
+	should.Nil(err)
+	should.Equal(`{"a":"数字山谷"}`, output)
+}
+
 func Benchmark_jsoniter_unicode(b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		iter := ParseString(ConfigDefault, `"\ud83d\udc4a"`)