ソースを参照

optimize read string

Tao Wen 9 年 前
コミット
c44e7c496a
3 ファイル変更、70 行追加、4 行削除
  1. 44 3
      jsoniter.go
  2. 2 0
      jsoniter_object_test.go
  3. 24 1
      jsoniter_string_test.go

+ 44 - 3
jsoniter.go

@@ -271,7 +271,40 @@ func (iter *Iterator) ReadInt64() (ret int64) {
 }
 
 func (iter *Iterator) ReadString() (ret string) {
-	str := make([]byte, 0, 8)
+	return string(iter.ReadStringAsBytes())
+}
+
+// stringEnd returns the index just past the first unescaped '"' in data, or -1
+// if there is none, and whether a backslash was seen while scanning up to there.
+func stringEnd(data []byte) (int, bool) {
+	escaped := false
+	for i, c := range data {
+		if c == '"' {
+			if !escaped {
+				return i + 1, false
+			} else {
+				j := i - 1
+				for {
+					if j < 0 || data[j] != '\\' {
+						return i + 1, true // even number of backslashes before the quote: it ends the string
+					}
+					j--
+					if j < 0 || data[j] != '\\' {
+						break // odd number of backslashes: the quote is escaped, keep scanning
+					}
+					j--
+
+				}
+			}
+		} else if c == '\\' {
+			escaped = true
+		}
+	}
+
+	return -1, escaped
+}
+
+func (iter *Iterator) ReadStringAsBytes() (ret []byte) {
 	c := iter.readByte()
 	if c == 'n' {
 		iter.skipNull()
@@ -281,10 +314,17 @@ func (iter *Iterator) ReadString() (ret string) {
 		iter.ReportError("ReadString", `expects " or n`)
 		return
 	}
+	end, escaped := stringEnd(iter.buf[iter.head:])
+	if end != -1 && !escaped {
+		ret = iter.buf[iter.head:iter.head+end-1]
+		iter.head += end
+		return ret
+	}
+	str := make([]byte, 0, 8)
 	for iter.Error == nil {
 		c = iter.readByte()
 		if c == '"' {
-			return string(str)
+			return str
 		}
 		if c == '\\' {
 			c = iter.readByte()
@@ -543,7 +583,8 @@ func (iter *Iterator) ReadObject() (ret string) {
 }
 
 func (iter *Iterator) readObjectField() (ret string) {
-	field := iter.ReadString()
+	str := iter.ReadStringAsBytes()
+	field := *(*string)(unsafe.Pointer(&str))
 	if iter.Error != nil {
 		return
 	}

+ 2 - 0
jsoniter_object_test.go

@@ -3,6 +3,7 @@ package jsoniter
 import (
 	"testing"
 	"encoding/json"
+	"fmt"
 )
 
 func Test_empty_object(t *testing.T) {
@@ -17,6 +18,7 @@ func Test_one_field(t *testing.T) {
 	iter := ParseString(`{"a": "b"}`)
 	field := iter.ReadObject()
 	if field != "a" {
+		fmt.Println(iter.Error)
 		t.Fatal(field)
 	}
 	value := iter.ReadString()

+ 24 - 1
jsoniter_string_test.go

@@ -72,6 +72,18 @@ func Test_string_escape_unicode_with_surrogate(t *testing.T) {
 	}
 }
 
+func Test_string_as_bytes(t *testing.T) {
+	iter := Parse(bytes.NewBufferString(`"hello""world"`), 4096)
+	val := string(iter.ReadStringAsBytes())
+	if val != "hello" {
+		t.Fatal(val)
+	}
+	val = string(iter.ReadStringAsBytes())
+	if val != "world" {
+		t.Fatal(val)
+	}
+}
+
 func Benchmark_jsoniter_unicode(b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		iter := ParseString(`"\ud83d\udc4a"`)
@@ -80,12 +92,23 @@ func Benchmark_jsoniter_unicode(b *testing.B) {
 }
 
 func Benchmark_jsoniter_ascii(b *testing.B) {
+	iter := ParseString(`"hello, world!"`)
+	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
-		iter := ParseString(`"hello"`)
+		iter.Reuse(iter.buf)
 		iter.ReadString()
 	}
 }
 
+func Benchmark_jsoniter_string_as_bytes(b *testing.B) {
+	iter := ParseString(`"hello, world!"`)
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		iter.Reuse(iter.buf)
+		iter.ReadStringAsBytes()
+	}
+}
+
 func Benchmark_json_unicode(b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		result := ""