Explorar o código

optimize skip

Tao Wen hai 9 anos
pai
achega
ea435e3473
Modificáronse 3 ficheiros con 223 adicións e 117 borrados
  1. 68 111
      jsoniter.go
  2. 138 0
      jsoniter_find_end_test.go
  3. 17 6
      jsoniter_skip_test.go

+ 68 - 111
jsoniter.go

@@ -303,50 +303,89 @@ func (iter *Iterator) ReadString() (ret string) {
 	return string(iter.ReadStringAsBytes())
 }
 
+// adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
 // Tries to find the end of string
 // Support if string contains escaped quote symbols.
-func stringEnd(data []byte) (int, bool) {
+func (iter *Iterator) findStringEnd() (int, bool) {
 	escaped := false
-	for i, c := range data {
+	for i := iter.head; i < iter.tail; i++ {
+		c := iter.buf[i]
 		if c == '"' {
 			if !escaped {
 				return i + 1, false
 			} else {
 				j := i - 1
 				for {
-					if j < 0 || data[j] != '\\' {
-						return i + 1, true // even number of backslashes
+					if j < iter.head || iter.buf[j] != '\\' {
+						// even number of backslashes
+						// either end of buffer, or " found
+						return i + 1, true
 					}
 					j--
-					if j < 0 || data[j] != '\\' {
-						break // odd number of backslashes
+					if j < iter.head || iter.buf[j] != '\\' {
+						// odd number of backslashes
+						// it is \" or \\\"
+						break
 					}
 					j--
-
 				}
 			}
 		} else if c == '\\' {
 			escaped = true
 		}
 	}
+	j := iter.tail - 1
+	for {
+		if j < iter.head || iter.buf[j] != '\\' {
+			// even number of backslashes
+			// either end of buffer, or " found
+			return -1, false // do not end with \
+		}
+		j--
+		if j < iter.head || iter.buf[j] != '\\' {
+			// odd number of backslashes
+			// it is \" or \\\"
+			break
+		}
+		j--
+
+	}
+	return -1, true // end with \
+}
+
 
-	return -1, escaped
+func (iter *Iterator) skipUntilBreak() {
+	// true, false, null, number
+	for {
+		for i := iter.head; i < iter.tail; i++ {
+			c := iter.buf[i]
+			switch c {
+			case ' ', '\n', '\r', '\t', ',', '}', ']':
+				iter.head = i
+				return
+			}
+		}
+		if (!iter.loadMore()) {
+			return
+		}
+	}
 }
 
 func (iter *Iterator) ReadStringAsBytes() (ret []byte) {
 	c := iter.readByte()
 	if c == 'n' {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		return
 	}
 	if c != '"' {
 		iter.ReportError("ReadString", `expects " or n`)
 		return
 	}
-	end, escaped := stringEnd(iter.buf[iter.head:iter.tail])
+	end, escaped := iter.findStringEnd()
 	if end != -1 && !escaped {
-		ret = iter.buf[iter.head:iter.head+end-1]
-		iter.head += end
+		// fast path: reuse the underlying buffer
+		ret = iter.buf[iter.head:end-1]
+		iter.head = end
 		return ret
 	}
 	str := make([]byte, 0, 8)
@@ -506,7 +545,7 @@ func (iter *Iterator) ReadArray() (ret bool) {
 	}
 	switch c {
 	case 'n': {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		return false // null
 	}
 	case '[': {
@@ -534,7 +573,7 @@ func (iter *Iterator) ReadArray() (ret bool) {
 func (iter *Iterator) ReadArrayCB(cb func()) {
 	c := iter.nextToken()
 	if c == 'n' {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		return // null
 	}
 	if c != '[' {
@@ -567,7 +606,7 @@ func (iter *Iterator) ReadArrayCB(cb func()) {
 func (iter *Iterator) ReadObjectCB(cb func(string)) {
 	c := iter.nextToken()
 	if c == 'n' {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		return // null
 	}
 	if c != '{' {
@@ -605,7 +644,7 @@ func (iter *Iterator) ReadObject() (ret string) {
 	}
 	switch c {
 	case 'n': {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		if iter.Error != nil {
 			return
 		}
@@ -703,13 +742,13 @@ func (iter *Iterator) ReadBool() (ret bool) {
 	}
 	switch c {
 	case 't':
-		iter.skipTrue()
+		iter.skipUntilBreak()
 		if iter.Error != nil {
 			return
 		}
 		return true
 	case 'f':
-		iter.skipFalse()
+		iter.skipUntilBreak()
 		if iter.Error != nil {
 			return
 		}
@@ -720,84 +759,27 @@ func (iter *Iterator) ReadBool() (ret bool) {
 	}
 }
 
-func (iter *Iterator) skipTrue() {
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case 'r', 'u', 'e':
-				continue
-			}
-			iter.head = i
-			return
-		}
-		if !iter.loadMore() {
-			return
-		}
-	}
-}
-
-func (iter *Iterator) skipFalse() {
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case 'a', 'l', 's', 'e':
-				continue
-			}
-			iter.head = i
-			return
-		}
-		if !iter.loadMore() {
-			return
-		}
-	}
-}
-
 func (iter *Iterator) ReadNull() (ret bool) {
 	c := iter.readByte()
 	if c == 'n' {
-		iter.skipNull()
+		iter.skipUntilBreak()
 		return true
 	}
 	iter.unreadByte()
 	return false
 }
 
-func (iter *Iterator) skipNull() {
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case 'u', 'l':
-				continue
-			}
-			iter.head = i
-			return
-		}
-		if !iter.loadMore() {
-			return
-		}
-	}
-}
-
 func (iter *Iterator) Skip() {
 	c := iter.readByte()
 	switch c {
 	case '"':
 		iter.skipString()
-	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
-		iter.skipNumber()
+	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
+		iter.skipUntilBreak()
 	case '[':
 		iter.skipArray()
 	case '{':
 		iter.skipObject()
-	case 't':
-		iter.skipTrue()
-	case 'f':
-		iter.skipFalse()
-	case 'n':
-		iter.skipNull()
 	default:
 		iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
 		return
@@ -805,42 +787,17 @@ func (iter *Iterator) Skip() {
 }
 
 func (iter *Iterator) skipString() {
-	escaped := false
 	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case '"':
-				if escaped {
-					escaped = false
-				} else {
-					iter.head = i+1
-					return
-				}
-			case '\\':
-				escaped = !escaped
-			default:
-				escaped= false
+		end, escaped := iter.findStringEnd()
+		if end == -1 {
+			if !iter.loadMore() {
+				return
 			}
-		}
-		if !iter.loadMore() {
-			return
-		}
-	}
-}
-
-func (iter *Iterator) skipNumber() {
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
-				continue
+			if escaped {
+				iter.head = 1 // skip the first char as last char read is \
 			}
-			iter.head = i
-			return
-		}
-		if !iter.loadMore() {
+		} else {
+			iter.head = end
 			return
 		}
 	}

+ 138 - 0
jsoniter_find_end_test.go

@@ -0,0 +1,138 @@
+package jsoniter
+
+import (
+	"testing"
+	"io"
+)
+
+func Test_string_end(t *testing.T) {
+	end, escaped := ParseString(`abc"`).findStringEnd()
+	if end != 4 {
+		t.Fatal(end)
+	}
+	if escaped != false {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`abc\\"`).findStringEnd()
+	if end != 6 {
+		t.Fatal(end)
+	}
+	if escaped != true {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`abc\\\\"`).findStringEnd()
+	if end != 8 {
+		t.Fatal(end)
+	}
+	if escaped != true {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`abc\"`).findStringEnd()
+	if end != -1 {
+		t.Fatal(end)
+	}
+	if escaped != false {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`abc\`).findStringEnd()
+	if end != -1 {
+		t.Fatal(end)
+	}
+	if escaped != true {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`abc\\`).findStringEnd()
+	if end != -1 {
+		t.Fatal(end)
+	}
+	if escaped != false {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`\\`).findStringEnd()
+	if end != -1 {
+		t.Fatal(end)
+	}
+	if escaped != false {
+		t.Fatal(escaped)
+	}
+	end, escaped = ParseString(`\`).findStringEnd()
+	if end != -1 {
+		t.Fatal(end)
+	}
+	if escaped != true {
+		t.Fatal(escaped)
+	}
+}
+
+type StagedReader struct {
+	r1 string
+	r2 string
+	r3 string
+	r  int
+}
+
+func (reader *StagedReader) Read(p []byte) (n int, err error) {
+	reader.r++
+	switch reader.r {
+	case 1:
+		copy(p, []byte(reader.r1))
+		return len(reader.r1), nil
+	case 2:
+		copy(p, []byte(reader.r2))
+		return len(reader.r2), nil
+	case 3:
+		copy(p, []byte(reader.r3))
+		return len(reader.r3), nil
+	default:
+		return 0, io.EOF
+	}
+}
+
+func Test_skip_string(t *testing.T) {
+	iter := ParseString(`"abc`)
+	iter.skipString()
+	if iter.head != 1 {
+		t.Fatal(iter.head)
+	}
+	iter = ParseString(`\""abc`)
+	iter.skipString()
+	if iter.head != 3 {
+		t.Fatal(iter.head)
+	}
+	reader := &StagedReader{
+		r1: `abc`,
+		r2: `"`,
+	}
+	iter = Parse(reader, 4096)
+	iter.skipString()
+	if iter.head != 1 {
+		t.Fatal(iter.head)
+	}
+	reader = &StagedReader{
+		r1: `abc`,
+		r2: `1"`,
+	}
+	iter = Parse(reader, 4096)
+	iter.skipString()
+	if iter.head != 2 {
+		t.Fatal(iter.head)
+	}
+	reader = &StagedReader{
+		r1: `abc\`,
+		r2: `"`,
+	}
+	iter = Parse(reader, 4096)
+	iter.skipString()
+	if iter.Error != io.EOF {
+		t.Fatal(iter.Error)
+	}
+	reader = &StagedReader{
+		r1: `abc\`,
+		r2: `""`,
+	}
+	iter = Parse(reader, 4096)
+	iter.skipString()
+	if iter.head != 2 {
+		t.Fatal(iter.head)
+	}
+}

+ 17 - 6
jsoniter_skip_test.go

@@ -5,8 +5,9 @@ import (
 	"encoding/json"
 )
 
-func Test_skip_string(t *testing.T) {
-	iter := ParseString(`["a", "b"]`)
+
+func Test_skip_number(t *testing.T) {
+	iter := ParseString(`[-0.12, "b"]`)
 	iter.ReadArray()
 	iter.Skip()
 	iter.ReadArray()
@@ -15,8 +16,8 @@ func Test_skip_string(t *testing.T) {
 	}
 }
 
-func Test_skip_string_with_escape(t *testing.T) {
-	iter := ParseString(`["a\"", "b"]`)
+func Test_skip_null(t *testing.T) {
+	iter := ParseString(`[null , "b"]`)
 	iter.ReadArray()
 	iter.Skip()
 	iter.ReadArray()
@@ -25,8 +26,18 @@ func Test_skip_string_with_escape(t *testing.T) {
 	}
 }
 
-func Test_skip_number(t *testing.T) {
-	iter := ParseString(`[-0.12, "b"]`)
+func Test_skip_true(t *testing.T) {
+	iter := ParseString(`[true , "b"]`)
+	iter.ReadArray()
+	iter.Skip()
+	iter.ReadArray()
+	if iter.ReadString() != "b" {
+		t.FailNow()
+	}
+}
+
+func Test_skip_false(t *testing.T) {
+	iter := ParseString(`[false , "b"]`)
 	iter.ReadArray()
 	iter.Skip()
 	iter.ReadArray()