Преглед изворни кода

implement skip number and string strictly

Tao Wen пре 8 година
родитељ
комит
5eded4f6ae

+ 2 - 141
feature_iter_skip.go

@@ -80,154 +80,15 @@ func (iter *Iterator) Skip() {
 	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 		iter.skipNumber()
 	case '[':
-		iter.skipArray()
+		panic("not implemented")
 	case '{':
-		iter.skipObject()
+		panic("not implemented")
 	default:
 		iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
 		return
 	}
 }
 
-func (iter *Iterator) skipString() {
-	for {
-		end, escaped := iter.findStringEnd()
-		if end == -1 {
-			if !iter.loadMore() {
-				iter.ReportError("skipString", "incomplete string")
-				return
-			}
-			if escaped {
-				iter.head = 1 // skip the first char as last char read is \
-			}
-		} else {
-			iter.head = end
-			return
-		}
-	}
-}
-
-// adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
-// Tries to find the end of string
-// Support if string contains escaped quote symbols.
-func (iter *Iterator) findStringEnd() (int, bool) {
-	escaped := false
-	for i := iter.head; i < iter.tail; i++ {
-		c := iter.buf[i]
-		if c == '"' {
-			if !escaped {
-				return i + 1, false
-			}
-			j := i - 1
-			for {
-				if j < iter.head || iter.buf[j] != '\\' {
-					// even number of backslashes
-					// either end of buffer, or " found
-					return i + 1, true
-				}
-				j--
-				if j < iter.head || iter.buf[j] != '\\' {
-					// odd number of backslashes
-					// it is \" or \\\"
-					break
-				}
-				j--
-			}
-		} else if c == '\\' {
-			escaped = true
-		}
-	}
-	j := iter.tail - 1
-	for {
-		if j < iter.head || iter.buf[j] != '\\' {
-			// even number of backslashes
-			// either end of buffer, or " found
-			return -1, false // do not end with \
-		}
-		j--
-		if j < iter.head || iter.buf[j] != '\\' {
-			// odd number of backslashes
-			// it is \" or \\\"
-			break
-		}
-		j--
-
-	}
-	return -1, true // end with \
-}
-
-func (iter *Iterator) skipArray() {
-	level := 1
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			switch iter.buf[i] {
-			case '"': // If inside string, skip it
-				iter.head = i + 1
-				iter.skipString()
-				i = iter.head - 1 // it will be i++ soon
-			case '[': // If open symbol, increase level
-				level++
-			case ']': // If close symbol, increase level
-				level--
-
-				// If we have returned to the original level, we're done
-				if level == 0 {
-					iter.head = i + 1
-					return
-				}
-			}
-		}
-		if !iter.loadMore() {
-			iter.ReportError("skipObject", "incomplete array")
-			return
-		}
-	}
-}
-
-func (iter *Iterator) skipObject() {
-	level := 1
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			switch iter.buf[i] {
-			case '"': // If inside string, skip it
-				iter.head = i + 1
-				iter.skipString()
-				i = iter.head - 1 // it will be i++ soon
-			case '{': // If open symbol, increase level
-				level++
-			case '}': // If close symbol, increase level
-				level--
-
-				// If we have returned to the original level, we're done
-				if level == 0 {
-					iter.head = i + 1
-					return
-				}
-			}
-		}
-		if !iter.loadMore() {
-			iter.ReportError("skipObject", "incomplete object")
-			return
-		}
-	}
-}
-
-func (iter *Iterator) skipNumber() {
-	for {
-		for i := iter.head; i < iter.tail; i++ {
-			c := iter.buf[i]
-			switch c {
-			case ' ', '\n', '\r', '\t', ',', '}', ']':
-				iter.head = i
-				return
-			}
-		}
-		if !iter.loadMore() {
-			return
-		}
-	}
-}
-
 func (iter *Iterator) skipFourBytes(b1, b2, b3, b4 byte) {
 	if iter.readByte() != b1 {
 		iter.ReportError("skipFourBytes", fmt.Sprintf("expect %s", string([]byte{b1, b2, b3, b4})))

+ 144 - 0
feature_iter_skip_sloppy.go

@@ -0,0 +1,144 @@
+//+build jsoniter-sloppy
+
+package jsoniter
+
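+// Sloppy but faster implementation: it does not validate the input JSON.
+// NOTE(review): Go build tags may only contain letters, digits, '_' and '.';
+// confirm that the hyphenated tag "jsoniter-sloppy" can actually select this file.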
+
+// skipNumber moves iter.head to the first byte that can end a JSON number
+// (space, newline, carriage return, tab, ',', '}' or ']'). The bytes stepped
+// over are not checked for being a well-formed number. When the buffer runs
+// out before such a byte is seen, loadMore is asked for more input and the
+// function returns silently once loadMore reports false.
+func (iter *Iterator) skipNumber() {
+	pos := iter.head
+	for {
+		if pos >= iter.tail {
+			// current buffer exhausted without finding a terminator
+			if !iter.loadMore() {
+				return
+			}
+			pos = iter.head
+			continue
+		}
+		switch iter.buf[pos] {
+		case ' ', '\n', '\r', '\t', ',', '}', ']':
+			// leave the terminator itself unread
+			iter.head = pos
+			return
+		}
+		pos++
+	}
+}
+
+// skipArray skips to just past the ']' that closes the current array without
+// validating the content. The nesting level starts at 1, i.e. one '[' is
+// treated as already open (presumably consumed by the caller -- TODO confirm).
+// Strings are skipped with skipString so that brackets inside them are not
+// counted. If the input ends before the array is closed, the error
+// "incomplete array" is reported under the operation name "skipArray".
+func (iter *Iterator) skipArray() {
+	level := 1
+	for {
+		for i := iter.head; i < iter.tail; i++ {
+			switch iter.buf[i] {
+			case '"': // inside a string: skip it as a whole
+				iter.head = i + 1
+				iter.skipString()
+				i = iter.head - 1 // the loop's i++ brings it back to iter.head
+			case '[': // opening bracket: one level deeper
+				level++
+			case ']': // closing bracket: one level up
+				level--
+
+				// Back at the level of the array being skipped: done.
+				if level == 0 {
+					iter.head = i + 1
+					return
+				}
+			}
+		}
+		if !iter.loadMore() {
+			iter.ReportError("skipArray", "incomplete array")
+			return
+		}
+	}
+}
+
+// skipObject skips to just past the '}' that closes the current object without
+// validating the content. The depth counter starts at 1, so one '{' is treated
+// as already open. Strings are handed to skipString so that braces inside them
+// are ignored. If the input ends first, "incomplete object" is reported.
+func (iter *Iterator) skipObject() {
+	depth := 1
+	for {
+		for pos := iter.head; pos < iter.tail; pos++ {
+			b := iter.buf[pos]
+			if b == '"' {
+				// step over the whole string, then resume right after it
+				iter.head = pos + 1
+				iter.skipString()
+				pos = iter.head - 1 // the pos++ of the loop restores iter.head
+				continue
+			}
+			if b == '{' {
+				depth++
+				continue
+			}
+			if b != '}' {
+				continue
+			}
+			depth--
+			if depth == 0 {
+				// closing brace of the object being skipped
+				iter.head = pos + 1
+				return
+			}
+		}
+		if !iter.loadMore() {
+			iter.ReportError("skipObject", "incomplete object")
+			return
+		}
+	}
+}
+
+// skipString moves iter.head just past the closing quote of the current
+// string, using findStringEnd to locate it. While the quote is not in the
+// buffer it keeps calling loadMore; when loadMore reports false the error
+// "incomplete string" is reported.
+func (iter *Iterator) skipString() {
+	for {
+		closing, trailingEscape := iter.findStringEnd()
+		if closing != -1 {
+			iter.head = closing
+			return
+		}
+		if !iter.loadMore() {
+			iter.ReportError("skipString", "incomplete string")
+			return
+		}
+		if trailingEscape {
+			// the last byte read before loading was a backslash, so the
+			// first byte of the newly loaded data is escaped: step over it
+			iter.head = 1
+		}
+	}
+}
+
+// adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
+//
+// findStringEnd scans iter.buf[iter.head:iter.tail] for the double quote that
+// closes the current string, treating a quote preceded by an odd number of
+// backslashes as escaped.
+//
+// When the closing quote is found, it returns the index just past that quote;
+// the boolean then only tells whether a backslash was seen before it. When no
+// closing quote is in the scanned bytes, it returns -1 and the boolean tells
+// whether those bytes end with an odd number of backslashes, i.e. whether the
+// last backslash still escapes a byte that has not been scanned yet.
+func (iter *Iterator) findStringEnd() (int, bool) {
+	escaped := false
+	for i := iter.head; i < iter.tail; i++ {
+		c := iter.buf[i]
+		if c == '"' {
+			if !escaped {
+				// no backslash seen so far, so this quote cannot be escaped
+				return i + 1, false
+			}
+			// walk backwards over the backslashes directly before the quote,
+			// two at a time, to find out whether their count is odd or even
+			j := i - 1
+			for {
+				if j < iter.head || iter.buf[j] != '\\' {
+					// even number of backslashes (possibly zero):
+					// the quote is not escaped and closes the string
+					return i + 1, true
+				}
+				j--
+				if j < iter.head || iter.buf[j] != '\\' {
+					// odd number of backslashes:
+					// it is \" or \\\" and the quote is escaped, keep scanning
+					break
+				}
+				j--
+			}
+		} else if c == '\\' {
+			escaped = true
+		}
+	}
+	// no closing quote found: check the parity of the backslashes at the very
+	// end of the scanned bytes, again two at a time
+	j := iter.tail - 1
+	for {
+		if j < iter.head || iter.buf[j] != '\\' {
+			// even number of trailing backslashes (possibly zero)
+			return -1, false // scanned bytes do not end with an unpaired \
+		}
+		j--
+		if j < iter.head || iter.buf[j] != '\\' {
+			// odd number of trailing backslashes
+			break
+		}
+		j--
+
+	}
+	return -1, true // scanned bytes end with an unpaired \
+}

+ 67 - 0
feature_iter_skip_strict.go

@@ -0,0 +1,67 @@
+//+build !jsoniter-sloppy
+
+package jsoniter
+
+import "fmt"
+
+// skipNumber skips a JSON number in the strict build. It first tries the
+// in-buffer scan of trySkipNumber; when that scan cannot decide, it un-reads
+// one byte and lets the float reader parse the number so that malformed input
+// is reported through the iterator's error.
+//
+// The fallback parses with 64-bit precision so that the check is not narrower
+// than the float64 reference used by the skip tests.
+// NOTE(review): assumes ReadFloat32 reports an error for values outside the
+// float32 range -- confirm against the float reader.
+func (iter *Iterator) skipNumber() {
+	if iter.trySkipNumber() {
+		return
+	}
+	// presumably puts back the number's first byte, which the caller consumed
+	// while dispatching on it -- TODO confirm against Skip
+	iter.unreadByte()
+	iter.ReadFloat64()
+}
+
+// trySkipNumber is the fast path of the strict number skip. It scans only the
+// bytes already in the buffer, starting at iter.head, and accepts the simple
+// shape "digits, optionally one dot followed by digits".
+//
+// It returns true when the number has been dealt with: either iter.head was
+// moved to the terminating byte, or an error has already been reported. It
+// returns false when the caller must fall back to the full number parser:
+// the buffer ended first, a byte such as 'e', '+' or '-' was met, or no byte
+// of the number was scanned here at all.
+func (iter *Iterator) trySkipNumber() bool {
+	dotFound := false
+	for i := iter.head; i < iter.tail; i++ {
+		c := iter.buf[i]
+		switch c {
+		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+		case '.':
+			if dotFound {
+				iter.ReportError("validateNumber", `more than one dot found in number`)
+				return true // already failed
+			}
+			if i+1 == iter.tail {
+				// the byte after the dot is not buffered; let the full
+				// parser, which can load more input, decide
+				return false
+			}
+			switch iter.buf[i+1] {
+			case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			default:
+				// "1." followed by a terminator is not a JSON number
+				iter.ReportError("validateNumber", `missing digit after dot`)
+				return true // already failed
+			}
+			dotFound = true
+		case ',', ']', '}', ' ', '\t', '\n', '\r':
+			if i == iter.head {
+				// nothing was scanned here, so the number consists only of
+				// the byte the caller consumed, which may be a bare '-'
+				return false
+			}
+			iter.head = i
+			return true // must be valid
+		default:
+			return false // may be invalid
+		}
+	}
+	return false
+}
+
+// skipString skips a JSON string in the strict build. The in-buffer scan of
+// trySkipString is tried first; when it gives up, one byte is un-read and the
+// string is consumed with ReadString instead.
+func (iter *Iterator) skipString() {
+	if iter.trySkipString() {
+		return
+	}
+	iter.unreadByte()
+	iter.ReadString()
+}
+
+// trySkipString is the fast path of the strict string skip. It scans the
+// buffered bytes from iter.head and returns true when the string has been
+// dealt with: either iter.head was moved just past the closing quote, or a
+// byte below ' ' was found and reported as an error. It returns false,
+// leaving iter.head untouched, when a backslash is met or the buffer ends
+// before the closing quote.
+func (iter *Iterator) trySkipString() bool {
+	for pos := iter.head; pos < iter.tail; pos++ {
+		b := iter.buf[pos]
+		switch {
+		case b == '"':
+			iter.head = pos + 1
+			return true // valid
+		case b == '\\':
+			// escape sequences are left to the caller's fallback
+			return false
+		case b < ' ':
+			iter.ReportError("ReadString",
+				fmt.Sprintf(`invalid control character found: %d`, b))
+			return true // already failed
+		}
+	}
+	return false
+}
+
+// skipObject is a placeholder in the strict build: it has an empty body and
+// does not advance the iterator.
+// NOTE(review): Skip in feature_iter_skip.go panics with "not implemented" for
+// '{' in this commit, so the strict implementation is still to be written.
+func (iter *Iterator) skipObject() {
+}
+
+// skipArray is a placeholder in the strict build: it has an empty body and
+// does not advance the iterator.
+// NOTE(review): Skip in feature_iter_skip.go panics with "not implemented" for
+// '[' in this commit, so the strict implementation is still to be written.
+func (iter *Iterator) skipArray() {
+}

+ 13 - 4
jsoniter_skip_test.go

@@ -8,15 +8,24 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-func Test_skip_number(t *testing.T) {
+func Test_skip_number_in_array(t *testing.T) {
+	should := require.New(t)
 	iter := ParseString(ConfigDefault, `[-0.12, "stream"]`)
 	iter.ReadArray()
 	iter.Skip()
 	iter.ReadArray()
-	if iter.ReadString() != "stream" {
-		t.FailNow()
-	}
+	should.Nil(iter.Error)
+	should.Equal("stream", iter.ReadString())
+}
 
+func Test_skip_string_in_array(t *testing.T) {
+	should := require.New(t)
+	iter := ParseString(ConfigDefault, `["hello", "stream"]`)
+	iter.ReadArray()
+	iter.Skip()
+	iter.ReadArray()
+	should.Nil(iter.Error)
+	should.Equal("stream", iter.ReadString())
 }
 
 func Test_skip_null(t *testing.T) {

+ 2 - 0
jsoniter_find_end_test.go → jsoniter_sloppy_test.go

@@ -1,3 +1,5 @@
+//+build jsoniter-sloppy
+
 package jsoniter
 
 import (

+ 16 - 0
skip_tests/number/inputs.go

@@ -0,0 +1,16 @@
+package test
+
+// typeForTest is the Go type the accompanying test unmarshals each input into
+// with encoding/json to obtain the reference verdict.
+type typeForTest float64
+
+// inputs lists JSON number candidates; the comment on each entry states
+// whether it is expected to be accepted.
+var inputs = []string{
+	"+1",    // invalid, leading plus sign
+	"-a",    // invalid, minus followed by a non-digit
+	"-\x00", // invalid, minus followed by a zero byte
+	"0.1",   // valid
+	"0..1",  // invalid, more than one dot
+	"1e+1",  // valid, exponent with sign
+	"1+1",   // invalid, sign without exponent marker
+	"1E1",   // valid, exponent marker may be e or E
+	"1ee1",  // invalid, repeated exponent marker
+	"100a",  // invalid, trailing letter
+}

+ 34 - 0
skip_tests/number/skip_test.go

@@ -0,0 +1,34 @@
+package test
+
+import (
+	"encoding/json"
+	"io"
+	"testing"
+	"github.com/stretchr/testify/require"
+	"github.com/json-iterator/go"
+	"errors"
+)
+
+func Test_skip(t *testing.T) {
+	for _, input := range inputs {
+		t.Run(input, func(t *testing.T) {
+			should := require.New(t)
+			dst := typeForTest(0)
+			stdErr := json.Unmarshal([]byte(input), &dst)
+			iter := jsoniter.ParseString(jsoniter.ConfigDefault, input)
+			iter.Skip()
+			iter.ReadNil() // trigger looking forward
+			err := iter.Error
+			if err == io.EOF {
+				err = nil
+			} else {
+				err = errors.New("remaining bytes")
+			}
+			if stdErr == nil {
+				should.Nil(err)
+			} else {
+				should.NotNil(err)
+			}
+		})
+	}
+}

+ 14 - 0
skip_tests/string/inputs.go

@@ -0,0 +1,14 @@
+package test
+
+// typeForTest is the Go type the accompanying test unmarshals each input into
+// with encoding/json to obtain the reference verdict.
+type typeForTest string
+
+// inputs lists JSON string candidates; the comment on each entry states
+// whether it is expected to be accepted.
+var inputs = []string{
+	`""`, // valid, empty string
+	`"hello"`, // valid
+	`"`, // invalid, opening quote only
+	`"\"`, // invalid, the second quote is escaped so the string never closes
+	`"\x00"`, // invalid, \x is not a JSON escape sequence
+	"\"\x00\"", // invalid, raw zero byte inside the string
+	"\"\t\"", // invalid, raw tab character inside the string
+	`"\t"`, // valid, escaped tab
+}

+ 1 - 0
skip_tests/string/skip_test.go

@@ -0,0 +1 @@
+../number/skip_test.go