Bladeren bron

implement lazy object three iteration way

Tao Wen 8 jaren geleden
bovenliggende
commit
2d647f04ca
10 gewijzigde bestanden met toevoegingen van 398 en 24 verwijderingen
  1. 61 9
      feature_any.go
  2. 10 0
      feature_any_bool.go
  3. 5 0
      feature_any_float.go
  4. 5 0
      feature_any_int.go
  5. 5 0
      feature_any_invalid.go
  6. 5 0
      feature_any_nil.go
  7. 218 0
      feature_any_object.go
  8. 5 0
      feature_any_string.go
  9. 2 0
      feature_iter_skip.go
  10. 82 15
      jsoniter_object_test.go

+ 61 - 9
feature_any.go

@@ -11,31 +11,50 @@ type Any interface {
 	ToFloat32() float32
 	ToFloat64() float64
 	ToString() string
+	Get(path ...interface{}) Any
+	Keys() []string
+	IterateObject() (func() (string, Any, bool), bool)
+}
+
+// baseAny supplies default implementations of the object-related methods of
+// Any, so the non-object Any types can embed it instead of repeating them.
+type baseAny struct {}
+
+// Keys returns an empty, non-nil slice.
+func (any *baseAny) Keys() []string {
+	return []string{}
+}
+
+// IterateObject returns a nil iterator function and false, signalling that
+// there is nothing to iterate over.
+func (any *baseAny) IterateObject() (func() (string, Any, bool), bool) {
+	return nil, false
 }
 
+// ReadAny reads the next value as an Any. It delegates to readAny without
+// supplying a reusable iterator.
 func (iter *Iterator) ReadAny() Any {
+	return iter.readAny(nil)
+}
+
+// readAny looks at the next token and dispatches on its first character:
+// strings, numbers and objects go to their lazy readers (which receive
+// reusableIter), while null, true and false are consumed in place and
+// returned as their fixed Any values. Any other character is reported as an
+// error and yields an invalidAny.
+func (iter *Iterator) readAny(reusableIter *Iterator) Any {
 	c := iter.nextToken()
 	switch c {
 	case '"':
-		return iter.readStringAny()
+		return iter.readStringAny(reusableIter)
 	case 'n':
 		iter.skipFixedBytes(3) // null
 		return &nilAny{}
 	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 		iter.unreadByte()
-		return iter.readNumberAny()
+		return iter.readNumberAny(reusableIter)
 	case 't':
 		iter.skipFixedBytes(3) // true
 		return &trueAny{}
 	case 'f':
 		iter.skipFixedBytes(4) // false
 		return &falseAny{}
+	case '{':
+		return iter.readObjectAny(reusableIter)
 	}
 	iter.reportError("ReadAny", fmt.Sprintf("unexpected character: %v", c))
 	return &invalidAny{}
 }
 
-func (iter *Iterator) readNumberAny() Any {
+func (iter *Iterator) readNumberAny(reusableIter *Iterator) Any {
 	dotFound := false
 	var lazyBuf []byte
 	for {
@@ -50,9 +69,9 @@ func (iter *Iterator) readNumberAny() Any {
 				lazyBuf = append(lazyBuf, iter.buf[iter.head:i]...)
 				iter.head = i
 				if dotFound {
-					return &floatLazyAny{lazyBuf, nil, nil, 0}
+					return &floatLazyAny{baseAny{},lazyBuf, reusableIter, nil, 0}
 				} else {
-					return &intLazyAny{lazyBuf, nil, nil, 0}
+					return &intLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0}
 				}
 			}
 		}
@@ -60,15 +79,15 @@ func (iter *Iterator) readNumberAny() Any {
 		if !iter.loadMore() {
 			iter.head = iter.tail
 			if dotFound {
-				return &floatLazyAny{lazyBuf, nil, nil, 0}
+				return &floatLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0}
 			} else {
-				return &intLazyAny{lazyBuf, nil, nil, 0}
+				return &intLazyAny{baseAny{}, lazyBuf, reusableIter, nil, 0}
 			}
 		}
 	}
 }
 
-func (iter *Iterator) readStringAny() Any {
+func (iter *Iterator) readStringAny(reusableIter *Iterator) Any {
 	lazyBuf := make([]byte, 1, 8)
 	lazyBuf[0] = '"'
 	for {
@@ -85,7 +104,40 @@ func (iter *Iterator) readStringAny() Any {
 		} else {
 			lazyBuf = append(lazyBuf, iter.buf[iter.head:end]...)
 			iter.head = end
-			return &stringLazyAny{lazyBuf, nil, nil, ""}
+			return &stringLazyAny{baseAny{}, lazyBuf, reusableIter, nil, ""}
+		}
+	}
+}
+
+// readObjectAny copies the raw text of a JSON object into a fresh buffer and
+// wraps it in an objectLazyAny without parsing any field. The opening '{' has
+// already been consumed by the caller, so it is re-added at the front of the
+// copy. reusableIter is handed to the resulting objectLazyAny.
+//
+// String and escape state is tracked inline rather than by moving iter.head
+// mid-scan, so the state survives a buffer refill and every scanned byte is
+// copied exactly once. Braces inside string literals are ignored.
+//
+// If the input ends before the matching '}' an error is reported and an
+// invalidAny is returned.
+func (iter *Iterator) readObjectAny(reusableIter *Iterator) Any {
+	level := 1
+	inString := false // currently scanning inside a string literal
+	escaped := false  // previous byte was a backslash that escapes this one
+	lazyBuf := make([]byte, 1, 32)
+	lazyBuf[0] = '{'
+	for {
+		for i := iter.head; i < iter.tail; i++ {
+			c := iter.buf[i]
+			if inString {
+				switch {
+				case escaped:
+					escaped = false
+				case c == '\\':
+					escaped = true
+				case c == '"':
+					inString = false
+				}
+				continue
+			}
+			switch c {
+			case '"': // entering a string, braces inside it do not count
+				inString = true
+			case '{': // If open symbol, increase level
+				level++
+			case '}': // If close symbol, decrease level
+				level--
+
+				// If we have returned to the original level, we're done
+				if level == 0 {
+					lazyBuf = append(lazyBuf, iter.buf[iter.head:i+1]...)
+					iter.head = i + 1
+					return &objectLazyAny{lazyBuf, reusableIter, nil, nil, lazyBuf}
+				}
+			}
+		}
+		// Keep the whole scanned chunk before the buffer is refilled.
+		// NOTE(review): assumes a successful loadMore leaves iter.head at the
+		// start of the fresh data, as the previous loop also did - confirm.
+		lazyBuf = append(lazyBuf, iter.buf[iter.head:iter.tail]...)
+		if !iter.loadMore() {
+			iter.reportError("readObjectAny", "incomplete object")
+			return &invalidAny{}
 		}
 	}
 }

+ 10 - 0
feature_any_bool.go

@@ -1,6 +1,7 @@
 package jsoniter
 
+// trueAny is the Any returned for the JSON literal true. It embeds baseAny
+// for the default object-related methods.
 type trueAny struct {
+	baseAny
 }
 
 func (any *trueAny) LastError() error {
@@ -35,7 +36,12 @@ func (any *trueAny) ToString() string {
 	return "true"
 }
 
+// Get always returns a new invalidAny, whatever path is given.
+func (any *trueAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
+}
+
+// falseAny is the Any returned for the JSON literal false. It embeds baseAny
+// for the default object-related methods.
 type falseAny struct {
+	baseAny
 }
 
 func (any *falseAny) LastError() error {
@@ -69,3 +75,7 @@ func (any *falseAny) ToFloat64() float64 {
+// ToString returns the literal text "false".
 func (any *falseAny) ToString() string {
 	return "false"
 }
+
+// Get always returns a new invalidAny, whatever path is given.
+func (any *falseAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
+}

+ 5 - 0
feature_any_float.go

@@ -6,6 +6,7 @@ import (
 )
 
 type floatLazyAny struct {
+	baseAny
 	buf []byte
 	iter *Iterator
 	err error
@@ -63,4 +64,8 @@ func (any *floatLazyAny) ToFloat64() float64 {
 
+// ToString reinterprets any.buf as a string through an unsafe pointer cast,
+// so no copy is made and the result shares memory with the buffer.
 func (any *floatLazyAny) ToString() string {
 	return *(*string)(unsafe.Pointer(&any.buf))
+}
+
+// Get always returns a new invalidAny, whatever path is given.
+func (any *floatLazyAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
 }

+ 5 - 0
feature_any_int.go

@@ -6,6 +6,7 @@ import (
 )
 
 type intLazyAny struct {
+	baseAny
 	buf   []byte
 	iter  *Iterator
 	err   error
@@ -63,4 +64,8 @@ func (any *intLazyAny) ToFloat64() float64 {
 
+// ToString reinterprets any.buf as a string through an unsafe pointer cast,
+// so no copy is made and the result shares memory with the buffer.
 func (any *intLazyAny) ToString() string {
 	return *(*string)(unsafe.Pointer(&any.buf))
+}
+
+// Get always returns a new invalidAny, whatever path is given.
+func (any *intLazyAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
 }

+ 5 - 0
feature_any_invalid.go

@@ -1,6 +1,7 @@
 package jsoniter
 
+// invalidAny is the Any returned where no usable value is available, for
+// example by Get on the scalar Any types. It embeds baseAny for the default
+// object-related methods.
 type invalidAny struct {
+	baseAny
 }
 
 func (any *invalidAny) LastError() error {
@@ -34,3 +35,7 @@ func (any *invalidAny) ToFloat64() float64 {
+// ToString returns the empty string.
 func (any *invalidAny) ToString() string {
 	return ""
 }
+
+// Get returns the receiver itself, so chained lookups on an invalid value
+// stay invalid without allocating.
+func (any *invalidAny) Get(path ...interface{}) Any {
+	return any
+}

+ 5 - 0
feature_any_nil.go

@@ -1,6 +1,7 @@
 package jsoniter
 
+// nilAny is the Any returned for the JSON literal null. It embeds baseAny
+// for the default object-related methods.
 type nilAny struct {
+	baseAny
 }
 
 func (any *nilAny) LastError() error {
@@ -34,3 +35,7 @@ func (any *nilAny) ToFloat64() float64 {
+// ToString returns the empty string.
 func (any *nilAny) ToString() string {
 	return ""
 }
+
+// Get always returns a new invalidAny, whatever path is given.
+func (any *nilAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
+}

+ 218 - 0
feature_any_object.go

@@ -0,0 +1,218 @@
+package jsoniter
+
+import (
+	"unsafe"
+)
+
+// objectLazyAny is an Any backed by the raw text of a JSON object. Fields
+// are parsed on demand and remembered in cache.
+type objectLazyAny struct {
+	// buf is the raw text of the whole object.
+	buf       []byte
+	// iter is the iterator used for parsing; parse creates one when it is nil.
+	iter      *Iterator
+	// err is not assigned by any method of this type visible here.
+	err       error
+	// cache maps field name to the value parsed so far; allocated on first use.
+	cache     map[string]Any
+	// remaining is the part of buf not parsed yet. It has the same length as
+	// buf while nothing has been parsed and is nil once parsing is finished.
+	remaining []byte
+}
+
+// parse returns the object's iterator positioned at the start of the
+// not-yet-parsed bytes, creating the iterator first if there is none.
+func (any *objectLazyAny) parse() *Iterator {
+	if any.iter == nil {
+		any.iter = NewIterator()
+	}
+	any.iter.ResetBytes(any.remaining)
+	return any.iter
+}
+
+// fillCacheUntil returns the value of field target, parsing only as much of
+// the remaining text as needed. Every field read on the way is stored in the
+// cache. It returns nil when the object has no such field; in that case the
+// whole object has been parsed and remaining is set to nil.
+func (any *objectLazyAny) fillCacheUntil(target string) Any {
+	if any.remaining == nil {
+		// fully parsed already: the cache is authoritative
+		return any.cache[target]
+	}
+	if any.cache == nil {
+		any.cache = map[string]Any{}
+	}
+	val := any.cache[target]
+	if val != nil {
+		return val
+	}
+	iter := any.parse()
+	if len(any.remaining) == len(any.buf) {
+		// nothing has been parsed yet: step over the opening '{' and read
+		// the first field, which is not preceded by a comma
+		iter.head++
+		c := iter.nextToken()
+		if c != '}' {
+			iter.unreadByte()
+			k := string(iter.readObjectFieldAsBytes())
+			v := iter.readAny(iter)
+			any.cache[k] = v
+			if target == k {
+				// remember where to resume on the next lookup
+				any.remaining = iter.buf[iter.head:]
+				return v
+			}
+		} else {
+			// empty object
+			any.remaining = nil
+			return nil
+		}
+	}
+	// every further field is introduced by a comma
+	for iter.nextToken() == ',' {
+		k := string(iter.readObjectFieldAsBytes())
+		v := iter.readAny(iter)
+		any.cache[k] = v
+		if target == k {
+			any.remaining = iter.buf[iter.head:]
+			return v
+		}
+	}
+	// reached the end without finding target
+	any.remaining = nil
+	return nil
+}
+
+// fillCache parses everything that is still unparsed and stores each field
+// in the cache. It does nothing when remaining is already nil; otherwise it
+// leaves remaining set to nil.
+func (any *objectLazyAny) fillCache() {
+	if any.remaining == nil {
+		return
+	}
+	if any.cache == nil {
+		any.cache = map[string]Any{}
+	}
+	iter := any.parse()
+	if len(any.remaining) == len(any.buf) {
+		// nothing has been parsed yet: step over the opening '{' and read
+		// the first field, which is not preceded by a comma
+		iter.head++
+		c := iter.nextToken()
+		if c != '}' {
+			iter.unreadByte()
+			k := string(iter.readObjectFieldAsBytes())
+			v := iter.readAny(iter)
+			any.cache[k] = v
+		} else {
+			// empty object
+			any.remaining = nil
+			return
+		}
+	}
+	// every further field is introduced by a comma
+	for iter.nextToken() == ',' {
+		k := string(iter.readObjectFieldAsBytes())
+		v := iter.readAny(iter)
+		any.cache[k] = v
+	}
+	any.remaining = nil
+	return
+}
+
+// LastError always returns nil; the err field is not consulted.
+func (any *objectLazyAny) LastError() error {
+	return nil
+}
+
+// ToBool always returns false for an object.
+func (any *objectLazyAny) ToBool() bool {
+	return false
+}
+
+// ToInt always returns 0 for an object.
+func (any *objectLazyAny) ToInt() int {
+	return 0
+}
+
+// ToInt32 always returns 0 for an object.
+func (any *objectLazyAny) ToInt32() int32 {
+	return 0
+}
+
+// ToInt64 always returns 0 for an object.
+func (any *objectLazyAny) ToInt64() int64 {
+	return 0
+}
+
+// ToFloat32 always returns 0 for an object.
+func (any *objectLazyAny) ToFloat32() float32 {
+	return 0
+}
+
+// ToFloat64 always returns 0 for an object.
+func (any *objectLazyAny) ToFloat64() float64 {
+	return 0
+}
+
+// ToString returns the raw object text while no field has been parsed yet.
+// The text is exposed through an unsafe cast, so it shares memory with buf.
+// Once parsing has started it returns the empty string.
+func (any *objectLazyAny) ToString() string {
+	untouched := len(any.remaining) == len(any.buf)
+	if !untouched {
+		// TODO: serialize the cache
+		return ""
+	}
+	// nothing has been parsed yet
+	return *(*string)(unsafe.Pointer(&any.buf))
+}
+
+// Get resolves path against this object. The first path element must be a
+// string naming a field; any further elements are resolved against that
+// field's value.
+//
+// An empty path returns the object itself. A non-string first element or a
+// missing field yields an invalidAny rather than a panic or a nil Any, which
+// matches what Get returns on the scalar Any types.
+func (any *objectLazyAny) Get(path ...interface{}) Any {
+	if len(path) == 0 {
+		return any
+	}
+	key, ok := path[0].(string)
+	if !ok {
+		return &invalidAny{}
+	}
+	element := any.fillCacheUntil(key)
+	if element == nil {
+		// fillCacheUntil reports a missing field as nil
+		return &invalidAny{}
+	}
+	if len(path) > 1 {
+		return element.Get(path[1:]...)
+	}
+	return element
+}
+
+// Keys parses whatever is still unparsed and returns the names of all cached
+// fields. The order follows map iteration and is therefore unspecified.
+func (any *objectLazyAny) Keys() []string {
+	any.fillCache()
+	names := make([]string, len(any.cache))
+	idx := 0
+	for name := range any.cache {
+		names[idx] = name
+		idx++
+	}
+	return names
+}
+// IterateObject returns a function that yields the object's fields one at a
+// time, and a bool telling whether there is at least one field. When the
+// bool is false the returned function is nil.
+//
+// Each call of the returned function yields (key, value, more). more reports
+// whether another field follows the one just returned, so a pair returned
+// together with false is still a valid field: the last one. Calls made after
+// that return "", nil, false.
+//
+// Fields already in the cache when IterateObject is called are yielded first,
+// in unspecified map order. The rest is parsed lazily from this iterator's own
+// position in the raw text and added to the cache as it is read.
+func (any *objectLazyAny) IterateObject() (func() (string, Any, bool), bool) {
+	if any.cache == nil {
+		any.cache = map[string]Any{}
+	}
+	remaining := any.remaining
+	if len(remaining) == len(any.buf) {
+		// nothing has been parsed yet: step over the opening '{' and read
+		// the first field so there is always one element to hand out
+		iter := any.parse()
+		iter.head++
+		c := iter.nextToken()
+		if c == '}' {
+			// empty object
+			any.remaining = nil
+			return nil, false
+		}
+		iter.unreadByte()
+		k := string(iter.readObjectFieldAsBytes())
+		v := iter.readAny(iter)
+		any.cache[k] = v
+		remaining = iter.buf[iter.head:]
+		any.remaining = remaining
+	}
+	if len(any.cache) == 0 {
+		return nil, false
+	}
+	// snapshot the cache so later insertions do not disturb this iteration
+	keys := make([]string, 0, len(any.cache))
+	values := make([]Any, 0, len(any.cache))
+	for key, value := range any.cache {
+		keys = append(keys, key)
+		values = append(values, value)
+	}
+	// the iterator always holds the next pair to return, one step ahead
+	nextKey := keys[0]
+	nextValue := values[0]
+	i := 1
+	done := false
+	return func() (string, Any, bool) {
+		if done {
+			return "", nil, false
+		}
+		key := nextKey
+		value := nextValue
+		if i < len(keys) {
+			// read from cache
+			nextKey = keys[i]
+			nextValue = values[i]
+			i++
+			return key, value, true
+		}
+		if remaining == nil {
+			// snapshot exhausted and nothing left to parse: the pending
+			// pair is the last field and must still be handed out
+			done = true
+			return key, value, false
+		}
+		// read from buffer, resuming at this iterator's own position
+		iter := any.iter
+		if iter == nil {
+			iter = NewIterator()
+			any.iter = iter
+		}
+		iter.ResetBytes(remaining)
+		if iter.nextToken() != ',' {
+			// end of object: the pending pair is the last field
+			remaining = nil
+			any.remaining = nil
+			done = true
+			return key, value, false
+		}
+		nextKey = string(iter.readObjectFieldAsBytes())
+		nextValue = iter.readAny(iter)
+		any.cache[nextKey] = nextValue
+		remaining = iter.buf[iter.head:]
+		any.remaining = remaining
+		return key, value, true
+	}, true
+}
+

+ 5 - 0
feature_any_string.go

@@ -5,6 +5,7 @@ import (
 )
 
 type stringLazyAny struct{
+	baseAny
 	buf   []byte
 	iter  *Iterator
 	err   error
@@ -95,4 +96,8 @@ func (any *stringLazyAny) ToFloat64() float64 {
+// ToString fills the cache via fillCache and returns the cached string.
 func (any *stringLazyAny) ToString() string {
 	any.fillCache()
 	return any.cache
+}
+
+// Get always returns a new invalidAny, whatever path is given.
+func (any *stringLazyAny) Get(path ...interface{}) Any {
+	return &invalidAny{}
 }

+ 2 - 0
feature_iter_skip.go

@@ -141,6 +141,7 @@ func (iter *Iterator) skipArray() {
 			}
 		}
 		if !iter.loadMore() {
+			iter.reportError("skipObject", "incomplete array")
 			return
 		}
 	}
@@ -168,6 +169,7 @@ func (iter *Iterator) skipObject() {
 			}
 		}
 		if !iter.loadMore() {
+			iter.reportError("skipObject", "incomplete object")
 			return
 		}
 	}

+ 82 - 15
jsoniter_object_test.go

@@ -36,27 +36,18 @@ func Test_one_field(t *testing.T) {
 }
 
 func Test_two_field(t *testing.T) {
+	should := require.New(t)
 	iter := ParseString(`{ "a": "b" , "c": "d" }`)
 	field := iter.ReadObject()
-	if field != "a" {
-		t.Fatal(field)
-	}
+	should.Equal("a", field)
 	value := iter.ReadString()
-	if value != "b" {
-		t.Fatal(field)
-	}
+	should.Equal("b", value)
 	field = iter.ReadObject()
-	if field != "c" {
-		t.Fatal(field)
-	}
+	should.Equal("c", field)
 	value = iter.ReadString()
-	if value != "d" {
-		t.Fatal(field)
-	}
+	should.Equal("d", value)
 	field = iter.ReadObject()
-	if field != "" {
-		t.Fatal(field)
-	}
+	should.Equal("", field)
 	iter = ParseString(`{"field1": "1", "field2": 2}`)
 	for field := iter.ReadObject(); field != ""; field = iter.ReadObject() {
 		switch field {
@@ -70,6 +61,82 @@ func Test_two_field(t *testing.T) {
 	}
 }
 
+// Test_read_object_as_any checks an object Any in three states: untouched
+// (ToString returns the raw text), partially parsed through Get, and fully
+// parsed through Keys on a fresh value.
+func Test_read_object_as_any(t *testing.T) {
+	should := require.New(t)
+	any, err := UnmarshalAnyFromString(`{"a":"b","c":"d"}`)
+	should.Nil(err)
+	should.Equal(`{"a":"b","c":"d"}`, any.ToString())
+	// partial parse
+	should.Equal("b", any.Get("a").ToString())
+	should.Equal("d", any.Get("c").ToString())
+	should.Equal(2, len(any.Keys()))
+	any, err = UnmarshalAnyFromString(`{"a":"b","c":"d"}`)
+	should.Nil(err)
+	// full parse
+	should.Equal(2, len(any.Keys()))
+}
+
+// Test_object_any_lazy_iterator walks an object with IterateObject, first
+// with a full parse forced in the middle of the iteration and then again
+// after the object is fully parsed.
+//
+// The bool returned by the iterator reports whether more fields follow the
+// pair just returned, so every returned pair is recorded, including the one
+// that arrives together with false.
+func Test_object_any_lazy_iterator(t *testing.T) {
+	should := require.New(t)
+	any, err := UnmarshalAnyFromString(`{"a":"b","c":"d"}`)
+	should.Nil(err)
+	// iterator parse
+	vals := map[string]string{}
+	var k string
+	var v Any
+	next, hasNext := any.IterateObject()
+	should.True(hasNext)
+
+	k, v, hasNext = next()
+	should.True(hasNext)
+	vals[k] = v.ToString()
+
+	// trigger full parse
+	should.Equal(2, len(any.Keys()))
+
+	k, v, hasNext = next()
+	should.False(hasNext)
+	vals[k] = v.ToString()
+
+	should.Equal(map[string]string{"a":"b", "c":"d"}, vals)
+
+	// iterate again over the fully parsed object; fetch before recording so
+	// no value left over from the first pass leaks into the result
+	vals = map[string]string{}
+	next, hasNext = any.IterateObject()
+	for hasNext {
+		k, v, hasNext = next()
+		should.NotNil(v)
+		vals[k] = v.ToString()
+	}
+	should.Equal(map[string]string{"a":"b", "c":"d"}, vals)
+}
+
+
+// Test_object_any_with_two_lazy_iterators creates two iterators over the
+// same three-field object before either is advanced, then steps them
+// alternately and checks that each one yields "a" then "c" independently of
+// the other. The third field is not consumed.
+func Test_object_any_with_two_lazy_iterators(t *testing.T) {
+	should := require.New(t)
+	any, err := UnmarshalAnyFromString(`{"a":"b","c":"d","e":"f"}`)
+	should.Nil(err)
+	var k string
+	var v Any
+	next1, hasNext1 := any.IterateObject()
+	next2, hasNext2 := any.IterateObject()
+	should.True(hasNext1)
+	k, v, hasNext1 = next1()
+	should.True(hasNext1)
+	should.Equal("a", k)
+	should.Equal("b", v.ToString())
+
+	should.True(hasNext2)
+	k, v, hasNext2 = next2()
+	should.True(hasNext2)
+	should.Equal("a", k)
+	should.Equal("b", v.ToString())
+
+	k, v, hasNext1 = next1()
+	should.True(hasNext1)
+	should.Equal("c", k)
+	should.Equal("d", v.ToString())
+
+	k, v, hasNext2 = next2()
+	should.True(hasNext2)
+	should.Equal("c", k)
+	should.Equal("d", v.ToString())
+}
+
 func Test_write_object(t *testing.T) {
 	should := require.New(t)
 	buf := &bytes.Buffer{}