| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858 |
- package jsoniter
- import (
- "io"
- "fmt"
- "unicode/utf16"
- "strconv"
- "unsafe"
- )
// digits maps a byte to its numeric value: '0'-'9' map to 0-9, 'a'-'f' and
// 'A'-'F' map to 10-15, and every other byte maps to 255 (not a digit).
var digits []byte

// init fills the digits lookup table.
func init() {
	digits = make([]byte, 256)
	for idx := range digits {
		digits[idx] = 255
	}
	for c := byte('0'); c <= '9'; c++ {
		digits[c] = c - '0'
	}
	for c := byte('a'); c <= 'f'; c++ {
		digits[c] = c - 'a' + 10
	}
	for c := byte('A'); c <= 'F'; c++ {
		digits[c] = c - 'A' + 10
	}
}
// Iterator walks a JSON document held in buf, optionally refilling buf from
// reader when the loaded bytes are exhausted.
type Iterator struct {
	// reader supplies more input; it is nil when iterating a fixed byte slice.
	reader io.Reader
	// buf holds the currently loaded input.
	buf []byte
	// head is the index of the next unread byte; tail is the end of valid data.
	head, tail int
	// Error records the first failure (including io.EOF) seen while iterating.
	Error error
}
- func Parse(reader io.Reader, bufSize int) *Iterator {
- iter := &Iterator{
- reader: reader,
- buf: make([]byte, bufSize),
- head: 0,
- tail: 0,
- }
- iter.skipWhitespaces()
- return iter
- }
- func ParseBytes(input []byte) *Iterator {
- iter := &Iterator{
- reader: nil,
- buf: input,
- head: 0,
- tail: len(input),
- }
- iter.skipWhitespaces()
- return iter
- }
- func (iter *Iterator) Reuse(input []byte) *Iterator {
- // only for benchmarking
- iter.reader = nil
- iter.Error = nil
- iter.buf = input
- iter.head = 0
- iter.tail = len(input)
- iter.skipWhitespaces()
- return iter
- }
- func ParseString(input string) *Iterator {
- return ParseBytes([]byte(input))
- }
- func (iter *Iterator) skipWhitespaces() {
- for {
- for i := iter.head; i < iter.tail; i++ {
- c := iter.buf[i]
- switch c {
- case ' ', '\n', '\t', 'r':
- continue
- }
- iter.head = i
- return
- }
- if !iter.loadMore() {
- return
- }
- }
- }
- func (iter *Iterator) nextToken() byte {
- // a variation of skip whitespaces, returning the next non-whitespace token
- for {
- for i := iter.head; i < iter.tail; i++ {
- c := iter.buf[i]
- switch c {
- case ' ', '\n', '\t', 'r':
- continue
- }
- iter.head = i+1
- return c
- }
- if !iter.loadMore() {
- return 0
- }
- }
- }
- func (iter *Iterator) ReportError(operation string, msg string) {
- if iter.Error != nil {
- return
- }
- peekStart := iter.head - 10
- if peekStart < 0 {
- peekStart = 0
- }
- iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
- string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
- }
- func (iter *Iterator) CurrentBuffer() string {
- peekStart := iter.head - 10
- if peekStart < 0 {
- peekStart = 0
- }
- return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
- string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
- }
- func (iter *Iterator) readByte() (ret byte) {
- if iter.head == iter.tail {
- if iter.loadMore() {
- ret = iter.buf[iter.head]
- iter.head++
- return ret
- }
- }
- ret = iter.buf[iter.head]
- iter.head++
- return ret
- }
- func (iter *Iterator) loadMore() bool {
- if iter.reader == nil {
- iter.Error = io.EOF
- return false
- }
- for {
- n, err := iter.reader.Read(iter.buf)
- if n == 0 {
- if err != nil {
- iter.Error = err
- return false
- } else {
- // n == 0, err == nil is not EOF
- continue
- }
- } else {
- iter.head = 0
- iter.tail = n
- return true
- }
- }
- }
- func (iter *Iterator) unreadByte() {
- if iter.head == 0 {
- iter.ReportError("unreadByte", "unread too many bytes")
- return
- }
- iter.head -= 1
- return
- }
// Limits for unsigned parsing. Each cutoff is the smallest value whose
// product with 10 no longer fits in the corresponding width.
const (
	maxUint64    = 1<<64 - 1
	cutoffUint64 = maxUint64/10 + 1
	maxUint32    = 1<<32 - 1
	cutoffUint32 = maxUint32/10 + 1
)
- func (iter *Iterator) ReadUint() (ret uint) {
- val := iter.ReadUint64()
- converted := uint(val)
- if uint64(converted) != val {
- iter.ReportError("ReadUint", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadUint8() (ret uint8) {
- val := iter.ReadUint64()
- converted := uint8(val)
- if uint64(converted) != val {
- iter.ReportError("ReadUint8", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadUint16() (ret uint16) {
- val := iter.ReadUint64()
- converted := uint16(val)
- if uint64(converted) != val {
- iter.ReportError("ReadUint16", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadUint32() (ret uint32) {
- val := iter.ReadUint64()
- converted := uint32(val)
- if uint64(converted) != val {
- iter.ReportError("ReadUint32", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadUint64() (ret uint64) {
- c := iter.readByte()
- v := digits[c]
- if v == 0 {
- return 0 // single zero
- }
- if v == 255 {
- iter.ReportError("ReadUint64", "unexpected character")
- return
- }
- for {
- if ret >= cutoffUint64 {
- iter.ReportError("ReadUint64", "overflow")
- return
- }
- ret = ret * 10 + uint64(v)
- c = iter.readByte()
- v = digits[c]
- if v == 255 {
- iter.unreadByte()
- break
- }
- }
- return ret
- }
- func (iter *Iterator) ReadInt() (ret int) {
- val := iter.ReadInt64()
- converted := int(val)
- if int64(converted) != val {
- iter.ReportError("ReadInt", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadInt8() (ret int8) {
- val := iter.ReadInt64()
- converted := int8(val)
- if int64(converted) != val {
- iter.ReportError("ReadInt8", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadInt16() (ret int16) {
- val := iter.ReadInt64()
- converted := int16(val)
- if int64(converted) != val {
- iter.ReportError("ReadInt16", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadInt32() (ret int32) {
- val := iter.ReadInt64()
- converted := int32(val)
- if int64(converted) != val {
- iter.ReportError("ReadInt32", "int overflow")
- return
- }
- return converted
- }
- func (iter *Iterator) ReadInt64() (ret int64) {
- c := iter.readByte()
- if iter.Error != nil {
- return
- }
- /* optional leading minus */
- if c == '-' {
- n := iter.ReadUint64()
- return -int64(n)
- } else {
- iter.unreadByte()
- n := iter.ReadUint64()
- return int64(n)
- }
- }
- func (iter *Iterator) ReadString() (ret string) {
- return string(iter.ReadStringAsBytes())
- }
- // adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
- // Tries to find the end of string
- // Support if string contains escaped quote symbols.
- func (iter *Iterator) findStringEnd() (int, bool) {
- escaped := false
- for i := iter.head; i < iter.tail; i++ {
- c := iter.buf[i]
- if c == '"' {
- if !escaped {
- return i + 1, false
- } else {
- j := i - 1
- for {
- if j < iter.head || iter.buf[j] != '\\' {
- // even number of backslashes
- // either end of buffer, or " found
- return i + 1, true
- }
- j--
- if j < iter.head || iter.buf[j] != '\\' {
- // odd number of backslashes
- // it is \" or \\\"
- break
- }
- j--
- }
- }
- } else if c == '\\' {
- escaped = true
- }
- }
- j := iter.tail - 1
- for {
- if j < iter.head || iter.buf[j] != '\\' {
- // even number of backslashes
- // either end of buffer, or " found
- return -1, false // do not end with \
- }
- j--
- if j < iter.head || iter.buf[j] != '\\' {
- // odd number of backslashes
- // it is \" or \\\"
- break
- }
- j--
- }
- return -1, true // end with \
- }
- func (iter *Iterator) ReadStringAsBytes() (ret []byte) {
- c := iter.readByte()
- if c == 'n' {
- iter.skipUntilBreak()
- return
- }
- if c != '"' {
- iter.ReportError("ReadString", `expects " or n`)
- return
- }
- end, escaped := iter.findStringEnd()
- if end != -1 && !escaped {
- // fast path: reuse the underlying buffer
- ret = iter.buf[iter.head:end-1]
- iter.head = end
- return ret
- }
- str := make([]byte, 0, 8)
- for iter.Error == nil {
- c = iter.readByte()
- if c == '"' {
- return str
- }
- if c == '\\' {
- c = iter.readByte()
- if iter.Error != nil {
- return
- }
- switch c {
- case 'u':
- r := iter.readU4()
- if iter.Error != nil {
- return
- }
- if utf16.IsSurrogate(r) {
- c = iter.readByte()
- if iter.Error != nil {
- return
- }
- if c != '\\' {
- iter.ReportError("ReadString",
- `expects \u after utf16 surrogate, but \ not found`)
- return
- }
- c = iter.readByte()
- if iter.Error != nil {
- return
- }
- if c != 'u' {
- iter.ReportError("ReadString",
- `expects \u after utf16 surrogate, but \u not found`)
- return
- }
- r2 := iter.readU4()
- if iter.Error != nil {
- return
- }
- combined := utf16.DecodeRune(r, r2)
- str = appendRune(str, combined)
- } else {
- str = appendRune(str, r)
- }
- case '"':
- str = append(str, '"')
- case '\\':
- str = append(str, '\\')
- case '/':
- str = append(str, '/')
- case 'b':
- str = append(str, '\b')
- case 'f':
- str = append(str, '\f')
- case 'n':
- str = append(str, '\n')
- case 'r':
- str = append(str, '\r')
- case 't':
- str = append(str, '\t')
- default:
- iter.ReportError("ReadString",
- `invalid escape char after \`)
- return
- }
- } else {
- str = append(str, c)
- }
- }
- return
- }
- func (iter *Iterator) readU4() (ret rune) {
- for i := 0; i < 4; i++ {
- c := iter.readByte()
- if iter.Error != nil {
- return
- }
- if (c >= '0' && c <= '9') {
- if ret >= cutoffUint32 {
- iter.ReportError("readU4", "overflow")
- return
- }
- ret = ret * 16 + rune(c - '0')
- } else if ((c >= 'a' && c <= 'f') ) {
- if ret >= cutoffUint32 {
- iter.ReportError("readU4", "overflow")
- return
- }
- ret = ret * 16 + rune(c - 'a' + 10)
- } else {
- iter.ReportError("readU4", "expects 0~9 or a~f")
- return
- }
- }
- return ret
- }
// UTF-8 encoding constants used by appendRune.
const (
	// Lead-byte and continuation-byte tag bits.
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000

	// Payload masks.
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111

	// Largest code point encodable in one, two and three bytes.
	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1

	// Code points in the surrogate range are not valid runes.
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF

	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
	RuneError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
)
- func appendRune(p []byte, r rune) []byte {
- // Negative values are erroneous. Making it unsigned addresses the problem.
- switch i := uint32(r); {
- case i <= rune1Max:
- p = append(p, byte(r))
- return p
- case i <= rune2Max:
- p = append(p, t2 | byte(r >> 6))
- p = append(p, tx | byte(r) & maskx)
- return p
- case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
- r = RuneError
- fallthrough
- case i <= rune3Max:
- p = append(p, t3 | byte(r >> 12))
- p = append(p, tx | byte(r >> 6) & maskx)
- p = append(p, tx | byte(r) & maskx)
- return p
- default:
- p = append(p, t4 | byte(r >> 18))
- p = append(p, tx | byte(r >> 12) & maskx)
- p = append(p, tx | byte(r >> 6) & maskx)
- p = append(p, tx | byte(r) & maskx)
- return p
- }
- }
- func (iter *Iterator) ReadArray() (ret bool) {
- c := iter.nextToken()
- if iter.Error != nil {
- return
- }
- switch c {
- case 'n': {
- iter.skipUntilBreak()
- return false // null
- }
- case '[': {
- c = iter.nextToken()
- if iter.Error != nil {
- return
- }
- if c == ']' {
- return false
- } else {
- iter.unreadByte()
- return true
- }
- }
- case ']': return false
- case ',':
- iter.skipWhitespaces()
- return true
- default:
- iter.ReportError("ReadArray", "expect [ or , or ] or n")
- return
- }
- }
- func (iter *Iterator) ReadArrayCB(cb func()) {
- c := iter.nextToken()
- if c == 'n' {
- iter.skipUntilBreak()
- return // null
- }
- if c != '[' {
- iter.ReportError("ReadArrayCB", "expect [ or n")
- return
- }
- c = iter.nextToken()
- if c == ']' {
- return // []
- } else {
- iter.unreadByte()
- }
- for {
- if iter.Error != nil {
- return
- }
- cb()
- c = iter.nextToken()
- if c == ']' {
- return
- }
- if c != ',' {
- iter.ReportError("ReadArrayCB", "expect , or ]")
- return
- }
- iter.skipWhitespaces()
- }
- }
- func (iter *Iterator) ReadObjectCB(cb func(string)) {
- c := iter.nextToken()
- if c == 'n' {
- iter.skipUntilBreak()
- return // null
- }
- if c != '{' {
- iter.ReportError("ReadObjectCB", "expect { or n")
- return
- }
- c = iter.nextToken()
- if c == '}' {
- return // []
- } else {
- iter.unreadByte()
- }
- for {
- iter.skipWhitespaces()
- field := iter.readObjectField()
- if iter.Error != nil {
- return
- }
- cb(field)
- c = iter.nextToken()
- if c == '}' {
- return // end of object
- }
- if c != ',' {
- iter.ReportError("ReadObjectCB", `expect ,`)
- return
- }
- }
- }
- func (iter *Iterator) ReadObject() (ret string) {
- c := iter.nextToken()
- if iter.Error != nil {
- return
- }
- switch c {
- case 'n': {
- iter.skipUntilBreak()
- if iter.Error != nil {
- return
- }
- return "" // null
- }
- case '{': {
- c = iter.nextToken()
- if iter.Error != nil {
- return
- }
- switch c {
- case '}':
- return "" // end of object
- case '"':
- iter.unreadByte()
- return iter.readObjectField()
- default:
- iter.ReportError("ReadObject", `expect " after {`)
- return
- }
- }
- case ',':
- iter.skipWhitespaces()
- return iter.readObjectField()
- case '}':
- return "" // end of object
- default:
- iter.ReportError("ReadObject", `expect { or , or } or n`)
- return
- }
- }
- func (iter *Iterator) readObjectField() (ret string) {
- str := iter.ReadStringAsBytes()
- field := *(*string)(unsafe.Pointer(&str))
- c := iter.nextToken()
- if c != ':' {
- iter.ReportError("ReadObject", "expect : after object field")
- return
- }
- iter.skipWhitespaces()
- return field
- }
- func (iter *Iterator) ReadFloat32() (ret float32) {
- str := make([]byte, 0, 4)
- for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
- switch c {
- case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- str = append(str, c)
- continue
- default:
- iter.unreadByte()
- }
- break
- }
- if iter.Error != nil && iter.Error != io.EOF {
- return
- }
- val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
- if err != nil {
- iter.Error = err
- return
- }
- return float32(val)
- }
- func (iter *Iterator) ReadFloat64() (ret float64) {
- str := make([]byte, 0, 4)
- for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
- switch c {
- case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- str = append(str, c)
- continue
- default:
- iter.unreadByte()
- }
- break
- }
- if iter.Error != nil && iter.Error != io.EOF {
- return
- }
- val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
- if err != nil {
- iter.Error = err
- return
- }
- return val
- }
- func (iter *Iterator) ReadBool() (ret bool) {
- c := iter.readByte()
- if iter.Error != nil {
- return
- }
- switch c {
- case 't':
- iter.skipUntilBreak()
- if iter.Error != nil {
- return
- }
- return true
- case 'f':
- iter.skipUntilBreak()
- if iter.Error != nil {
- return
- }
- return false
- default:
- iter.ReportError("ReadBool", "expect t or f")
- return
- }
- }
- func (iter *Iterator) ReadNull() (ret bool) {
- c := iter.readByte()
- if c == 'n' {
- iter.skipUntilBreak()
- return true
- }
- iter.unreadByte()
- return false
- }
- func (iter *Iterator) Skip() {
- c := iter.readByte()
- switch c {
- case '"':
- iter.skipString()
- case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
- iter.skipUntilBreak()
- case '[':
- iter.skipArray()
- case '{':
- iter.skipObject()
- default:
- iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
- return
- }
- }
- func (iter *Iterator) skipString() {
- for {
- end, escaped := iter.findStringEnd()
- if end == -1 {
- if !iter.loadMore() {
- return
- }
- if escaped {
- iter.head = 1 // skip the first char as last char read is \
- }
- } else {
- iter.head = end
- return
- }
- }
- }
- func (iter *Iterator) skipArray() {
- level := 1
- for {
- for i := iter.head; i < iter.tail; i++ {
- switch iter.buf[i] {
- case '"': // If inside string, skip it
- iter.head = i + 1
- iter.skipString()
- i = iter.head - 1 // it will be i++ soon
- case '[': // If open symbol, increase level
- level++
- case ']': // If close symbol, increase level
- level--
- // If we have returned to the original level, we're done
- if level == 0 {
- iter.head = i + 1
- return
- }
- }
- }
- if (!iter.loadMore()) {
- return
- }
- }
- }
- func (iter *Iterator) skipObject() {
- level := 1
- for {
- for i := iter.head; i < iter.tail; i++ {
- switch iter.buf[i] {
- case '"': // If inside string, skip it
- iter.head = i + 1
- iter.skipString()
- i = iter.head - 1 // it will be i++ soon
- case '{': // If open symbol, increase level
- level++
- case '}': // If close symbol, increase level
- level--
- // If we have returned to the original level, we're done
- if level == 0 {
- iter.head = i + 1
- return
- }
- }
- }
- if (!iter.loadMore()) {
- return
- }
- }
- }
- func (iter *Iterator) skipUntilBreak() {
- // true, false, null, number
- for {
- for i := iter.head; i < iter.tail; i++ {
- c := iter.buf[i]
- switch c {
- case ' ', '\n', '\r', '\t', ',', '}', ']':
- iter.head = i
- return
- }
- }
- if (!iter.loadMore()) {
- return
- }
- }
- }
|