iterator.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. package jsoniter
  2. import (
  3. "encoding/base64"
  4. "fmt"
  5. "io"
  6. "unicode/utf16"
  7. )
  8. type ValueType int
  9. const (
  10. Invalid ValueType = iota
  11. String
  12. Number
  13. Null
  14. Bool
  15. Array
  16. Object
  17. )
  18. var atoiDigits []byte
  19. var valueTypes []ValueType
  20. func init() {
  21. atoiDigits = make([]byte, 256)
  22. for i := 0; i < len(atoiDigits); i++ {
  23. atoiDigits[i] = 255
  24. }
  25. for i := '0'; i <= '9'; i++ {
  26. atoiDigits[i] = byte(i - '0')
  27. }
  28. for i := 'a'; i <= 'f'; i++ {
  29. atoiDigits[i] = byte((i - 'a') + 10)
  30. }
  31. for i := 'A'; i <= 'F'; i++ {
  32. atoiDigits[i] = byte((i - 'A') + 10)
  33. }
  34. valueTypes = make([]ValueType, 256)
  35. for i := 0; i < len(valueTypes); i++ {
  36. valueTypes[i] = Invalid
  37. }
  38. valueTypes['"'] = String
  39. valueTypes['-'] = Number
  40. valueTypes['0'] = Number
  41. valueTypes['1'] = Number
  42. valueTypes['2'] = Number
  43. valueTypes['3'] = Number
  44. valueTypes['4'] = Number
  45. valueTypes['5'] = Number
  46. valueTypes['6'] = Number
  47. valueTypes['7'] = Number
  48. valueTypes['8'] = Number
  49. valueTypes['9'] = Number
  50. valueTypes['t'] = Bool
  51. valueTypes['f'] = Bool
  52. valueTypes['n'] = Null
  53. valueTypes['['] = Array
  54. valueTypes['{'] = Object
  55. }
  56. // Iterator is a fast and flexible JSON parser
  57. type Iterator struct {
  58. reader io.Reader
  59. buf []byte
  60. head int
  61. tail int
  62. Error error
  63. }
  64. // Create creates an empty Iterator instance
  65. func NewIterator() *Iterator {
  66. return &Iterator{
  67. reader: nil,
  68. buf: nil,
  69. head: 0,
  70. tail: 0,
  71. }
  72. }
  73. // Parse parses a json buffer in io.Reader into an Iterator instance
  74. func Parse(reader io.Reader, bufSize int) *Iterator {
  75. return &Iterator{
  76. reader: reader,
  77. buf: make([]byte, bufSize),
  78. head: 0,
  79. tail: 0,
  80. }
  81. }
  82. // ParseBytes parses a json byte slice into an Iterator instance
  83. func ParseBytes(input []byte) *Iterator {
  84. return &Iterator{
  85. reader: nil,
  86. buf: input,
  87. head: 0,
  88. tail: len(input),
  89. }
  90. }
  91. // ParseString parses a json string into an Iterator instance
  92. func ParseString(input string) *Iterator {
  93. return ParseBytes([]byte(input))
  94. }
  95. // Reset can reset an Iterator instance for another json buffer in io.Reader
  96. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  97. iter.reader = reader
  98. iter.head = 0
  99. iter.tail = 0
  100. return iter
  101. }
  102. // ResetBytes can reset an Iterator instance for another json byte slice
  103. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  104. iter.reader = nil
  105. iter.Error = nil
  106. iter.buf = input
  107. iter.head = 0
  108. iter.tail = len(input)
  109. return iter
  110. }
  111. // WhatIsNext gets ValueType of relatively next json object
  112. func (iter *Iterator) WhatIsNext() ValueType {
  113. valueType := valueTypes[iter.nextToken()]
  114. iter.unreadByte()
  115. return valueType
  116. }
  117. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  118. for i := iter.head; i < iter.tail; i++ {
  119. c := iter.buf[i]
  120. switch c {
  121. case ' ', '\n', '\t', '\r':
  122. continue
  123. }
  124. iter.head = i
  125. return false
  126. }
  127. return true
  128. }
  129. func (iter *Iterator) nextToken() byte {
  130. // a variation of skip whitespaces, returning the next non-whitespace token
  131. for {
  132. for i := iter.head; i < iter.tail; i++ {
  133. c := iter.buf[i]
  134. switch c {
  135. case ' ', '\n', '\t', '\r':
  136. continue
  137. }
  138. iter.head = i + 1
  139. return c
  140. }
  141. if !iter.loadMore() {
  142. return 0
  143. }
  144. }
  145. }
  146. func (iter *Iterator) reportError(operation string, msg string) {
  147. if iter.Error != nil {
  148. return
  149. }
  150. peekStart := iter.head - 10
  151. if peekStart < 0 {
  152. peekStart = 0
  153. }
  154. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  155. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  156. }
  157. // CurrentBuffer gets current buffer as string
  158. func (iter *Iterator) CurrentBuffer() string {
  159. peekStart := iter.head - 10
  160. if peekStart < 0 {
  161. peekStart = 0
  162. }
  163. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  164. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  165. }
  166. func (iter *Iterator) readByte() (ret byte) {
  167. if iter.head == iter.tail {
  168. if iter.loadMore() {
  169. ret = iter.buf[iter.head]
  170. iter.head++
  171. return ret
  172. }
  173. return 0
  174. }
  175. ret = iter.buf[iter.head]
  176. iter.head++
  177. return ret
  178. }
  179. func (iter *Iterator) loadMore() bool {
  180. if iter.reader == nil {
  181. iter.Error = io.EOF
  182. return false
  183. }
  184. for {
  185. n, err := iter.reader.Read(iter.buf)
  186. if n == 0 {
  187. if err != nil {
  188. iter.Error = err
  189. return false
  190. }
  191. } else {
  192. iter.head = 0
  193. iter.tail = n
  194. return true
  195. }
  196. }
  197. }
  198. func (iter *Iterator) unreadByte() {
  199. if iter.head == 0 {
  200. iter.reportError("unreadByte", "unread too many bytes")
  201. return
  202. }
  203. iter.head--
  204. return
  205. }
  206. const maxUint64 = (1<<64 - 1)
  207. const cutoffUint64 = maxUint64/10 + 1
  208. const maxUint32 = (1<<32 - 1)
  209. const cutoffUint32 = maxUint32/10 + 1
  210. // ReadUint reads a json object as Uint
  211. func (iter *Iterator) ReadUint() (ret uint) {
  212. val := iter.ReadUint64()
  213. converted := uint(val)
  214. if uint64(converted) != val {
  215. iter.reportError("ReadUint", "int overflow")
  216. return
  217. }
  218. return converted
  219. }
  220. // ReadUint8 reads a json object as Uint8
  221. func (iter *Iterator) ReadUint8() (ret uint8) {
  222. val := iter.ReadUint64()
  223. converted := uint8(val)
  224. if uint64(converted) != val {
  225. iter.reportError("ReadUint8", "int overflow")
  226. return
  227. }
  228. return converted
  229. }
  230. // ReadUint16 reads a json object as Uint16
  231. func (iter *Iterator) ReadUint16() (ret uint16) {
  232. val := iter.ReadUint64()
  233. converted := uint16(val)
  234. if uint64(converted) != val {
  235. iter.reportError("ReadUint16", "int overflow")
  236. return
  237. }
  238. return converted
  239. }
  240. // ReadUint32 reads a json object as Uint32
  241. func (iter *Iterator) ReadUint32() (ret uint32) {
  242. val := iter.ReadUint64()
  243. converted := uint32(val)
  244. if uint64(converted) != val {
  245. iter.reportError("ReadUint32", "int overflow")
  246. return
  247. }
  248. return converted
  249. }
  250. // ReadUint64 reads a json object as Uint64
  251. func (iter *Iterator) ReadUint64() (ret uint64) {
  252. c := iter.nextToken()
  253. v := atoiDigits[c]
  254. if v == 0 {
  255. return 0 // single zero
  256. }
  257. if v == 255 {
  258. iter.reportError("ReadUint64", "unexpected character")
  259. return
  260. }
  261. for {
  262. if ret >= cutoffUint64 {
  263. iter.reportError("ReadUint64", "overflow")
  264. return
  265. }
  266. ret = ret*10 + uint64(v)
  267. c = iter.readByte()
  268. v = atoiDigits[c]
  269. if v == 255 {
  270. iter.unreadByte()
  271. break
  272. }
  273. }
  274. return ret
  275. }
  276. // ReadInt reads a json object as Int
  277. func (iter *Iterator) ReadInt() (ret int) {
  278. val := iter.ReadInt64()
  279. converted := int(val)
  280. if int64(converted) != val {
  281. iter.reportError("ReadInt", "int overflow")
  282. return
  283. }
  284. return converted
  285. }
  286. // ReadInt8 reads a json object as Int8
  287. func (iter *Iterator) ReadInt8() (ret int8) {
  288. val := iter.ReadInt64()
  289. converted := int8(val)
  290. if int64(converted) != val {
  291. iter.reportError("ReadInt8", "int overflow")
  292. return
  293. }
  294. return converted
  295. }
  296. // ReadInt16 reads a json object as Int16
  297. func (iter *Iterator) ReadInt16() (ret int16) {
  298. val := iter.ReadInt64()
  299. converted := int16(val)
  300. if int64(converted) != val {
  301. iter.reportError("ReadInt16", "int overflow")
  302. return
  303. }
  304. return converted
  305. }
  306. // ReadInt32 reads a json object as Int32
  307. func (iter *Iterator) ReadInt32() (ret int32) {
  308. val := iter.ReadInt64()
  309. converted := int32(val)
  310. if int64(converted) != val {
  311. iter.reportError("ReadInt32", "int overflow")
  312. return
  313. }
  314. return converted
  315. }
  316. // ReadInt64 reads a json object as Int64
  317. func (iter *Iterator) ReadInt64() (ret int64) {
  318. c := iter.nextToken()
  319. if iter.Error != nil {
  320. return
  321. }
  322. /* optional leading minus */
  323. if c == '-' {
  324. n := iter.ReadUint64()
  325. return -int64(n)
  326. }
  327. iter.unreadByte()
  328. n := iter.ReadUint64()
  329. return int64(n)
  330. }
  331. // ReadString reads a json object as String
  332. func (iter *Iterator) ReadString() (ret string) {
  333. return string(iter.readStringAsBytes())
  334. }
  335. func (iter *Iterator) readStringAsBytes() (ret []byte) {
  336. c := iter.nextToken()
  337. if c == '"' {
  338. end := iter.findStringEndWithoutEscape()
  339. if end != -1 {
  340. // fast path: reuse the underlying buffer
  341. ret = iter.buf[iter.head : end-1]
  342. iter.head = end
  343. return ret
  344. }
  345. return iter.readStringAsBytesSlowPath()
  346. }
  347. if c == 'n' {
  348. iter.skipUntilBreak()
  349. return
  350. }
  351. iter.reportError("ReadString", `expects " or n`)
  352. return
  353. }
  354. func (iter *Iterator) readStringAsBytesSlowPath() (ret []byte) {
  355. str := make([]byte, 0, 8)
  356. var c byte
  357. for iter.Error == nil {
  358. c = iter.readByte()
  359. if c == '"' {
  360. return str
  361. }
  362. if c == '\\' {
  363. c = iter.readByte()
  364. if iter.Error != nil {
  365. return
  366. }
  367. switch c {
  368. case 'u':
  369. r := iter.readU4()
  370. if iter.Error != nil {
  371. return
  372. }
  373. if utf16.IsSurrogate(r) {
  374. c = iter.readByte()
  375. if iter.Error != nil {
  376. return
  377. }
  378. if c != '\\' {
  379. iter.reportError("ReadString",
  380. `expects \u after utf16 surrogate, but \ not found`)
  381. return
  382. }
  383. c = iter.readByte()
  384. if iter.Error != nil {
  385. return
  386. }
  387. if c != 'u' {
  388. iter.reportError("ReadString",
  389. `expects \u after utf16 surrogate, but \u not found`)
  390. return
  391. }
  392. r2 := iter.readU4()
  393. if iter.Error != nil {
  394. return
  395. }
  396. combined := utf16.DecodeRune(r, r2)
  397. str = appendRune(str, combined)
  398. } else {
  399. str = appendRune(str, r)
  400. }
  401. case '"':
  402. str = append(str, '"')
  403. case '\\':
  404. str = append(str, '\\')
  405. case '/':
  406. str = append(str, '/')
  407. case 'b':
  408. str = append(str, '\b')
  409. case 'f':
  410. str = append(str, '\f')
  411. case 'n':
  412. str = append(str, '\n')
  413. case 'r':
  414. str = append(str, '\r')
  415. case 't':
  416. str = append(str, '\t')
  417. default:
  418. iter.reportError("ReadString",
  419. `invalid escape char after \`)
  420. return
  421. }
  422. } else {
  423. str = append(str, c)
  424. }
  425. }
  426. return
  427. }
  428. func (iter *Iterator) readU4() (ret rune) {
  429. for i := 0; i < 4; i++ {
  430. c := iter.readByte()
  431. if iter.Error != nil {
  432. return
  433. }
  434. if c >= '0' && c <= '9' {
  435. if ret >= cutoffUint32 {
  436. iter.reportError("readU4", "overflow")
  437. return
  438. }
  439. ret = ret*16 + rune(c-'0')
  440. } else if c >= 'a' && c <= 'f' {
  441. if ret >= cutoffUint32 {
  442. iter.reportError("readU4", "overflow")
  443. return
  444. }
  445. ret = ret*16 + rune(c-'a'+10)
  446. } else {
  447. iter.reportError("readU4", "expects 0~9 or a~f")
  448. return
  449. }
  450. }
  451. return ret
  452. }
  453. const (
  454. t1 = 0x00 // 0000 0000
  455. tx = 0x80 // 1000 0000
  456. t2 = 0xC0 // 1100 0000
  457. t3 = 0xE0 // 1110 0000
  458. t4 = 0xF0 // 1111 0000
  459. t5 = 0xF8 // 1111 1000
  460. maskx = 0x3F // 0011 1111
  461. mask2 = 0x1F // 0001 1111
  462. mask3 = 0x0F // 0000 1111
  463. mask4 = 0x07 // 0000 0111
  464. rune1Max = 1<<7 - 1
  465. rune2Max = 1<<11 - 1
  466. rune3Max = 1<<16 - 1
  467. surrogateMin = 0xD800
  468. surrogateMax = 0xDFFF
  469. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  470. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  471. )
  472. func appendRune(p []byte, r rune) []byte {
  473. // Negative values are erroneous. Making it unsigned addresses the problem.
  474. switch i := uint32(r); {
  475. case i <= rune1Max:
  476. p = append(p, byte(r))
  477. return p
  478. case i <= rune2Max:
  479. p = append(p, t2|byte(r>>6))
  480. p = append(p, tx|byte(r)&maskx)
  481. return p
  482. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  483. r = RuneError
  484. fallthrough
  485. case i <= rune3Max:
  486. p = append(p, t3|byte(r>>12))
  487. p = append(p, tx|byte(r>>6)&maskx)
  488. p = append(p, tx|byte(r)&maskx)
  489. return p
  490. default:
  491. p = append(p, t4|byte(r>>18))
  492. p = append(p, tx|byte(r>>12)&maskx)
  493. p = append(p, tx|byte(r>>6)&maskx)
  494. p = append(p, tx|byte(r)&maskx)
  495. return p
  496. }
  497. }
  498. // ReadArray reads a json object as Array
  499. func (iter *Iterator) ReadArray() (ret bool) {
  500. c := iter.nextToken()
  501. if iter.Error != nil {
  502. return
  503. }
  504. switch c {
  505. case 'n':
  506. iter.skipUntilBreak()
  507. return false // null
  508. case '[':
  509. c = iter.nextToken()
  510. if iter.Error != nil {
  511. return
  512. }
  513. if c == ']' {
  514. return false
  515. }
  516. iter.unreadByte()
  517. return true
  518. case ']':
  519. return false
  520. case ',':
  521. return true
  522. default:
  523. iter.reportError("ReadArray", "expect [ or , or ] or n, but found: "+string([]byte{c}))
  524. return
  525. }
  526. }
  527. // ReadBool reads a json object as Bool
  528. func (iter *Iterator) ReadBool() (ret bool) {
  529. c := iter.nextToken()
  530. if iter.Error != nil {
  531. return
  532. }
  533. switch c {
  534. case 't':
  535. iter.skipUntilBreak()
  536. return true
  537. case 'f':
  538. iter.skipUntilBreak()
  539. return false
  540. default:
  541. iter.reportError("ReadBool", "expect t or f")
  542. return
  543. }
  544. }
  545. // ReadBase64 reads a json object as Base64 in byte slice
  546. func (iter *Iterator) ReadBase64() (ret []byte) {
  547. src := iter.readStringAsBytes()
  548. if iter.Error != nil {
  549. return
  550. }
  551. b64 := base64.StdEncoding
  552. ret = make([]byte, b64.DecodedLen(len(src)))
  553. n, err := b64.Decode(ret, src)
  554. if err != nil {
  555. iter.Error = err
  556. return
  557. }
  558. return ret[:n]
  559. }
  560. // ReadNil reads a json object as nil and
  561. // returns whether it's a nil or not
  562. func (iter *Iterator) ReadNil() (ret bool) {
  563. c := iter.nextToken()
  564. if c == 'n' {
  565. iter.skipUntilBreak()
  566. return true
  567. }
  568. iter.unreadByte()
  569. return false
  570. }
  571. // Skip skips a json object and positions to relatively the next json object
  572. func (iter *Iterator) Skip() {
  573. c := iter.nextToken()
  574. switch c {
  575. case '"':
  576. iter.skipString()
  577. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
  578. iter.skipUntilBreak()
  579. case '[':
  580. iter.skipArray()
  581. case '{':
  582. iter.skipObject()
  583. default:
  584. iter.reportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  585. return
  586. }
  587. }
  588. func (iter *Iterator) skipString() {
  589. for {
  590. end, escaped := iter.findStringEnd()
  591. if end == -1 {
  592. if !iter.loadMore() {
  593. return
  594. }
  595. if escaped {
  596. iter.head = 1 // skip the first char as last char read is \
  597. }
  598. } else {
  599. iter.head = end
  600. return
  601. }
  602. }
  603. }
  604. // adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
  605. // Tries to find the end of string
  606. // Support if string contains escaped quote symbols.
  607. func (iter *Iterator) findStringEnd() (int, bool) {
  608. escaped := false
  609. for i := iter.head; i < iter.tail; i++ {
  610. c := iter.buf[i]
  611. if c == '"' {
  612. if !escaped {
  613. return i + 1, false
  614. }
  615. j := i - 1
  616. for {
  617. if j < iter.head || iter.buf[j] != '\\' {
  618. // even number of backslashes
  619. // either end of buffer, or " found
  620. return i + 1, true
  621. }
  622. j--
  623. if j < iter.head || iter.buf[j] != '\\' {
  624. // odd number of backslashes
  625. // it is \" or \\\"
  626. break
  627. }
  628. j--
  629. }
  630. } else if c == '\\' {
  631. escaped = true
  632. }
  633. }
  634. j := iter.tail - 1
  635. for {
  636. if j < iter.head || iter.buf[j] != '\\' {
  637. // even number of backslashes
  638. // either end of buffer, or " found
  639. return -1, false // do not end with \
  640. }
  641. j--
  642. if j < iter.head || iter.buf[j] != '\\' {
  643. // odd number of backslashes
  644. // it is \" or \\\"
  645. break
  646. }
  647. j--
  648. }
  649. return -1, true // end with \
  650. }
  651. func (iter *Iterator) findStringEndWithoutEscape() int {
  652. for i := iter.head; i < iter.tail; i++ {
  653. c := iter.buf[i]
  654. if c == '"' {
  655. return i + 1
  656. } else if c == '\\' {
  657. return -1
  658. }
  659. }
  660. return -1
  661. }
  662. func (iter *Iterator) skipArray() {
  663. level := 1
  664. for {
  665. for i := iter.head; i < iter.tail; i++ {
  666. switch iter.buf[i] {
  667. case '"': // If inside string, skip it
  668. iter.head = i + 1
  669. iter.skipString()
  670. i = iter.head - 1 // it will be i++ soon
  671. case '[': // If open symbol, increase level
  672. level++
  673. case ']': // If close symbol, increase level
  674. level--
  675. // If we have returned to the original level, we're done
  676. if level == 0 {
  677. iter.head = i + 1
  678. return
  679. }
  680. }
  681. }
  682. if !iter.loadMore() {
  683. return
  684. }
  685. }
  686. }
  687. func (iter *Iterator) skipObject() {
  688. level := 1
  689. for {
  690. for i := iter.head; i < iter.tail; i++ {
  691. switch iter.buf[i] {
  692. case '"': // If inside string, skip it
  693. iter.head = i + 1
  694. iter.skipString()
  695. i = iter.head - 1 // it will be i++ soon
  696. case '{': // If open symbol, increase level
  697. level++
  698. case '}': // If close symbol, increase level
  699. level--
  700. // If we have returned to the original level, we're done
  701. if level == 0 {
  702. iter.head = i + 1
  703. return
  704. }
  705. }
  706. }
  707. if !iter.loadMore() {
  708. return
  709. }
  710. }
  711. }
  712. func (iter *Iterator) skipUntilBreak() {
  713. // true, false, null, number
  714. for {
  715. for i := iter.head; i < iter.tail; i++ {
  716. c := iter.buf[i]
  717. switch c {
  718. case ' ', '\n', '\r', '\t', ',', '}', ']':
  719. iter.head = i
  720. return
  721. }
  722. }
  723. if !iter.loadMore() {
  724. return
  725. }
  726. }
  727. }