jsoniter.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. "unsafe"
  8. )
  9. var digits []byte
  10. func init() {
  11. digits = make([]byte, 256)
  12. for i := 0; i < len(digits); i++ {
  13. digits[i] = 255
  14. }
  15. for i := '0'; i <= '9'; i++ {
  16. digits[i] = byte(i - '0');
  17. }
  18. for i := 'a'; i <= 'f'; i++ {
  19. digits[i] = byte((i - 'a') + 10);
  20. }
  21. for i := 'A'; i <= 'F'; i++ {
  22. digits[i] = byte((i - 'A') + 10);
  23. }
  24. }
  25. type Iterator struct {
  26. reader io.Reader
  27. buf []byte
  28. head int
  29. tail int
  30. Error error
  31. }
  32. func Parse(reader io.Reader, bufSize int) *Iterator {
  33. iter := &Iterator{
  34. reader: reader,
  35. buf: make([]byte, bufSize),
  36. head: 0,
  37. tail: 0,
  38. }
  39. iter.skipWhitespaces()
  40. return iter
  41. }
  42. func ParseBytes(input []byte) *Iterator {
  43. iter := &Iterator{
  44. reader: nil,
  45. buf: input,
  46. head: 0,
  47. tail: len(input),
  48. }
  49. iter.skipWhitespaces()
  50. return iter
  51. }
  52. func (iter *Iterator) Reuse(input []byte) *Iterator {
  53. // only for benchmarking
  54. iter.reader = nil
  55. iter.Error = nil
  56. iter.buf = input
  57. iter.head = 0
  58. iter.tail = len(input)
  59. iter.skipWhitespaces()
  60. return iter
  61. }
  62. func ParseString(input string) *Iterator {
  63. return ParseBytes([]byte(input))
  64. }
  65. func (iter *Iterator) skipWhitespaces() {
  66. for {
  67. for i := iter.head; i < iter.tail; i++ {
  68. c := iter.buf[i]
  69. switch c {
  70. case ' ', '\n', '\t', '\r':
  71. continue
  72. }
  73. iter.head = i
  74. return
  75. }
  76. if !iter.loadMore() {
  77. return
  78. }
  79. }
  80. }
  81. func (iter *Iterator) nextToken() byte {
  82. // a variation of skip whitespaces, returning the next non-whitespace token
  83. for {
  84. for i := iter.head; i < iter.tail; i++ {
  85. c := iter.buf[i]
  86. switch c {
  87. case ' ', '\n', '\t', 'r':
  88. continue
  89. }
  90. iter.head = i+1
  91. return c
  92. }
  93. if !iter.loadMore() {
  94. return 0
  95. }
  96. }
  97. }
  98. func (iter *Iterator) ReportError(operation string, msg string) {
  99. if iter.Error != nil {
  100. return
  101. }
  102. peekStart := iter.head - 10
  103. if peekStart < 0 {
  104. peekStart = 0
  105. }
  106. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  107. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  108. }
  109. func (iter *Iterator) CurrentBuffer() string {
  110. peekStart := iter.head - 10
  111. if peekStart < 0 {
  112. peekStart = 0
  113. }
  114. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  115. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  116. }
  117. func (iter *Iterator) readByte() (ret byte) {
  118. if iter.head == iter.tail {
  119. if iter.loadMore() {
  120. ret = iter.buf[iter.head]
  121. iter.head++
  122. return ret
  123. } else {
  124. return 0
  125. }
  126. }
  127. ret = iter.buf[iter.head]
  128. iter.head++
  129. return ret
  130. }
  131. func (iter *Iterator) loadMore() bool {
  132. if iter.reader == nil {
  133. iter.Error = io.EOF
  134. return false
  135. }
  136. for {
  137. n, err := iter.reader.Read(iter.buf)
  138. if n == 0 {
  139. if err != nil {
  140. iter.Error = err
  141. return false
  142. } else {
  143. // n == 0, err == nil is not EOF
  144. continue
  145. }
  146. } else {
  147. iter.head = 0
  148. iter.tail = n
  149. return true
  150. }
  151. }
  152. }
  153. func (iter *Iterator) unreadByte() {
  154. if iter.head == 0 {
  155. iter.ReportError("unreadByte", "unread too many bytes")
  156. return
  157. }
  158. iter.head -= 1
  159. return
  160. }
  161. const maxUint64 = (1 << 64 - 1)
  162. const cutoffUint64 = maxUint64 / 10 + 1
  163. const maxUint32 = (1 << 32 - 1)
  164. const cutoffUint32 = maxUint32 / 10 + 1
  165. func (iter *Iterator) ReadUint() (ret uint) {
  166. val := iter.ReadUint64()
  167. converted := uint(val)
  168. if uint64(converted) != val {
  169. iter.ReportError("ReadUint", "int overflow")
  170. return
  171. }
  172. return converted
  173. }
  174. func (iter *Iterator) ReadUint8() (ret uint8) {
  175. val := iter.ReadUint64()
  176. converted := uint8(val)
  177. if uint64(converted) != val {
  178. iter.ReportError("ReadUint8", "int overflow")
  179. return
  180. }
  181. return converted
  182. }
  183. func (iter *Iterator) ReadUint16() (ret uint16) {
  184. val := iter.ReadUint64()
  185. converted := uint16(val)
  186. if uint64(converted) != val {
  187. iter.ReportError("ReadUint16", "int overflow")
  188. return
  189. }
  190. return converted
  191. }
  192. func (iter *Iterator) ReadUint32() (ret uint32) {
  193. val := iter.ReadUint64()
  194. converted := uint32(val)
  195. if uint64(converted) != val {
  196. iter.ReportError("ReadUint32", "int overflow")
  197. return
  198. }
  199. return converted
  200. }
  201. func (iter *Iterator) ReadUint64() (ret uint64) {
  202. c := iter.readByte()
  203. v := digits[c]
  204. if v == 0 {
  205. return 0 // single zero
  206. }
  207. if v == 255 {
  208. iter.ReportError("ReadUint64", "unexpected character")
  209. return
  210. }
  211. for {
  212. if ret >= cutoffUint64 {
  213. iter.ReportError("ReadUint64", "overflow")
  214. return
  215. }
  216. ret = ret * 10 + uint64(v)
  217. c = iter.readByte()
  218. v = digits[c]
  219. if v == 255 {
  220. iter.unreadByte()
  221. break
  222. }
  223. }
  224. return ret
  225. }
  226. func (iter *Iterator) ReadInt() (ret int) {
  227. val := iter.ReadInt64()
  228. converted := int(val)
  229. if int64(converted) != val {
  230. iter.ReportError("ReadInt", "int overflow")
  231. return
  232. }
  233. return converted
  234. }
  235. func (iter *Iterator) ReadInt8() (ret int8) {
  236. val := iter.ReadInt64()
  237. converted := int8(val)
  238. if int64(converted) != val {
  239. iter.ReportError("ReadInt8", "int overflow")
  240. return
  241. }
  242. return converted
  243. }
  244. func (iter *Iterator) ReadInt16() (ret int16) {
  245. val := iter.ReadInt64()
  246. converted := int16(val)
  247. if int64(converted) != val {
  248. iter.ReportError("ReadInt16", "int overflow")
  249. return
  250. }
  251. return converted
  252. }
  253. func (iter *Iterator) ReadInt32() (ret int32) {
  254. val := iter.ReadInt64()
  255. converted := int32(val)
  256. if int64(converted) != val {
  257. iter.ReportError("ReadInt32", "int overflow")
  258. return
  259. }
  260. return converted
  261. }
  262. func (iter *Iterator) ReadInt64() (ret int64) {
  263. c := iter.readByte()
  264. if iter.Error != nil {
  265. return
  266. }
  267. /* optional leading minus */
  268. if c == '-' {
  269. n := iter.ReadUint64()
  270. return -int64(n)
  271. } else {
  272. iter.unreadByte()
  273. n := iter.ReadUint64()
  274. return int64(n)
  275. }
  276. }
  277. func (iter *Iterator) ReadString() (ret string) {
  278. return string(iter.ReadStringAsBytes())
  279. }
  280. func (iter *Iterator) ReadStringAsBytes() (ret []byte) {
  281. c := iter.readByte()
  282. if c == 'n' {
  283. iter.skipUntilBreak()
  284. return
  285. }
  286. if c != '"' {
  287. iter.ReportError("ReadString", `expects " or n`)
  288. return
  289. }
  290. end := iter.findStringEndWithoutEscape()
  291. if end != -1 {
  292. // fast path: reuse the underlying buffer
  293. ret = iter.buf[iter.head:end-1]
  294. iter.head = end
  295. return ret
  296. }
  297. str := make([]byte, 0, 8)
  298. for iter.Error == nil {
  299. c = iter.readByte()
  300. if c == '"' {
  301. return str
  302. }
  303. if c == '\\' {
  304. c = iter.readByte()
  305. if iter.Error != nil {
  306. return
  307. }
  308. switch c {
  309. case 'u':
  310. r := iter.readU4()
  311. if iter.Error != nil {
  312. return
  313. }
  314. if utf16.IsSurrogate(r) {
  315. c = iter.readByte()
  316. if iter.Error != nil {
  317. return
  318. }
  319. if c != '\\' {
  320. iter.ReportError("ReadString",
  321. `expects \u after utf16 surrogate, but \ not found`)
  322. return
  323. }
  324. c = iter.readByte()
  325. if iter.Error != nil {
  326. return
  327. }
  328. if c != 'u' {
  329. iter.ReportError("ReadString",
  330. `expects \u after utf16 surrogate, but \u not found`)
  331. return
  332. }
  333. r2 := iter.readU4()
  334. if iter.Error != nil {
  335. return
  336. }
  337. combined := utf16.DecodeRune(r, r2)
  338. str = appendRune(str, combined)
  339. } else {
  340. str = appendRune(str, r)
  341. }
  342. case '"':
  343. str = append(str, '"')
  344. case '\\':
  345. str = append(str, '\\')
  346. case '/':
  347. str = append(str, '/')
  348. case 'b':
  349. str = append(str, '\b')
  350. case 'f':
  351. str = append(str, '\f')
  352. case 'n':
  353. str = append(str, '\n')
  354. case 'r':
  355. str = append(str, '\r')
  356. case 't':
  357. str = append(str, '\t')
  358. default:
  359. iter.ReportError("ReadString",
  360. `invalid escape char after \`)
  361. return
  362. }
  363. } else {
  364. str = append(str, c)
  365. }
  366. }
  367. return
  368. }
  369. func (iter *Iterator) readU4() (ret rune) {
  370. for i := 0; i < 4; i++ {
  371. c := iter.readByte()
  372. if iter.Error != nil {
  373. return
  374. }
  375. if (c >= '0' && c <= '9') {
  376. if ret >= cutoffUint32 {
  377. iter.ReportError("readU4", "overflow")
  378. return
  379. }
  380. ret = ret * 16 + rune(c - '0')
  381. } else if ((c >= 'a' && c <= 'f') ) {
  382. if ret >= cutoffUint32 {
  383. iter.ReportError("readU4", "overflow")
  384. return
  385. }
  386. ret = ret * 16 + rune(c - 'a' + 10)
  387. } else {
  388. iter.ReportError("readU4", "expects 0~9 or a~f")
  389. return
  390. }
  391. }
  392. return ret
  393. }
  394. const (
  395. t1 = 0x00 // 0000 0000
  396. tx = 0x80 // 1000 0000
  397. t2 = 0xC0 // 1100 0000
  398. t3 = 0xE0 // 1110 0000
  399. t4 = 0xF0 // 1111 0000
  400. t5 = 0xF8 // 1111 1000
  401. maskx = 0x3F // 0011 1111
  402. mask2 = 0x1F // 0001 1111
  403. mask3 = 0x0F // 0000 1111
  404. mask4 = 0x07 // 0000 0111
  405. rune1Max = 1 << 7 - 1
  406. rune2Max = 1 << 11 - 1
  407. rune3Max = 1 << 16 - 1
  408. surrogateMin = 0xD800
  409. surrogateMax = 0xDFFF
  410. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  411. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  412. )
  413. func appendRune(p []byte, r rune) []byte {
  414. // Negative values are erroneous. Making it unsigned addresses the problem.
  415. switch i := uint32(r); {
  416. case i <= rune1Max:
  417. p = append(p, byte(r))
  418. return p
  419. case i <= rune2Max:
  420. p = append(p, t2 | byte(r >> 6))
  421. p = append(p, tx | byte(r) & maskx)
  422. return p
  423. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  424. r = RuneError
  425. fallthrough
  426. case i <= rune3Max:
  427. p = append(p, t3 | byte(r >> 12))
  428. p = append(p, tx | byte(r >> 6) & maskx)
  429. p = append(p, tx | byte(r) & maskx)
  430. return p
  431. default:
  432. p = append(p, t4 | byte(r >> 18))
  433. p = append(p, tx | byte(r >> 12) & maskx)
  434. p = append(p, tx | byte(r >> 6) & maskx)
  435. p = append(p, tx | byte(r) & maskx)
  436. return p
  437. }
  438. }
  439. func (iter *Iterator) ReadArray() (ret bool) {
  440. c := iter.nextToken()
  441. if iter.Error != nil {
  442. return
  443. }
  444. switch c {
  445. case 'n': {
  446. iter.skipUntilBreak()
  447. return false // null
  448. }
  449. case '[': {
  450. c = iter.nextToken()
  451. if iter.Error != nil {
  452. return
  453. }
  454. if c == ']' {
  455. return false
  456. } else {
  457. iter.unreadByte()
  458. return true
  459. }
  460. }
  461. case ']': return false
  462. case ',':
  463. iter.skipWhitespaces()
  464. return true
  465. default:
  466. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  467. return
  468. }
  469. }
  470. func (iter *Iterator) ReadObject() (ret string) {
  471. c := iter.nextToken()
  472. if iter.Error != nil {
  473. return
  474. }
  475. switch c {
  476. case 'n': {
  477. iter.skipUntilBreak()
  478. if iter.Error != nil {
  479. return
  480. }
  481. return "" // null
  482. }
  483. case '{': {
  484. c = iter.nextToken()
  485. if iter.Error != nil {
  486. return
  487. }
  488. switch c {
  489. case '}':
  490. return "" // end of object
  491. case '"':
  492. iter.unreadByte()
  493. return iter.readObjectField()
  494. default:
  495. iter.ReportError("ReadObject", `expect " after {`)
  496. return
  497. }
  498. }
  499. case ',':
  500. iter.skipWhitespaces()
  501. return iter.readObjectField()
  502. case '}':
  503. return "" // end of object
  504. default:
  505. iter.ReportError("ReadObject", `expect { or , or } or n`)
  506. return
  507. }
  508. }
  509. func (iter *Iterator) readObjectField() (ret string) {
  510. str := iter.ReadStringAsBytes()
  511. field := *(*string)(unsafe.Pointer(&str))
  512. c := iter.nextToken()
  513. if c != ':' {
  514. iter.ReportError("ReadObject", "expect : after object field")
  515. return
  516. }
  517. iter.skipWhitespaces()
  518. return field
  519. }
  520. func (iter *Iterator) ReadFloat32() (ret float32) {
  521. strBuf := [8]byte{}
  522. str := strBuf[0:0]
  523. hasMore := true
  524. for(hasMore) {
  525. for i := iter.head; i < iter.tail; i++ {
  526. c := iter.buf[i]
  527. switch c {
  528. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  529. str = append(str, c)
  530. continue
  531. default:
  532. hasMore = false
  533. break
  534. }
  535. }
  536. if hasMore {
  537. if !iter.loadMore() {
  538. break
  539. }
  540. }
  541. }
  542. if iter.Error != nil && iter.Error != io.EOF {
  543. return
  544. }
  545. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
  546. if err != nil {
  547. iter.Error = err
  548. return
  549. }
  550. return float32(val)
  551. }
  552. func (iter *Iterator) ReadFloat64() (ret float64) {
  553. strBuf := [8]byte{}
  554. str := strBuf[0:0]
  555. hasMore := true
  556. for(hasMore) {
  557. for i := iter.head; i < iter.tail; i++ {
  558. c := iter.buf[i]
  559. switch c {
  560. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  561. str = append(str, c)
  562. continue
  563. default:
  564. hasMore = false
  565. break
  566. }
  567. }
  568. if hasMore {
  569. if !iter.loadMore() {
  570. break
  571. }
  572. }
  573. }
  574. if iter.Error != nil && iter.Error != io.EOF {
  575. return
  576. }
  577. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
  578. if err != nil {
  579. iter.Error = err
  580. return
  581. }
  582. return val
  583. }
  584. func (iter *Iterator) ReadBool() (ret bool) {
  585. c := iter.readByte()
  586. if iter.Error != nil {
  587. return
  588. }
  589. switch c {
  590. case 't':
  591. iter.skipUntilBreak()
  592. return true
  593. case 'f':
  594. iter.skipUntilBreak()
  595. return false
  596. default:
  597. iter.ReportError("ReadBool", "expect t or f")
  598. return
  599. }
  600. }
  601. func (iter *Iterator) ReadNull() (ret bool) {
  602. c := iter.readByte()
  603. if c == 'n' {
  604. iter.skipUntilBreak()
  605. return true
  606. }
  607. iter.unreadByte()
  608. return false
  609. }
  610. func (iter *Iterator) Skip() {
  611. c := iter.readByte()
  612. switch c {
  613. case '"':
  614. iter.skipString()
  615. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
  616. iter.skipUntilBreak()
  617. case '[':
  618. iter.skipArray()
  619. case '{':
  620. iter.skipObject()
  621. default:
  622. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  623. return
  624. }
  625. }
  626. func (iter *Iterator) skipString() {
  627. for {
  628. end, escaped := iter.findStringEnd()
  629. if end == -1 {
  630. if !iter.loadMore() {
  631. return
  632. }
  633. if escaped {
  634. iter.head = 1 // skip the first char as last char read is \
  635. }
  636. } else {
  637. iter.head = end
  638. return
  639. }
  640. }
  641. }
  642. // adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
  643. // Tries to find the end of string
  644. // Support if string contains escaped quote symbols.
  645. func (iter *Iterator) findStringEnd() (int, bool) {
  646. escaped := false
  647. for i := iter.head; i < iter.tail; i++ {
  648. c := iter.buf[i]
  649. if c == '"' {
  650. if !escaped {
  651. return i + 1, false
  652. } else {
  653. j := i - 1
  654. for {
  655. if j < iter.head || iter.buf[j] != '\\' {
  656. // even number of backslashes
  657. // either end of buffer, or " found
  658. return i + 1, true
  659. }
  660. j--
  661. if j < iter.head || iter.buf[j] != '\\' {
  662. // odd number of backslashes
  663. // it is \" or \\\"
  664. break
  665. }
  666. j--
  667. }
  668. }
  669. } else if c == '\\' {
  670. escaped = true
  671. }
  672. }
  673. j := iter.tail - 1
  674. for {
  675. if j < iter.head || iter.buf[j] != '\\' {
  676. // even number of backslashes
  677. // either end of buffer, or " found
  678. return -1, false // do not end with \
  679. }
  680. j--
  681. if j < iter.head || iter.buf[j] != '\\' {
  682. // odd number of backslashes
  683. // it is \" or \\\"
  684. break
  685. }
  686. j--
  687. }
  688. return -1, true // end with \
  689. }
  690. func (iter *Iterator) findStringEndWithoutEscape() int {
  691. for i := iter.head; i < iter.tail; i++ {
  692. c := iter.buf[i]
  693. if c == '"' {
  694. return i + 1
  695. } else if c == '\\' {
  696. return -1
  697. }
  698. }
  699. return -1
  700. }
  701. func (iter *Iterator) skipArray() {
  702. level := 1
  703. for {
  704. for i := iter.head; i < iter.tail; i++ {
  705. switch iter.buf[i] {
  706. case '"': // If inside string, skip it
  707. iter.head = i + 1
  708. iter.skipString()
  709. i = iter.head - 1 // it will be i++ soon
  710. case '[': // If open symbol, increase level
  711. level++
  712. case ']': // If close symbol, increase level
  713. level--
  714. // If we have returned to the original level, we're done
  715. if level == 0 {
  716. iter.head = i + 1
  717. return
  718. }
  719. }
  720. }
  721. if (!iter.loadMore()) {
  722. return
  723. }
  724. }
  725. }
  726. func (iter *Iterator) skipObject() {
  727. level := 1
  728. for {
  729. for i := iter.head; i < iter.tail; i++ {
  730. switch iter.buf[i] {
  731. case '"': // If inside string, skip it
  732. iter.head = i + 1
  733. iter.skipString()
  734. i = iter.head - 1 // it will be i++ soon
  735. case '{': // If open symbol, increase level
  736. level++
  737. case '}': // If close symbol, increase level
  738. level--
  739. // If we have returned to the original level, we're done
  740. if level == 0 {
  741. iter.head = i + 1
  742. return
  743. }
  744. }
  745. }
  746. if (!iter.loadMore()) {
  747. return
  748. }
  749. }
  750. }
  751. func (iter *Iterator) skipUntilBreak() {
  752. // true, false, null, number
  753. for {
  754. for i := iter.head; i < iter.tail; i++ {
  755. c := iter.buf[i]
  756. switch c {
  757. case ' ', '\n', '\r', '\t', ',', '}', ']':
  758. iter.head = i
  759. return
  760. }
  761. }
  762. if (!iter.loadMore()) {
  763. return
  764. }
  765. }
  766. }