jsoniter.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. )
  8. type Iterator struct {
  9. reader io.Reader
  10. buf []byte
  11. head int
  12. tail int
  13. Error error
  14. }
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. peekStart := iter.head - 10
  47. if peekStart < 0 {
  48. peekStart = 0
  49. }
  50. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  51. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  52. }
  53. func (iter *Iterator) readByte() (ret byte) {
  54. if iter.head == iter.tail {
  55. if iter.reader == nil {
  56. iter.Error = io.EOF
  57. return
  58. }
  59. n, err := iter.reader.Read(iter.buf)
  60. if err != nil {
  61. iter.Error = err
  62. return
  63. }
  64. if n == 0 {
  65. iter.Error = io.EOF
  66. return
  67. }
  68. iter.head = 0
  69. iter.tail = n
  70. }
  71. ret = iter.buf[iter.head]
  72. iter.head += 1
  73. return ret
  74. }
  75. func (iter *Iterator) unreadByte() {
  76. if iter.head == 0 {
  77. iter.ReportError("unreadByte", "unread too many bytes")
  78. return
  79. }
  80. iter.head -= 1
  81. return
  82. }
  83. const maxUint64 = (1 << 64 - 1)
  84. const cutoffUint64 = maxUint64 / 10 + 1
  85. const maxUint32 = (1 << 32 - 1)
  86. const cutoffUint32 = maxUint32 / 10 + 1
  87. func (iter *Iterator) ReadUint64() (ret uint64) {
  88. c := iter.readByte()
  89. if iter.Error != nil {
  90. return
  91. }
  92. /* a single zero, or a series of integers */
  93. if c == '0' {
  94. return 0
  95. } else if c >= '1' && c <= '9' {
  96. for c >= '0' && c <= '9' {
  97. var v byte
  98. v = c - '0'
  99. if ret >= cutoffUint64 {
  100. iter.ReportError("ReadUint64", "overflow")
  101. return
  102. }
  103. ret = ret * uint64(10) + uint64(v)
  104. c = iter.readByte()
  105. if iter.Error != nil {
  106. if iter.Error == io.EOF {
  107. break
  108. } else {
  109. return 0
  110. }
  111. }
  112. }
  113. if iter.Error != io.EOF {
  114. iter.unreadByte()
  115. }
  116. } else {
  117. iter.ReportError("ReadUint64", "expects 0~9")
  118. return
  119. }
  120. return ret
  121. }
  122. func (iter *Iterator) ReadInt64() (ret int64) {
  123. c := iter.readByte()
  124. if iter.Error != nil {
  125. return
  126. }
  127. /* optional leading minus */
  128. if c == '-' {
  129. n := iter.ReadUint64()
  130. return -int64(n)
  131. } else {
  132. iter.unreadByte()
  133. n := iter.ReadUint64()
  134. return int64(n)
  135. }
  136. }
  137. func (iter *Iterator) ReadString() (ret string) {
  138. str := make([]byte, 0, 10)
  139. c := iter.readByte()
  140. if iter.Error != nil {
  141. return
  142. }
  143. switch c {
  144. case 'n':
  145. iter.skipNull()
  146. if iter.Error != nil {
  147. return
  148. }
  149. return ""
  150. case '"':
  151. // nothing
  152. default:
  153. iter.ReportError("ReadString", `expects " or n`)
  154. return
  155. }
  156. for {
  157. c = iter.readByte()
  158. if iter.Error != nil {
  159. return
  160. }
  161. switch c {
  162. case '\\':
  163. c = iter.readByte()
  164. if iter.Error != nil {
  165. return
  166. }
  167. switch c {
  168. case 'u':
  169. r := iter.readU4()
  170. if iter.Error != nil {
  171. return
  172. }
  173. if utf16.IsSurrogate(r) {
  174. c = iter.readByte()
  175. if iter.Error != nil {
  176. return
  177. }
  178. if c != '\\' {
  179. iter.ReportError("ReadString",
  180. `expects \u after utf16 surrogate, but \ not found`)
  181. return
  182. }
  183. c = iter.readByte()
  184. if iter.Error != nil {
  185. return
  186. }
  187. if c != 'u' {
  188. iter.ReportError("ReadString",
  189. `expects \u after utf16 surrogate, but \u not found`)
  190. return
  191. }
  192. r2 := iter.readU4()
  193. if iter.Error != nil {
  194. return
  195. }
  196. combined := utf16.DecodeRune(r, r2)
  197. str = appendRune(str, combined)
  198. } else {
  199. str = appendRune(str, r)
  200. }
  201. case '"':
  202. str = append(str, '"')
  203. case '\\':
  204. str = append(str, '\\')
  205. case '/':
  206. str = append(str, '/')
  207. case 'b':
  208. str = append(str, '\b')
  209. case 'f':
  210. str = append(str, '\f')
  211. case 'n':
  212. str = append(str, '\n')
  213. case 'r':
  214. str = append(str, '\r')
  215. case 't':
  216. str = append(str, '\t')
  217. default:
  218. iter.ReportError("ReadString",
  219. `invalid escape char after \`)
  220. return
  221. }
  222. case '"':
  223. return string(str)
  224. default:
  225. str = append(str, c)
  226. }
  227. }
  228. }
  229. func (iter *Iterator) readU4() (ret rune) {
  230. for i := 0; i < 4; i++ {
  231. c := iter.readByte()
  232. if iter.Error != nil {
  233. return
  234. }
  235. if (c >= '0' && c <= '9') {
  236. if ret >= cutoffUint32 {
  237. iter.ReportError("readU4", "overflow")
  238. return
  239. }
  240. ret = ret * 16 + rune(c - '0')
  241. } else if ((c >= 'a' && c <= 'f') ) {
  242. if ret >= cutoffUint32 {
  243. iter.ReportError("readU4", "overflow")
  244. return
  245. }
  246. ret = ret * 16 + rune(c - 'a' + 10)
  247. } else {
  248. iter.ReportError("readU4", "expects 0~9 or a~f")
  249. return
  250. }
  251. }
  252. return ret
  253. }
  254. const (
  255. t1 = 0x00 // 0000 0000
  256. tx = 0x80 // 1000 0000
  257. t2 = 0xC0 // 1100 0000
  258. t3 = 0xE0 // 1110 0000
  259. t4 = 0xF0 // 1111 0000
  260. t5 = 0xF8 // 1111 1000
  261. maskx = 0x3F // 0011 1111
  262. mask2 = 0x1F // 0001 1111
  263. mask3 = 0x0F // 0000 1111
  264. mask4 = 0x07 // 0000 0111
  265. rune1Max = 1 << 7 - 1
  266. rune2Max = 1 << 11 - 1
  267. rune3Max = 1 << 16 - 1
  268. surrogateMin = 0xD800
  269. surrogateMax = 0xDFFF
  270. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  271. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  272. )
  273. func appendRune(p []byte, r rune) []byte {
  274. // Negative values are erroneous. Making it unsigned addresses the problem.
  275. switch i := uint32(r); {
  276. case i <= rune1Max:
  277. p = append(p, byte(r))
  278. return p
  279. case i <= rune2Max:
  280. p = append(p, t2 | byte(r >> 6))
  281. p = append(p, tx | byte(r) & maskx)
  282. return p
  283. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  284. r = RuneError
  285. fallthrough
  286. case i <= rune3Max:
  287. p = append(p, t3 | byte(r >> 12))
  288. p = append(p, tx | byte(r >> 6) & maskx)
  289. p = append(p, tx | byte(r) & maskx)
  290. return p
  291. default:
  292. p = append(p, t4 | byte(r >> 18))
  293. p = append(p, tx | byte(r >> 12) & maskx)
  294. p = append(p, tx | byte(r >> 6) & maskx)
  295. p = append(p, tx | byte(r) & maskx)
  296. return p
  297. }
  298. }
  299. func (iter *Iterator) ReadArray() (ret bool) {
  300. iter.skipWhitespaces()
  301. c := iter.readByte()
  302. if iter.Error != nil {
  303. return
  304. }
  305. switch c {
  306. case 'n': {
  307. iter.skipNull()
  308. if iter.Error != nil {
  309. return
  310. }
  311. return false // null
  312. }
  313. case '[': {
  314. iter.skipWhitespaces()
  315. c = iter.readByte()
  316. if iter.Error != nil {
  317. return
  318. }
  319. if c == ']' {
  320. return false
  321. } else {
  322. iter.unreadByte()
  323. return true
  324. }
  325. }
  326. case ']': return false
  327. case ',':
  328. iter.skipWhitespaces()
  329. return true
  330. default:
  331. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  332. return
  333. }
  334. }
  335. func (iter *Iterator) ReadObject() (ret string) {
  336. iter.skipWhitespaces()
  337. c := iter.readByte()
  338. if iter.Error != nil {
  339. return
  340. }
  341. switch c {
  342. case 'n': {
  343. iter.skipNull()
  344. if iter.Error != nil {
  345. return
  346. }
  347. return "" // null
  348. }
  349. case '{': {
  350. iter.skipWhitespaces()
  351. c = iter.readByte()
  352. if iter.Error != nil {
  353. return
  354. }
  355. switch c {
  356. case '}':
  357. return "" // end of object
  358. case '"':
  359. iter.unreadByte()
  360. field := iter.readObjectField()
  361. if iter.Error != nil {
  362. return
  363. }
  364. return field
  365. default:
  366. iter.ReportError("ReadObject", `expect " after {`)
  367. return
  368. }
  369. }
  370. case ',':
  371. iter.skipWhitespaces()
  372. field := iter.readObjectField()
  373. if iter.Error != nil {
  374. return
  375. }
  376. return field
  377. case '}':
  378. return "" // end of object
  379. default:
  380. iter.ReportError("ReadObject", `expect { or , or } or n`)
  381. return
  382. }
  383. }
  384. func (iter *Iterator) readObjectField() (ret string) {
  385. field := iter.ReadString()
  386. if iter.Error != nil {
  387. return
  388. }
  389. iter.skipWhitespaces()
  390. c := iter.readByte()
  391. if iter.Error != nil {
  392. return
  393. }
  394. if c != ':' {
  395. iter.ReportError("ReadObject", "expect : after object field")
  396. return
  397. }
  398. iter.skipWhitespaces()
  399. return field
  400. }
  401. func (iter *Iterator) ReadFloat64() (ret float64) {
  402. str := make([]byte, 0, 10)
  403. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  404. switch c {
  405. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  406. str = append(str, c)
  407. default:
  408. iter.unreadByte()
  409. val, err := strconv.ParseFloat(string(str), 64)
  410. if err != nil {
  411. iter.Error = err
  412. return
  413. }
  414. return val
  415. }
  416. }
  417. if iter.Error == io.EOF {
  418. val, err := strconv.ParseFloat(string(str), 64)
  419. if err != nil {
  420. iter.Error = err
  421. return
  422. }
  423. return val
  424. }
  425. return
  426. }
  427. func (iter *Iterator) ReadBool() (ret bool) {
  428. c := iter.readByte()
  429. if iter.Error != nil {
  430. return
  431. }
  432. switch c {
  433. case 't':
  434. iter.skipTrue()
  435. if iter.Error != nil {
  436. return
  437. }
  438. return true
  439. case 'f':
  440. iter.skipFalse()
  441. if iter.Error != nil {
  442. return
  443. }
  444. return false
  445. default:
  446. iter.ReportError("ReadBool", "expect t or f")
  447. return
  448. }
  449. }
  450. func (iter *Iterator) skipTrue() {
  451. c := iter.readByte()
  452. if iter.Error != nil {
  453. return
  454. }
  455. if c != 'r' {
  456. iter.ReportError("skipTrue", "expect r of true")
  457. return
  458. }
  459. c = iter.readByte()
  460. if iter.Error != nil {
  461. return
  462. }
  463. if c != 'u' {
  464. iter.ReportError("skipTrue", "expect u of true")
  465. return
  466. }
  467. c = iter.readByte()
  468. if iter.Error != nil {
  469. return
  470. }
  471. if c != 'e' {
  472. iter.ReportError("skipTrue", "expect e of true")
  473. return
  474. }
  475. }
  476. func (iter *Iterator) skipFalse() {
  477. c := iter.readByte()
  478. if iter.Error != nil {
  479. return
  480. }
  481. if c != 'a' {
  482. iter.ReportError("skipFalse", "expect a of false")
  483. return
  484. }
  485. c = iter.readByte()
  486. if iter.Error != nil {
  487. return
  488. }
  489. if c != 'l' {
  490. iter.ReportError("skipFalse", "expect l of false")
  491. return
  492. }
  493. c = iter.readByte()
  494. if iter.Error != nil {
  495. return
  496. }
  497. if c != 's' {
  498. iter.ReportError("skipFalse", "expect s of false")
  499. return
  500. }
  501. c = iter.readByte()
  502. if iter.Error != nil {
  503. return
  504. }
  505. if c != 'e' {
  506. iter.ReportError("skipFalse", "expect e of false")
  507. return
  508. }
  509. }
  510. func (iter *Iterator) ReadNull() (ret bool) {
  511. c := iter.readByte()
  512. if iter.Error != nil {
  513. return
  514. }
  515. if c == 'n' {
  516. iter.skipNull()
  517. if iter.Error != nil {
  518. return
  519. }
  520. return true
  521. }
  522. iter.unreadByte()
  523. return false
  524. }
  525. func (iter *Iterator) skipNull() {
  526. c := iter.readByte()
  527. if iter.Error != nil {
  528. return
  529. }
  530. if c != 'u' {
  531. iter.ReportError("skipNull", "expect u of null")
  532. return
  533. }
  534. c = iter.readByte()
  535. if iter.Error != nil {
  536. return
  537. }
  538. if c != 'l' {
  539. iter.ReportError("skipNull", "expect l of null")
  540. return
  541. }
  542. c = iter.readByte()
  543. if iter.Error != nil {
  544. return
  545. }
  546. if c != 'l' {
  547. iter.ReportError("skipNull", "expect l of null")
  548. return
  549. }
  550. }
  551. func (iter *Iterator) Skip() {
  552. c := iter.readByte()
  553. if iter.Error != nil {
  554. return
  555. }
  556. switch c {
  557. case '"':
  558. iter.skipString()
  559. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  560. iter.skipNumber()
  561. case '[':
  562. iter.skipArray()
  563. case '{':
  564. iter.skipObject()
  565. case 't':
  566. iter.skipTrue()
  567. case 'f':
  568. iter.skipFalse()
  569. case 'n':
  570. iter.skipNull()
  571. default:
  572. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  573. return
  574. }
  575. }
  576. func (iter *Iterator) skipString() {
  577. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  578. switch c {
  579. case '"':
  580. return // end of string found
  581. case '\\':
  582. iter.readByte() // " after \\ does not count
  583. if iter.Error != nil {
  584. return
  585. }
  586. }
  587. }
  588. }
  589. func (iter *Iterator) skipNumber() {
  590. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  591. switch c {
  592. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  593. continue
  594. default:
  595. iter.unreadByte()
  596. return
  597. }
  598. }
  599. }
  600. func (iter *Iterator) skipArray() {
  601. for {
  602. iter.skipWhitespaces()
  603. c := iter.readByte()
  604. if iter.Error != nil {
  605. return
  606. }
  607. if c == ']' {
  608. return
  609. }
  610. iter.unreadByte()
  611. iter.Skip()
  612. iter.skipWhitespaces()
  613. c = iter.readByte()
  614. if iter.Error != nil {
  615. return
  616. }
  617. switch c {
  618. case ',':
  619. iter.skipWhitespaces()
  620. continue
  621. case ']':
  622. return
  623. default:
  624. iter.ReportError("skipArray", "expects , or ]")
  625. return
  626. }
  627. }
  628. }
  629. func (iter *Iterator) skipObject() {
  630. iter.skipWhitespaces()
  631. c := iter.readByte()
  632. if iter.Error != nil {
  633. return
  634. }
  635. if c == '}' {
  636. return // end of object
  637. } else {
  638. iter.unreadByte()
  639. }
  640. for {
  641. iter.skipWhitespaces()
  642. c := iter.readByte()
  643. if iter.Error != nil {
  644. return
  645. }
  646. if c != '"' {
  647. iter.ReportError("skipObject", `expects "`)
  648. return
  649. }
  650. iter.skipString()
  651. iter.skipWhitespaces()
  652. c = iter.readByte()
  653. if iter.Error != nil {
  654. return
  655. }
  656. if c != ':' {
  657. iter.ReportError("skipObject", `expects :`)
  658. return
  659. }
  660. iter.skipWhitespaces()
  661. iter.Skip()
  662. iter.skipWhitespaces()
  663. c = iter.readByte()
  664. if iter.Error != nil {
  665. return
  666. }
  667. switch c {
  668. case ',':
  669. iter.skipWhitespaces()
  670. continue
  671. case '}':
  672. return // end of object
  673. default:
  674. iter.ReportError("skipObject", "expects , or }")
  675. return
  676. }
  677. }
  678. }