jsoniter.go 13 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724
  1. package jsoniter
import (
	"fmt"
	"io"
	"strconv"
	"unicode/utf16"
	"unicode/utf8"
)
  8. type Iterator struct {
  9. reader io.Reader
  10. buf []byte
  11. head int
  12. tail int
  13. Error error
  14. }
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. peekStart := iter.head - 10
  47. if peekStart < 0 {
  48. peekStart = 0
  49. }
  50. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  51. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  52. }
  53. func (iter *Iterator) readByte() (ret byte) {
  54. if iter.head == iter.tail {
  55. if iter.reader == nil {
  56. iter.Error = io.EOF
  57. return
  58. }
  59. n, err := iter.reader.Read(iter.buf)
  60. if err != nil {
  61. iter.Error = err
  62. return
  63. }
  64. if n == 0 {
  65. iter.Error = io.EOF
  66. return
  67. }
  68. iter.head = 0
  69. iter.tail = n
  70. }
  71. ret = iter.buf[iter.head]
  72. iter.head += 1
  73. return ret
  74. }
  75. func (iter *Iterator) unreadByte() {
  76. if iter.head == 0 {
  77. iter.ReportError("unreadByte", "unread too many bytes")
  78. return
  79. }
  80. iter.head -= 1
  81. return
  82. }
  83. const maxUint64 = (1 << 64 - 1)
  84. const cutoffUint64 = maxUint64 / 10 + 1
  85. const maxUint32 = (1 << 32 - 1)
  86. const cutoffUint32 = maxUint32 / 10 + 1
  87. func (iter *Iterator) ReadUint64() (ret uint64) {
  88. c := iter.readByte()
  89. if iter.Error != nil {
  90. return
  91. }
  92. /* a single zero, or a series of integers */
  93. if c == '0' {
  94. return 0
  95. } else if c >= '1' && c <= '9' {
  96. for c >= '0' && c <= '9' {
  97. var v byte
  98. v = c - '0'
  99. if ret >= cutoffUint64 {
  100. iter.ReportError("ReadUint64", "overflow")
  101. return
  102. }
  103. ret = ret * uint64(10) + uint64(v)
  104. c = iter.readByte()
  105. if iter.Error != nil {
  106. if iter.Error == io.EOF {
  107. break
  108. } else {
  109. return 0
  110. }
  111. }
  112. }
  113. if iter.Error != io.EOF {
  114. iter.unreadByte()
  115. }
  116. } else {
  117. iter.ReportError("ReadUint64", "expects 0~9")
  118. return
  119. }
  120. return ret
  121. }
  122. func (iter *Iterator) ReadInt() (ret int) {
  123. val := iter.ReadInt64()
  124. converted := int(val)
  125. if int64(converted) != val {
  126. iter.ReportError("ReadInt", "int overflow")
  127. return
  128. }
  129. return converted
  130. }
  131. func (iter *Iterator) ReadInt64() (ret int64) {
  132. c := iter.readByte()
  133. if iter.Error != nil {
  134. return
  135. }
  136. /* optional leading minus */
  137. if c == '-' {
  138. n := iter.ReadUint64()
  139. return -int64(n)
  140. } else {
  141. iter.unreadByte()
  142. n := iter.ReadUint64()
  143. return int64(n)
  144. }
  145. }
  146. func (iter *Iterator) ReadString() (ret string) {
  147. str := make([]byte, 0, 10)
  148. c := iter.readByte()
  149. if iter.Error != nil {
  150. return
  151. }
  152. switch c {
  153. case 'n':
  154. iter.skipNull()
  155. if iter.Error != nil {
  156. return
  157. }
  158. return ""
  159. case '"':
  160. // nothing
  161. default:
  162. iter.ReportError("ReadString", `expects " or n`)
  163. return
  164. }
  165. for {
  166. c = iter.readByte()
  167. if iter.Error != nil {
  168. return
  169. }
  170. switch c {
  171. case '\\':
  172. c = iter.readByte()
  173. if iter.Error != nil {
  174. return
  175. }
  176. switch c {
  177. case 'u':
  178. r := iter.readU4()
  179. if iter.Error != nil {
  180. return
  181. }
  182. if utf16.IsSurrogate(r) {
  183. c = iter.readByte()
  184. if iter.Error != nil {
  185. return
  186. }
  187. if c != '\\' {
  188. iter.ReportError("ReadString",
  189. `expects \u after utf16 surrogate, but \ not found`)
  190. return
  191. }
  192. c = iter.readByte()
  193. if iter.Error != nil {
  194. return
  195. }
  196. if c != 'u' {
  197. iter.ReportError("ReadString",
  198. `expects \u after utf16 surrogate, but \u not found`)
  199. return
  200. }
  201. r2 := iter.readU4()
  202. if iter.Error != nil {
  203. return
  204. }
  205. combined := utf16.DecodeRune(r, r2)
  206. str = appendRune(str, combined)
  207. } else {
  208. str = appendRune(str, r)
  209. }
  210. case '"':
  211. str = append(str, '"')
  212. case '\\':
  213. str = append(str, '\\')
  214. case '/':
  215. str = append(str, '/')
  216. case 'b':
  217. str = append(str, '\b')
  218. case 'f':
  219. str = append(str, '\f')
  220. case 'n':
  221. str = append(str, '\n')
  222. case 'r':
  223. str = append(str, '\r')
  224. case 't':
  225. str = append(str, '\t')
  226. default:
  227. iter.ReportError("ReadString",
  228. `invalid escape char after \`)
  229. return
  230. }
  231. case '"':
  232. return string(str)
  233. default:
  234. str = append(str, c)
  235. }
  236. }
  237. }
  238. func (iter *Iterator) readU4() (ret rune) {
  239. for i := 0; i < 4; i++ {
  240. c := iter.readByte()
  241. if iter.Error != nil {
  242. return
  243. }
  244. if (c >= '0' && c <= '9') {
  245. if ret >= cutoffUint32 {
  246. iter.ReportError("readU4", "overflow")
  247. return
  248. }
  249. ret = ret * 16 + rune(c - '0')
  250. } else if ((c >= 'a' && c <= 'f') ) {
  251. if ret >= cutoffUint32 {
  252. iter.ReportError("readU4", "overflow")
  253. return
  254. }
  255. ret = ret * 16 + rune(c - 'a' + 10)
  256. } else {
  257. iter.ReportError("readU4", "expects 0~9 or a~f")
  258. return
  259. }
  260. }
  261. return ret
  262. }
  263. const (
  264. t1 = 0x00 // 0000 0000
  265. tx = 0x80 // 1000 0000
  266. t2 = 0xC0 // 1100 0000
  267. t3 = 0xE0 // 1110 0000
  268. t4 = 0xF0 // 1111 0000
  269. t5 = 0xF8 // 1111 1000
  270. maskx = 0x3F // 0011 1111
  271. mask2 = 0x1F // 0001 1111
  272. mask3 = 0x0F // 0000 1111
  273. mask4 = 0x07 // 0000 0111
  274. rune1Max = 1 << 7 - 1
  275. rune2Max = 1 << 11 - 1
  276. rune3Max = 1 << 16 - 1
  277. surrogateMin = 0xD800
  278. surrogateMax = 0xDFFF
  279. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  280. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  281. )
  282. func appendRune(p []byte, r rune) []byte {
  283. // Negative values are erroneous. Making it unsigned addresses the problem.
  284. switch i := uint32(r); {
  285. case i <= rune1Max:
  286. p = append(p, byte(r))
  287. return p
  288. case i <= rune2Max:
  289. p = append(p, t2 | byte(r >> 6))
  290. p = append(p, tx | byte(r) & maskx)
  291. return p
  292. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  293. r = RuneError
  294. fallthrough
  295. case i <= rune3Max:
  296. p = append(p, t3 | byte(r >> 12))
  297. p = append(p, tx | byte(r >> 6) & maskx)
  298. p = append(p, tx | byte(r) & maskx)
  299. return p
  300. default:
  301. p = append(p, t4 | byte(r >> 18))
  302. p = append(p, tx | byte(r >> 12) & maskx)
  303. p = append(p, tx | byte(r >> 6) & maskx)
  304. p = append(p, tx | byte(r) & maskx)
  305. return p
  306. }
  307. }
  308. func (iter *Iterator) ReadArray() (ret bool) {
  309. iter.skipWhitespaces()
  310. c := iter.readByte()
  311. if iter.Error != nil {
  312. return
  313. }
  314. switch c {
  315. case 'n': {
  316. iter.skipNull()
  317. if iter.Error != nil {
  318. return
  319. }
  320. return false // null
  321. }
  322. case '[': {
  323. iter.skipWhitespaces()
  324. c = iter.readByte()
  325. if iter.Error != nil {
  326. return
  327. }
  328. if c == ']' {
  329. return false
  330. } else {
  331. iter.unreadByte()
  332. return true
  333. }
  334. }
  335. case ']': return false
  336. case ',':
  337. iter.skipWhitespaces()
  338. return true
  339. default:
  340. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  341. return
  342. }
  343. }
  344. func (iter *Iterator) ReadObject() (ret string) {
  345. iter.skipWhitespaces()
  346. c := iter.readByte()
  347. if iter.Error != nil {
  348. return
  349. }
  350. switch c {
  351. case 'n': {
  352. iter.skipNull()
  353. if iter.Error != nil {
  354. return
  355. }
  356. return "" // null
  357. }
  358. case '{': {
  359. iter.skipWhitespaces()
  360. c = iter.readByte()
  361. if iter.Error != nil {
  362. return
  363. }
  364. switch c {
  365. case '}':
  366. return "" // end of object
  367. case '"':
  368. iter.unreadByte()
  369. field := iter.readObjectField()
  370. if iter.Error != nil {
  371. return
  372. }
  373. return field
  374. default:
  375. iter.ReportError("ReadObject", `expect " after {`)
  376. return
  377. }
  378. }
  379. case ',':
  380. iter.skipWhitespaces()
  381. field := iter.readObjectField()
  382. if iter.Error != nil {
  383. return
  384. }
  385. return field
  386. case '}':
  387. return "" // end of object
  388. default:
  389. iter.ReportError("ReadObject", `expect { or , or } or n`)
  390. return
  391. }
  392. }
  393. func (iter *Iterator) readObjectField() (ret string) {
  394. field := iter.ReadString()
  395. if iter.Error != nil {
  396. return
  397. }
  398. iter.skipWhitespaces()
  399. c := iter.readByte()
  400. if iter.Error != nil {
  401. return
  402. }
  403. if c != ':' {
  404. iter.ReportError("ReadObject", "expect : after object field")
  405. return
  406. }
  407. iter.skipWhitespaces()
  408. return field
  409. }
  410. func (iter *Iterator) ReadFloat64() (ret float64) {
  411. str := make([]byte, 0, 10)
  412. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  413. switch c {
  414. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  415. str = append(str, c)
  416. default:
  417. iter.unreadByte()
  418. val, err := strconv.ParseFloat(string(str), 64)
  419. if err != nil {
  420. iter.Error = err
  421. return
  422. }
  423. return val
  424. }
  425. }
  426. if iter.Error == io.EOF {
  427. val, err := strconv.ParseFloat(string(str), 64)
  428. if err != nil {
  429. iter.Error = err
  430. return
  431. }
  432. return val
  433. }
  434. return
  435. }
  436. func (iter *Iterator) ReadBool() (ret bool) {
  437. c := iter.readByte()
  438. if iter.Error != nil {
  439. return
  440. }
  441. switch c {
  442. case 't':
  443. iter.skipTrue()
  444. if iter.Error != nil {
  445. return
  446. }
  447. return true
  448. case 'f':
  449. iter.skipFalse()
  450. if iter.Error != nil {
  451. return
  452. }
  453. return false
  454. default:
  455. iter.ReportError("ReadBool", "expect t or f")
  456. return
  457. }
  458. }
  459. func (iter *Iterator) skipTrue() {
  460. c := iter.readByte()
  461. if iter.Error != nil {
  462. return
  463. }
  464. if c != 'r' {
  465. iter.ReportError("skipTrue", "expect r of true")
  466. return
  467. }
  468. c = iter.readByte()
  469. if iter.Error != nil {
  470. return
  471. }
  472. if c != 'u' {
  473. iter.ReportError("skipTrue", "expect u of true")
  474. return
  475. }
  476. c = iter.readByte()
  477. if iter.Error != nil {
  478. return
  479. }
  480. if c != 'e' {
  481. iter.ReportError("skipTrue", "expect e of true")
  482. return
  483. }
  484. }
  485. func (iter *Iterator) skipFalse() {
  486. c := iter.readByte()
  487. if iter.Error != nil {
  488. return
  489. }
  490. if c != 'a' {
  491. iter.ReportError("skipFalse", "expect a of false")
  492. return
  493. }
  494. c = iter.readByte()
  495. if iter.Error != nil {
  496. return
  497. }
  498. if c != 'l' {
  499. iter.ReportError("skipFalse", "expect l of false")
  500. return
  501. }
  502. c = iter.readByte()
  503. if iter.Error != nil {
  504. return
  505. }
  506. if c != 's' {
  507. iter.ReportError("skipFalse", "expect s of false")
  508. return
  509. }
  510. c = iter.readByte()
  511. if iter.Error != nil {
  512. return
  513. }
  514. if c != 'e' {
  515. iter.ReportError("skipFalse", "expect e of false")
  516. return
  517. }
  518. }
  519. func (iter *Iterator) ReadNull() (ret bool) {
  520. c := iter.readByte()
  521. if iter.Error != nil {
  522. return
  523. }
  524. if c == 'n' {
  525. iter.skipNull()
  526. if iter.Error != nil {
  527. return
  528. }
  529. return true
  530. }
  531. iter.unreadByte()
  532. return false
  533. }
  534. func (iter *Iterator) skipNull() {
  535. c := iter.readByte()
  536. if iter.Error != nil {
  537. return
  538. }
  539. if c != 'u' {
  540. iter.ReportError("skipNull", "expect u of null")
  541. return
  542. }
  543. c = iter.readByte()
  544. if iter.Error != nil {
  545. return
  546. }
  547. if c != 'l' {
  548. iter.ReportError("skipNull", "expect l of null")
  549. return
  550. }
  551. c = iter.readByte()
  552. if iter.Error != nil {
  553. return
  554. }
  555. if c != 'l' {
  556. iter.ReportError("skipNull", "expect l of null")
  557. return
  558. }
  559. }
  560. func (iter *Iterator) Skip() {
  561. c := iter.readByte()
  562. if iter.Error != nil {
  563. return
  564. }
  565. switch c {
  566. case '"':
  567. iter.skipString()
  568. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  569. iter.skipNumber()
  570. case '[':
  571. iter.skipArray()
  572. case '{':
  573. iter.skipObject()
  574. case 't':
  575. iter.skipTrue()
  576. case 'f':
  577. iter.skipFalse()
  578. case 'n':
  579. iter.skipNull()
  580. default:
  581. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  582. return
  583. }
  584. }
  585. func (iter *Iterator) skipString() {
  586. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  587. switch c {
  588. case '"':
  589. return // end of string found
  590. case '\\':
  591. iter.readByte() // " after \\ does not count
  592. if iter.Error != nil {
  593. return
  594. }
  595. }
  596. }
  597. }
  598. func (iter *Iterator) skipNumber() {
  599. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  600. switch c {
  601. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  602. continue
  603. default:
  604. iter.unreadByte()
  605. return
  606. }
  607. }
  608. }
  609. func (iter *Iterator) skipArray() {
  610. for {
  611. iter.skipWhitespaces()
  612. c := iter.readByte()
  613. if iter.Error != nil {
  614. return
  615. }
  616. if c == ']' {
  617. return
  618. }
  619. iter.unreadByte()
  620. iter.Skip()
  621. iter.skipWhitespaces()
  622. c = iter.readByte()
  623. if iter.Error != nil {
  624. return
  625. }
  626. switch c {
  627. case ',':
  628. iter.skipWhitespaces()
  629. continue
  630. case ']':
  631. return
  632. default:
  633. iter.ReportError("skipArray", "expects , or ]")
  634. return
  635. }
  636. }
  637. }
  638. func (iter *Iterator) skipObject() {
  639. iter.skipWhitespaces()
  640. c := iter.readByte()
  641. if iter.Error != nil {
  642. return
  643. }
  644. if c == '}' {
  645. return // end of object
  646. } else {
  647. iter.unreadByte()
  648. }
  649. for {
  650. iter.skipWhitespaces()
  651. c := iter.readByte()
  652. if iter.Error != nil {
  653. return
  654. }
  655. if c != '"' {
  656. iter.ReportError("skipObject", `expects "`)
  657. return
  658. }
  659. iter.skipString()
  660. iter.skipWhitespaces()
  661. c = iter.readByte()
  662. if iter.Error != nil {
  663. return
  664. }
  665. if c != ':' {
  666. iter.ReportError("skipObject", `expects :`)
  667. return
  668. }
  669. iter.skipWhitespaces()
  670. iter.Skip()
  671. iter.skipWhitespaces()
  672. c = iter.readByte()
  673. if iter.Error != nil {
  674. return
  675. }
  676. switch c {
  677. case ',':
  678. iter.skipWhitespaces()
  679. continue
  680. case '}':
  681. return // end of object
  682. default:
  683. iter.ReportError("skipObject", "expects , or }")
  684. return
  685. }
  686. }
  687. }