jsoniter.go 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. )
  // Iterator is a pull-style cursor over JSON input.
//
// The unread input is buf[head:tail]. When that window is exhausted and
// reader is non-nil, the buffer is refilled from reader. Failures are
// recorded in Error rather than returned, so callers inspect Error after
// each read.
type Iterator struct {
	// reader is the refill source; it is nil when parsing an in-memory slice.
	reader io.Reader
	// buf holds the bytes currently available for parsing.
	buf []byte
	// head is the index of the next byte to consume; tail is the end of the
	// valid data in buf.
	head, tail int
	// Error holds the most recent failure (including io.EOF at end of input).
	Error error
}
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. peekStart := iter.head - 10
  47. if peekStart < 0 {
  48. peekStart = 0
  49. }
  50. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  51. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  52. }
  53. func (iter *Iterator) readByte() (ret byte) {
  54. if iter.head == iter.tail {
  55. if iter.reader == nil {
  56. iter.Error = io.EOF
  57. return
  58. }
  59. n, err := iter.reader.Read(iter.buf)
  60. if err != nil {
  61. iter.Error = err
  62. return
  63. }
  64. if n == 0 {
  65. iter.Error = io.EOF
  66. return
  67. }
  68. iter.head = 0
  69. iter.tail = n
  70. }
  71. ret = iter.buf[iter.head]
  72. iter.head += 1
  73. return ret
  74. }
  75. func (iter *Iterator) unreadByte() {
  76. if iter.head == 0 {
  77. iter.ReportError("unreadByte", "unread too many bytes")
  78. return
  79. }
  80. iter.head -= 1
  81. return
  82. }
  // Limits used for overflow detection while accumulating decimal digits.
// A cutoff is the smallest value that overflows when multiplied by 10.
const (
	maxUint64    = 1<<64 - 1
	cutoffUint64 = maxUint64/10 + 1

	maxUint32    = 1<<32 - 1
	cutoffUint32 = maxUint32/10 + 1
)
  87. func (iter *Iterator) ReadUint() (ret uint) {
  88. val := iter.ReadUint64()
  89. converted := uint(val)
  90. if uint64(converted) != val {
  91. iter.ReportError("ReadUint", "int overflow")
  92. return
  93. }
  94. return converted
  95. }
  96. func (iter *Iterator) ReadUint8() (ret uint8) {
  97. val := iter.ReadUint64()
  98. converted := uint8(val)
  99. if uint64(converted) != val {
  100. iter.ReportError("ReadUint8", "int overflow")
  101. return
  102. }
  103. return converted
  104. }
  105. func (iter *Iterator) ReadUint16() (ret uint16) {
  106. val := iter.ReadUint64()
  107. converted := uint16(val)
  108. if uint64(converted) != val {
  109. iter.ReportError("ReadUint16", "int overflow")
  110. return
  111. }
  112. return converted
  113. }
  114. func (iter *Iterator) ReadUint32() (ret uint32) {
  115. val := iter.ReadUint64()
  116. converted := uint32(val)
  117. if uint64(converted) != val {
  118. iter.ReportError("ReadUint32", "int overflow")
  119. return
  120. }
  121. return converted
  122. }
  123. func (iter *Iterator) ReadUint64() (ret uint64) {
  124. c := iter.readByte()
  125. if iter.Error != nil {
  126. return
  127. }
  128. /* a single zero, or a series of integers */
  129. if c == '0' {
  130. return 0
  131. } else if c >= '1' && c <= '9' {
  132. for c >= '0' && c <= '9' {
  133. var v byte
  134. v = c - '0'
  135. if ret >= cutoffUint64 {
  136. iter.ReportError("ReadUint64", "overflow")
  137. return
  138. }
  139. ret = ret * uint64(10) + uint64(v)
  140. c = iter.readByte()
  141. if iter.Error != nil {
  142. if iter.Error == io.EOF {
  143. break
  144. } else {
  145. return 0
  146. }
  147. }
  148. }
  149. if iter.Error != io.EOF {
  150. iter.unreadByte()
  151. }
  152. } else {
  153. iter.ReportError("ReadUint64", "expects 0~9")
  154. return
  155. }
  156. return ret
  157. }
  158. func (iter *Iterator) ReadInt() (ret int) {
  159. val := iter.ReadInt64()
  160. converted := int(val)
  161. if int64(converted) != val {
  162. iter.ReportError("ReadInt", "int overflow")
  163. return
  164. }
  165. return converted
  166. }
  167. func (iter *Iterator) ReadInt8() (ret int8) {
  168. val := iter.ReadInt64()
  169. converted := int8(val)
  170. if int64(converted) != val {
  171. iter.ReportError("ReadInt8", "int overflow")
  172. return
  173. }
  174. return converted
  175. }
  176. func (iter *Iterator) ReadInt16() (ret int16) {
  177. val := iter.ReadInt64()
  178. converted := int16(val)
  179. if int64(converted) != val {
  180. iter.ReportError("ReadInt16", "int overflow")
  181. return
  182. }
  183. return converted
  184. }
  185. func (iter *Iterator) ReadInt32() (ret int32) {
  186. val := iter.ReadInt64()
  187. converted := int32(val)
  188. if int64(converted) != val {
  189. iter.ReportError("ReadInt32", "int overflow")
  190. return
  191. }
  192. return converted
  193. }
  194. func (iter *Iterator) ReadInt64() (ret int64) {
  195. c := iter.readByte()
  196. if iter.Error != nil {
  197. return
  198. }
  199. /* optional leading minus */
  200. if c == '-' {
  201. n := iter.ReadUint64()
  202. return -int64(n)
  203. } else {
  204. iter.unreadByte()
  205. n := iter.ReadUint64()
  206. return int64(n)
  207. }
  208. }
  209. func (iter *Iterator) ReadString() (ret string) {
  210. str := make([]byte, 0, 10)
  211. c := iter.readByte()
  212. if iter.Error != nil {
  213. return
  214. }
  215. switch c {
  216. case 'n':
  217. iter.skipNull()
  218. if iter.Error != nil {
  219. return
  220. }
  221. return ""
  222. case '"':
  223. // nothing
  224. default:
  225. iter.ReportError("ReadString", `expects " or n`)
  226. return
  227. }
  228. for {
  229. c = iter.readByte()
  230. if iter.Error != nil {
  231. return
  232. }
  233. switch c {
  234. case '\\':
  235. c = iter.readByte()
  236. if iter.Error != nil {
  237. return
  238. }
  239. switch c {
  240. case 'u':
  241. r := iter.readU4()
  242. if iter.Error != nil {
  243. return
  244. }
  245. if utf16.IsSurrogate(r) {
  246. c = iter.readByte()
  247. if iter.Error != nil {
  248. return
  249. }
  250. if c != '\\' {
  251. iter.ReportError("ReadString",
  252. `expects \u after utf16 surrogate, but \ not found`)
  253. return
  254. }
  255. c = iter.readByte()
  256. if iter.Error != nil {
  257. return
  258. }
  259. if c != 'u' {
  260. iter.ReportError("ReadString",
  261. `expects \u after utf16 surrogate, but \u not found`)
  262. return
  263. }
  264. r2 := iter.readU4()
  265. if iter.Error != nil {
  266. return
  267. }
  268. combined := utf16.DecodeRune(r, r2)
  269. str = appendRune(str, combined)
  270. } else {
  271. str = appendRune(str, r)
  272. }
  273. case '"':
  274. str = append(str, '"')
  275. case '\\':
  276. str = append(str, '\\')
  277. case '/':
  278. str = append(str, '/')
  279. case 'b':
  280. str = append(str, '\b')
  281. case 'f':
  282. str = append(str, '\f')
  283. case 'n':
  284. str = append(str, '\n')
  285. case 'r':
  286. str = append(str, '\r')
  287. case 't':
  288. str = append(str, '\t')
  289. default:
  290. iter.ReportError("ReadString",
  291. `invalid escape char after \`)
  292. return
  293. }
  294. case '"':
  295. return string(str)
  296. default:
  297. str = append(str, c)
  298. }
  299. }
  300. }
  301. func (iter *Iterator) readU4() (ret rune) {
  302. for i := 0; i < 4; i++ {
  303. c := iter.readByte()
  304. if iter.Error != nil {
  305. return
  306. }
  307. if (c >= '0' && c <= '9') {
  308. if ret >= cutoffUint32 {
  309. iter.ReportError("readU4", "overflow")
  310. return
  311. }
  312. ret = ret * 16 + rune(c - '0')
  313. } else if ((c >= 'a' && c <= 'f') ) {
  314. if ret >= cutoffUint32 {
  315. iter.ReportError("readU4", "overflow")
  316. return
  317. }
  318. ret = ret * 16 + rune(c - 'a' + 10)
  319. } else {
  320. iter.ReportError("readU4", "expects 0~9 or a~f")
  321. return
  322. }
  323. }
  324. return ret
  325. }
  // UTF-8 encoding constants used by appendRune.

// Lead-byte and continuation-byte tag bits.
const (
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000 (continuation byte)
	t2 = 0xC0 // 1100 0000 (2-byte lead)
	t3 = 0xE0 // 1110 0000 (3-byte lead)
	t4 = 0xF0 // 1111 0000 (4-byte lead)
	t5 = 0xF8 // 1111 1000
)

// Payload masks for continuation and lead bytes.
const (
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111
)

// Largest code points encodable in one, two and three bytes.
const (
	rune1Max = 1<<7 - 1
	rune2Max = 1<<11 - 1
	rune3Max = 1<<16 - 1
)

// Bounds of the UTF-16 surrogate range, which is not valid in UTF-8.
const (
	surrogateMin = 0xD800
	surrogateMax = 0xDFFF
)

const (
	// MaxRune is the maximum valid Unicode code point.
	MaxRune = '\U0010FFFF'
	// RuneError is the "error" rune, the Unicode replacement character.
	RuneError = '\uFFFD'
)
  345. func appendRune(p []byte, r rune) []byte {
  346. // Negative values are erroneous. Making it unsigned addresses the problem.
  347. switch i := uint32(r); {
  348. case i <= rune1Max:
  349. p = append(p, byte(r))
  350. return p
  351. case i <= rune2Max:
  352. p = append(p, t2 | byte(r >> 6))
  353. p = append(p, tx | byte(r) & maskx)
  354. return p
  355. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  356. r = RuneError
  357. fallthrough
  358. case i <= rune3Max:
  359. p = append(p, t3 | byte(r >> 12))
  360. p = append(p, tx | byte(r >> 6) & maskx)
  361. p = append(p, tx | byte(r) & maskx)
  362. return p
  363. default:
  364. p = append(p, t4 | byte(r >> 18))
  365. p = append(p, tx | byte(r >> 12) & maskx)
  366. p = append(p, tx | byte(r >> 6) & maskx)
  367. p = append(p, tx | byte(r) & maskx)
  368. return p
  369. }
  370. }
  371. func (iter *Iterator) ReadArray() (ret bool) {
  372. iter.skipWhitespaces()
  373. c := iter.readByte()
  374. if iter.Error != nil {
  375. return
  376. }
  377. switch c {
  378. case 'n': {
  379. iter.skipNull()
  380. if iter.Error != nil {
  381. return
  382. }
  383. return false // null
  384. }
  385. case '[': {
  386. iter.skipWhitespaces()
  387. c = iter.readByte()
  388. if iter.Error != nil {
  389. return
  390. }
  391. if c == ']' {
  392. return false
  393. } else {
  394. iter.unreadByte()
  395. return true
  396. }
  397. }
  398. case ']': return false
  399. case ',':
  400. iter.skipWhitespaces()
  401. return true
  402. default:
  403. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  404. return
  405. }
  406. }
  407. func (iter *Iterator) ReadObject() (ret string) {
  408. iter.skipWhitespaces()
  409. c := iter.readByte()
  410. if iter.Error != nil {
  411. return
  412. }
  413. switch c {
  414. case 'n': {
  415. iter.skipNull()
  416. if iter.Error != nil {
  417. return
  418. }
  419. return "" // null
  420. }
  421. case '{': {
  422. iter.skipWhitespaces()
  423. c = iter.readByte()
  424. if iter.Error != nil {
  425. return
  426. }
  427. switch c {
  428. case '}':
  429. return "" // end of object
  430. case '"':
  431. iter.unreadByte()
  432. field := iter.readObjectField()
  433. if iter.Error != nil {
  434. return
  435. }
  436. return field
  437. default:
  438. iter.ReportError("ReadObject", `expect " after {`)
  439. return
  440. }
  441. }
  442. case ',':
  443. iter.skipWhitespaces()
  444. field := iter.readObjectField()
  445. if iter.Error != nil {
  446. return
  447. }
  448. return field
  449. case '}':
  450. return "" // end of object
  451. default:
  452. iter.ReportError("ReadObject", `expect { or , or } or n`)
  453. return
  454. }
  455. }
  456. func (iter *Iterator) readObjectField() (ret string) {
  457. field := iter.ReadString()
  458. if iter.Error != nil {
  459. return
  460. }
  461. iter.skipWhitespaces()
  462. c := iter.readByte()
  463. if iter.Error != nil {
  464. return
  465. }
  466. if c != ':' {
  467. iter.ReportError("ReadObject", "expect : after object field")
  468. return
  469. }
  470. iter.skipWhitespaces()
  471. return field
  472. }
  473. func (iter *Iterator) ReadFloat32() (ret float32) {
  474. str := make([]byte, 0, 10)
  475. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  476. switch c {
  477. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  478. str = append(str, c)
  479. default:
  480. iter.unreadByte()
  481. val, err := strconv.ParseFloat(string(str), 32)
  482. if err != nil {
  483. iter.Error = err
  484. return
  485. }
  486. return float32(val)
  487. }
  488. }
  489. if iter.Error == io.EOF {
  490. val, err := strconv.ParseFloat(string(str), 32)
  491. if err != nil {
  492. iter.Error = err
  493. return
  494. }
  495. return float32(val)
  496. }
  497. return
  498. }
  499. func (iter *Iterator) ReadFloat64() (ret float64) {
  500. str := make([]byte, 0, 10)
  501. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  502. switch c {
  503. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  504. str = append(str, c)
  505. default:
  506. iter.unreadByte()
  507. val, err := strconv.ParseFloat(string(str), 64)
  508. if err != nil {
  509. iter.Error = err
  510. return
  511. }
  512. return val
  513. }
  514. }
  515. if iter.Error == io.EOF {
  516. val, err := strconv.ParseFloat(string(str), 64)
  517. if err != nil {
  518. iter.Error = err
  519. return
  520. }
  521. return val
  522. }
  523. return
  524. }
  525. func (iter *Iterator) ReadBool() (ret bool) {
  526. c := iter.readByte()
  527. if iter.Error != nil {
  528. return
  529. }
  530. switch c {
  531. case 't':
  532. iter.skipTrue()
  533. if iter.Error != nil {
  534. return
  535. }
  536. return true
  537. case 'f':
  538. iter.skipFalse()
  539. if iter.Error != nil {
  540. return
  541. }
  542. return false
  543. default:
  544. iter.ReportError("ReadBool", "expect t or f")
  545. return
  546. }
  547. }
  548. func (iter *Iterator) skipTrue() {
  549. c := iter.readByte()
  550. if iter.Error != nil {
  551. return
  552. }
  553. if c != 'r' {
  554. iter.ReportError("skipTrue", "expect r of true")
  555. return
  556. }
  557. c = iter.readByte()
  558. if iter.Error != nil {
  559. return
  560. }
  561. if c != 'u' {
  562. iter.ReportError("skipTrue", "expect u of true")
  563. return
  564. }
  565. c = iter.readByte()
  566. if iter.Error != nil {
  567. return
  568. }
  569. if c != 'e' {
  570. iter.ReportError("skipTrue", "expect e of true")
  571. return
  572. }
  573. }
  574. func (iter *Iterator) skipFalse() {
  575. c := iter.readByte()
  576. if iter.Error != nil {
  577. return
  578. }
  579. if c != 'a' {
  580. iter.ReportError("skipFalse", "expect a of false")
  581. return
  582. }
  583. c = iter.readByte()
  584. if iter.Error != nil {
  585. return
  586. }
  587. if c != 'l' {
  588. iter.ReportError("skipFalse", "expect l of false")
  589. return
  590. }
  591. c = iter.readByte()
  592. if iter.Error != nil {
  593. return
  594. }
  595. if c != 's' {
  596. iter.ReportError("skipFalse", "expect s of false")
  597. return
  598. }
  599. c = iter.readByte()
  600. if iter.Error != nil {
  601. return
  602. }
  603. if c != 'e' {
  604. iter.ReportError("skipFalse", "expect e of false")
  605. return
  606. }
  607. }
  608. func (iter *Iterator) ReadNull() (ret bool) {
  609. c := iter.readByte()
  610. if iter.Error != nil {
  611. return
  612. }
  613. if c == 'n' {
  614. iter.skipNull()
  615. if iter.Error != nil {
  616. return
  617. }
  618. return true
  619. }
  620. iter.unreadByte()
  621. return false
  622. }
  623. func (iter *Iterator) skipNull() {
  624. c := iter.readByte()
  625. if iter.Error != nil {
  626. return
  627. }
  628. if c != 'u' {
  629. iter.ReportError("skipNull", "expect u of null")
  630. return
  631. }
  632. c = iter.readByte()
  633. if iter.Error != nil {
  634. return
  635. }
  636. if c != 'l' {
  637. iter.ReportError("skipNull", "expect l of null")
  638. return
  639. }
  640. c = iter.readByte()
  641. if iter.Error != nil {
  642. return
  643. }
  644. if c != 'l' {
  645. iter.ReportError("skipNull", "expect l of null")
  646. return
  647. }
  648. }
  649. func (iter *Iterator) Skip() {
  650. c := iter.readByte()
  651. if iter.Error != nil {
  652. return
  653. }
  654. switch c {
  655. case '"':
  656. iter.skipString()
  657. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  658. iter.skipNumber()
  659. case '[':
  660. iter.skipArray()
  661. case '{':
  662. iter.skipObject()
  663. case 't':
  664. iter.skipTrue()
  665. case 'f':
  666. iter.skipFalse()
  667. case 'n':
  668. iter.skipNull()
  669. default:
  670. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  671. return
  672. }
  673. }
  674. func (iter *Iterator) skipString() {
  675. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  676. switch c {
  677. case '"':
  678. return // end of string found
  679. case '\\':
  680. iter.readByte() // " after \\ does not count
  681. if iter.Error != nil {
  682. return
  683. }
  684. }
  685. }
  686. }
  687. func (iter *Iterator) skipNumber() {
  688. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  689. switch c {
  690. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  691. continue
  692. default:
  693. iter.unreadByte()
  694. return
  695. }
  696. }
  697. }
  698. func (iter *Iterator) skipArray() {
  699. for {
  700. iter.skipWhitespaces()
  701. c := iter.readByte()
  702. if iter.Error != nil {
  703. return
  704. }
  705. if c == ']' {
  706. return
  707. }
  708. iter.unreadByte()
  709. iter.Skip()
  710. iter.skipWhitespaces()
  711. c = iter.readByte()
  712. if iter.Error != nil {
  713. return
  714. }
  715. switch c {
  716. case ',':
  717. iter.skipWhitespaces()
  718. continue
  719. case ']':
  720. return
  721. default:
  722. iter.ReportError("skipArray", "expects , or ]")
  723. return
  724. }
  725. }
  726. }
  727. func (iter *Iterator) skipObject() {
  728. iter.skipWhitespaces()
  729. c := iter.readByte()
  730. if iter.Error != nil {
  731. return
  732. }
  733. if c == '}' {
  734. return // end of object
  735. } else {
  736. iter.unreadByte()
  737. }
  738. for {
  739. iter.skipWhitespaces()
  740. c := iter.readByte()
  741. if iter.Error != nil {
  742. return
  743. }
  744. if c != '"' {
  745. iter.ReportError("skipObject", `expects "`)
  746. return
  747. }
  748. iter.skipString()
  749. iter.skipWhitespaces()
  750. c = iter.readByte()
  751. if iter.Error != nil {
  752. return
  753. }
  754. if c != ':' {
  755. iter.ReportError("skipObject", `expects :`)
  756. return
  757. }
  758. iter.skipWhitespaces()
  759. iter.Skip()
  760. iter.skipWhitespaces()
  761. c = iter.readByte()
  762. if iter.Error != nil {
  763. return
  764. }
  765. switch c {
  766. case ',':
  767. iter.skipWhitespaces()
  768. continue
  769. case '}':
  770. return // end of object
  771. default:
  772. iter.ReportError("skipObject", "expects , or }")
  773. return
  774. }
  775. }
  776. }