jsoniter.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. )
  8. type Iterator struct {
  9. reader io.Reader
  10. buf []byte
  11. head int
  12. tail int
  13. Error error
  14. }
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' || c == '\t' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. peekStart := iter.head - 10
  47. if peekStart < 0 {
  48. peekStart = 0
  49. }
  50. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  51. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  52. }
  53. func (iter *Iterator) CurrentBuffer() string {
  54. peekStart := iter.head - 10
  55. if peekStart < 0 {
  56. peekStart = 0
  57. }
  58. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  59. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  60. }
  61. func (iter *Iterator) readByte() (ret byte) {
  62. if iter.head == iter.tail {
  63. if iter.reader == nil {
  64. iter.Error = io.EOF
  65. return
  66. }
  67. n, err := iter.reader.Read(iter.buf)
  68. if err != nil {
  69. iter.Error = err
  70. return
  71. }
  72. if n == 0 {
  73. iter.Error = io.EOF
  74. return
  75. }
  76. iter.head = 0
  77. iter.tail = n
  78. }
  79. ret = iter.buf[iter.head]
  80. iter.head += 1
  81. return ret
  82. }
  83. func (iter *Iterator) unreadByte() {
  84. if iter.head == 0 {
  85. iter.ReportError("unreadByte", "unread too many bytes")
  86. return
  87. }
  88. iter.head -= 1
  89. return
  90. }
  91. const maxUint64 = (1 << 64 - 1)
  92. const cutoffUint64 = maxUint64 / 10 + 1
  93. const maxUint32 = (1 << 32 - 1)
  94. const cutoffUint32 = maxUint32 / 10 + 1
  95. func (iter *Iterator) ReadUint() (ret uint) {
  96. val := iter.ReadUint64()
  97. converted := uint(val)
  98. if uint64(converted) != val {
  99. iter.ReportError("ReadUint", "int overflow")
  100. return
  101. }
  102. return converted
  103. }
  104. func (iter *Iterator) ReadUint8() (ret uint8) {
  105. val := iter.ReadUint64()
  106. converted := uint8(val)
  107. if uint64(converted) != val {
  108. iter.ReportError("ReadUint8", "int overflow")
  109. return
  110. }
  111. return converted
  112. }
  113. func (iter *Iterator) ReadUint16() (ret uint16) {
  114. val := iter.ReadUint64()
  115. converted := uint16(val)
  116. if uint64(converted) != val {
  117. iter.ReportError("ReadUint16", "int overflow")
  118. return
  119. }
  120. return converted
  121. }
  122. func (iter *Iterator) ReadUint32() (ret uint32) {
  123. val := iter.ReadUint64()
  124. converted := uint32(val)
  125. if uint64(converted) != val {
  126. iter.ReportError("ReadUint32", "int overflow")
  127. return
  128. }
  129. return converted
  130. }
  131. func (iter *Iterator) ReadUint64() (ret uint64) {
  132. c := iter.readByte()
  133. if iter.Error != nil {
  134. return
  135. }
  136. /* a single zero, or a series of integers */
  137. if c == '0' {
  138. return 0
  139. } else if c >= '1' && c <= '9' {
  140. for c >= '0' && c <= '9' {
  141. var v byte
  142. v = c - '0'
  143. if ret >= cutoffUint64 {
  144. iter.ReportError("ReadUint64", "overflow")
  145. return
  146. }
  147. ret = ret * uint64(10) + uint64(v)
  148. c = iter.readByte()
  149. if iter.Error != nil {
  150. if iter.Error == io.EOF {
  151. break
  152. } else {
  153. return 0
  154. }
  155. }
  156. }
  157. if iter.Error != io.EOF {
  158. iter.unreadByte()
  159. }
  160. } else {
  161. iter.ReportError("ReadUint64", "expects 0~9")
  162. return
  163. }
  164. return ret
  165. }
  166. func (iter *Iterator) ReadInt() (ret int) {
  167. val := iter.ReadInt64()
  168. converted := int(val)
  169. if int64(converted) != val {
  170. iter.ReportError("ReadInt", "int overflow")
  171. return
  172. }
  173. return converted
  174. }
  175. func (iter *Iterator) ReadInt8() (ret int8) {
  176. val := iter.ReadInt64()
  177. converted := int8(val)
  178. if int64(converted) != val {
  179. iter.ReportError("ReadInt8", "int overflow")
  180. return
  181. }
  182. return converted
  183. }
  184. func (iter *Iterator) ReadInt16() (ret int16) {
  185. val := iter.ReadInt64()
  186. converted := int16(val)
  187. if int64(converted) != val {
  188. iter.ReportError("ReadInt16", "int overflow")
  189. return
  190. }
  191. return converted
  192. }
  193. func (iter *Iterator) ReadInt32() (ret int32) {
  194. val := iter.ReadInt64()
  195. converted := int32(val)
  196. if int64(converted) != val {
  197. iter.ReportError("ReadInt32", "int overflow")
  198. return
  199. }
  200. return converted
  201. }
  202. func (iter *Iterator) ReadInt64() (ret int64) {
  203. c := iter.readByte()
  204. if iter.Error != nil {
  205. return
  206. }
  207. /* optional leading minus */
  208. if c == '-' {
  209. n := iter.ReadUint64()
  210. return -int64(n)
  211. } else {
  212. iter.unreadByte()
  213. n := iter.ReadUint64()
  214. return int64(n)
  215. }
  216. }
  217. func (iter *Iterator) ReadString() (ret string) {
  218. str := make([]byte, 0, 10)
  219. c := iter.readByte()
  220. if iter.Error != nil {
  221. return
  222. }
  223. switch c {
  224. case 'n':
  225. iter.skipNull()
  226. if iter.Error != nil {
  227. return
  228. }
  229. return ""
  230. case '"':
  231. // nothing
  232. default:
  233. iter.ReportError("ReadString", `expects " or n`)
  234. return
  235. }
  236. for {
  237. c = iter.readByte()
  238. if iter.Error != nil {
  239. return
  240. }
  241. switch c {
  242. case '\\':
  243. c = iter.readByte()
  244. if iter.Error != nil {
  245. return
  246. }
  247. switch c {
  248. case 'u':
  249. r := iter.readU4()
  250. if iter.Error != nil {
  251. return
  252. }
  253. if utf16.IsSurrogate(r) {
  254. c = iter.readByte()
  255. if iter.Error != nil {
  256. return
  257. }
  258. if c != '\\' {
  259. iter.ReportError("ReadString",
  260. `expects \u after utf16 surrogate, but \ not found`)
  261. return
  262. }
  263. c = iter.readByte()
  264. if iter.Error != nil {
  265. return
  266. }
  267. if c != 'u' {
  268. iter.ReportError("ReadString",
  269. `expects \u after utf16 surrogate, but \u not found`)
  270. return
  271. }
  272. r2 := iter.readU4()
  273. if iter.Error != nil {
  274. return
  275. }
  276. combined := utf16.DecodeRune(r, r2)
  277. str = appendRune(str, combined)
  278. } else {
  279. str = appendRune(str, r)
  280. }
  281. case '"':
  282. str = append(str, '"')
  283. case '\\':
  284. str = append(str, '\\')
  285. case '/':
  286. str = append(str, '/')
  287. case 'b':
  288. str = append(str, '\b')
  289. case 'f':
  290. str = append(str, '\f')
  291. case 'n':
  292. str = append(str, '\n')
  293. case 'r':
  294. str = append(str, '\r')
  295. case 't':
  296. str = append(str, '\t')
  297. default:
  298. iter.ReportError("ReadString",
  299. `invalid escape char after \`)
  300. return
  301. }
  302. case '"':
  303. return string(str)
  304. default:
  305. str = append(str, c)
  306. }
  307. }
  308. }
  309. func (iter *Iterator) readU4() (ret rune) {
  310. for i := 0; i < 4; i++ {
  311. c := iter.readByte()
  312. if iter.Error != nil {
  313. return
  314. }
  315. if (c >= '0' && c <= '9') {
  316. if ret >= cutoffUint32 {
  317. iter.ReportError("readU4", "overflow")
  318. return
  319. }
  320. ret = ret * 16 + rune(c - '0')
  321. } else if ((c >= 'a' && c <= 'f') ) {
  322. if ret >= cutoffUint32 {
  323. iter.ReportError("readU4", "overflow")
  324. return
  325. }
  326. ret = ret * 16 + rune(c - 'a' + 10)
  327. } else {
  328. iter.ReportError("readU4", "expects 0~9 or a~f")
  329. return
  330. }
  331. }
  332. return ret
  333. }
  334. const (
  335. t1 = 0x00 // 0000 0000
  336. tx = 0x80 // 1000 0000
  337. t2 = 0xC0 // 1100 0000
  338. t3 = 0xE0 // 1110 0000
  339. t4 = 0xF0 // 1111 0000
  340. t5 = 0xF8 // 1111 1000
  341. maskx = 0x3F // 0011 1111
  342. mask2 = 0x1F // 0001 1111
  343. mask3 = 0x0F // 0000 1111
  344. mask4 = 0x07 // 0000 0111
  345. rune1Max = 1 << 7 - 1
  346. rune2Max = 1 << 11 - 1
  347. rune3Max = 1 << 16 - 1
  348. surrogateMin = 0xD800
  349. surrogateMax = 0xDFFF
  350. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  351. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  352. )
  353. func appendRune(p []byte, r rune) []byte {
  354. // Negative values are erroneous. Making it unsigned addresses the problem.
  355. switch i := uint32(r); {
  356. case i <= rune1Max:
  357. p = append(p, byte(r))
  358. return p
  359. case i <= rune2Max:
  360. p = append(p, t2 | byte(r >> 6))
  361. p = append(p, tx | byte(r) & maskx)
  362. return p
  363. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  364. r = RuneError
  365. fallthrough
  366. case i <= rune3Max:
  367. p = append(p, t3 | byte(r >> 12))
  368. p = append(p, tx | byte(r >> 6) & maskx)
  369. p = append(p, tx | byte(r) & maskx)
  370. return p
  371. default:
  372. p = append(p, t4 | byte(r >> 18))
  373. p = append(p, tx | byte(r >> 12) & maskx)
  374. p = append(p, tx | byte(r >> 6) & maskx)
  375. p = append(p, tx | byte(r) & maskx)
  376. return p
  377. }
  378. }
  379. func (iter *Iterator) ReadArray() (ret bool) {
  380. iter.skipWhitespaces()
  381. c := iter.readByte()
  382. if iter.Error != nil {
  383. return
  384. }
  385. switch c {
  386. case 'n': {
  387. iter.skipNull()
  388. if iter.Error != nil {
  389. return
  390. }
  391. return false // null
  392. }
  393. case '[': {
  394. iter.skipWhitespaces()
  395. c = iter.readByte()
  396. if iter.Error != nil {
  397. return
  398. }
  399. if c == ']' {
  400. return false
  401. } else {
  402. iter.unreadByte()
  403. return true
  404. }
  405. }
  406. case ']': return false
  407. case ',':
  408. iter.skipWhitespaces()
  409. return true
  410. default:
  411. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  412. return
  413. }
  414. }
  415. func (iter *Iterator) ReadObject() (ret string) {
  416. iter.skipWhitespaces()
  417. c := iter.readByte()
  418. if iter.Error != nil {
  419. return
  420. }
  421. switch c {
  422. case 'n': {
  423. iter.skipNull()
  424. if iter.Error != nil {
  425. return
  426. }
  427. return "" // null
  428. }
  429. case '{': {
  430. iter.skipWhitespaces()
  431. c = iter.readByte()
  432. if iter.Error != nil {
  433. return
  434. }
  435. switch c {
  436. case '}':
  437. return "" // end of object
  438. case '"':
  439. iter.unreadByte()
  440. field := iter.readObjectField()
  441. if iter.Error != nil {
  442. return
  443. }
  444. return field
  445. default:
  446. iter.ReportError("ReadObject", `expect " after {`)
  447. return
  448. }
  449. }
  450. case ',':
  451. iter.skipWhitespaces()
  452. field := iter.readObjectField()
  453. if iter.Error != nil {
  454. return
  455. }
  456. return field
  457. case '}':
  458. return "" // end of object
  459. default:
  460. iter.ReportError("ReadObject", `expect { or , or } or n`)
  461. return
  462. }
  463. }
  464. func (iter *Iterator) readObjectField() (ret string) {
  465. field := iter.ReadString()
  466. if iter.Error != nil {
  467. return
  468. }
  469. iter.skipWhitespaces()
  470. c := iter.readByte()
  471. if iter.Error != nil {
  472. return
  473. }
  474. if c != ':' {
  475. iter.ReportError("ReadObject", "expect : after object field")
  476. return
  477. }
  478. iter.skipWhitespaces()
  479. return field
  480. }
  481. func (iter *Iterator) ReadFloat32() (ret float32) {
  482. str := make([]byte, 0, 10)
  483. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  484. switch c {
  485. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  486. str = append(str, c)
  487. default:
  488. iter.unreadByte()
  489. val, err := strconv.ParseFloat(string(str), 32)
  490. if err != nil {
  491. iter.Error = err
  492. return
  493. }
  494. return float32(val)
  495. }
  496. }
  497. if iter.Error == io.EOF {
  498. val, err := strconv.ParseFloat(string(str), 32)
  499. if err != nil {
  500. iter.Error = err
  501. return
  502. }
  503. return float32(val)
  504. }
  505. return
  506. }
  507. func (iter *Iterator) ReadFloat64() (ret float64) {
  508. str := make([]byte, 0, 10)
  509. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  510. switch c {
  511. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  512. str = append(str, c)
  513. default:
  514. iter.unreadByte()
  515. val, err := strconv.ParseFloat(string(str), 64)
  516. if err != nil {
  517. iter.Error = err
  518. return
  519. }
  520. return val
  521. }
  522. }
  523. if iter.Error == io.EOF {
  524. val, err := strconv.ParseFloat(string(str), 64)
  525. if err != nil {
  526. iter.Error = err
  527. return
  528. }
  529. return val
  530. }
  531. return
  532. }
  533. func (iter *Iterator) ReadBool() (ret bool) {
  534. c := iter.readByte()
  535. if iter.Error != nil {
  536. return
  537. }
  538. switch c {
  539. case 't':
  540. iter.skipTrue()
  541. if iter.Error != nil {
  542. return
  543. }
  544. return true
  545. case 'f':
  546. iter.skipFalse()
  547. if iter.Error != nil {
  548. return
  549. }
  550. return false
  551. default:
  552. iter.ReportError("ReadBool", "expect t or f")
  553. return
  554. }
  555. }
  556. func (iter *Iterator) skipTrue() {
  557. c := iter.readByte()
  558. if iter.Error != nil {
  559. return
  560. }
  561. if c != 'r' {
  562. iter.ReportError("skipTrue", "expect r of true")
  563. return
  564. }
  565. c = iter.readByte()
  566. if iter.Error != nil {
  567. return
  568. }
  569. if c != 'u' {
  570. iter.ReportError("skipTrue", "expect u of true")
  571. return
  572. }
  573. c = iter.readByte()
  574. if iter.Error != nil {
  575. return
  576. }
  577. if c != 'e' {
  578. iter.ReportError("skipTrue", "expect e of true")
  579. return
  580. }
  581. }
  582. func (iter *Iterator) skipFalse() {
  583. c := iter.readByte()
  584. if iter.Error != nil {
  585. return
  586. }
  587. if c != 'a' {
  588. iter.ReportError("skipFalse", "expect a of false")
  589. return
  590. }
  591. c = iter.readByte()
  592. if iter.Error != nil {
  593. return
  594. }
  595. if c != 'l' {
  596. iter.ReportError("skipFalse", "expect l of false")
  597. return
  598. }
  599. c = iter.readByte()
  600. if iter.Error != nil {
  601. return
  602. }
  603. if c != 's' {
  604. iter.ReportError("skipFalse", "expect s of false")
  605. return
  606. }
  607. c = iter.readByte()
  608. if iter.Error != nil {
  609. return
  610. }
  611. if c != 'e' {
  612. iter.ReportError("skipFalse", "expect e of false")
  613. return
  614. }
  615. }
  616. func (iter *Iterator) ReadNull() (ret bool) {
  617. c := iter.readByte()
  618. if iter.Error != nil {
  619. return
  620. }
  621. if c == 'n' {
  622. iter.skipNull()
  623. if iter.Error != nil {
  624. return
  625. }
  626. return true
  627. }
  628. iter.unreadByte()
  629. return false
  630. }
  631. func (iter *Iterator) skipNull() {
  632. c := iter.readByte()
  633. if iter.Error != nil {
  634. return
  635. }
  636. if c != 'u' {
  637. iter.ReportError("skipNull", "expect u of null")
  638. return
  639. }
  640. c = iter.readByte()
  641. if iter.Error != nil {
  642. return
  643. }
  644. if c != 'l' {
  645. iter.ReportError("skipNull", "expect l of null")
  646. return
  647. }
  648. c = iter.readByte()
  649. if iter.Error != nil {
  650. return
  651. }
  652. if c != 'l' {
  653. iter.ReportError("skipNull", "expect l of null")
  654. return
  655. }
  656. }
  657. func (iter *Iterator) Skip() {
  658. c := iter.readByte()
  659. if iter.Error != nil {
  660. return
  661. }
  662. switch c {
  663. case '"':
  664. iter.skipString()
  665. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  666. iter.skipNumber()
  667. case '[':
  668. iter.skipArray()
  669. case '{':
  670. iter.skipObject()
  671. case 't':
  672. iter.skipTrue()
  673. case 'f':
  674. iter.skipFalse()
  675. case 'n':
  676. iter.skipNull()
  677. default:
  678. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  679. return
  680. }
  681. }
  682. func (iter *Iterator) skipString() {
  683. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  684. switch c {
  685. case '"':
  686. return // end of string found
  687. case '\\':
  688. iter.readByte() // " after \\ does not count
  689. if iter.Error != nil {
  690. return
  691. }
  692. }
  693. }
  694. }
  695. func (iter *Iterator) skipNumber() {
  696. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  697. switch c {
  698. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  699. continue
  700. default:
  701. iter.unreadByte()
  702. return
  703. }
  704. }
  705. }
  706. func (iter *Iterator) skipArray() {
  707. for {
  708. iter.skipWhitespaces()
  709. c := iter.readByte()
  710. if iter.Error != nil {
  711. return
  712. }
  713. if c == ']' {
  714. return
  715. }
  716. iter.unreadByte()
  717. iter.Skip()
  718. iter.skipWhitespaces()
  719. c = iter.readByte()
  720. if iter.Error != nil {
  721. return
  722. }
  723. switch c {
  724. case ',':
  725. iter.skipWhitespaces()
  726. continue
  727. case ']':
  728. return
  729. default:
  730. iter.ReportError("skipArray", "expects , or ]")
  731. return
  732. }
  733. }
  734. }
  735. func (iter *Iterator) skipObject() {
  736. iter.skipWhitespaces()
  737. c := iter.readByte()
  738. if iter.Error != nil {
  739. return
  740. }
  741. if c == '}' {
  742. return // end of object
  743. } else {
  744. iter.unreadByte()
  745. }
  746. for {
  747. iter.skipWhitespaces()
  748. c := iter.readByte()
  749. if iter.Error != nil {
  750. return
  751. }
  752. if c != '"' {
  753. iter.ReportError("skipObject", `expects "`)
  754. return
  755. }
  756. iter.skipString()
  757. iter.skipWhitespaces()
  758. c = iter.readByte()
  759. if iter.Error != nil {
  760. return
  761. }
  762. if c != ':' {
  763. iter.ReportError("skipObject", `expects :`)
  764. return
  765. }
  766. iter.skipWhitespaces()
  767. iter.Skip()
  768. iter.skipWhitespaces()
  769. c = iter.readByte()
  770. if iter.Error != nil {
  771. return
  772. }
  773. switch c {
  774. case ',':
  775. iter.skipWhitespaces()
  776. continue
  777. case '}':
  778. return // end of object
  779. default:
  780. iter.ReportError("skipObject", "expects , or }")
  781. return
  782. }
  783. }
  784. }