// jsoniter.go
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. )
  8. type Iterator struct {
  9. reader io.Reader
  10. buf []byte
  11. head int
  12. tail int
  13. Error error
  14. }
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' || c == '\t' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. peekStart := iter.head - 10
  47. if peekStart < 0 {
  48. peekStart = 0
  49. }
  50. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  51. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  52. }
  53. func (iter *Iterator) CurrentBuffer() string {
  54. peekStart := iter.head - 10
  55. if peekStart < 0 {
  56. peekStart = 0
  57. }
  58. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  59. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  60. }
  61. func (iter *Iterator) readByte() (ret byte) {
  62. if iter.head == iter.tail {
  63. if iter.reader == nil {
  64. iter.Error = io.EOF
  65. return
  66. }
  67. for {
  68. n, err := iter.reader.Read(iter.buf)
  69. if n == 0 {
  70. if err != nil {
  71. iter.Error = err
  72. return
  73. } else {
  74. // n == 0, err == nil is not EOF
  75. continue
  76. }
  77. } else {
  78. iter.head = 0
  79. iter.tail = n
  80. break
  81. }
  82. }
  83. }
  84. ret = iter.buf[iter.head]
  85. iter.head++
  86. return ret
  87. }
  88. func (iter *Iterator) unreadByte() {
  89. if iter.head == 0 {
  90. iter.ReportError("unreadByte", "unread too many bytes")
  91. return
  92. }
  93. iter.head -= 1
  94. return
  95. }
  96. const maxUint64 = (1 << 64 - 1)
  97. const cutoffUint64 = maxUint64 / 10 + 1
  98. const maxUint32 = (1 << 32 - 1)
  99. const cutoffUint32 = maxUint32 / 10 + 1
  100. func (iter *Iterator) ReadUint() (ret uint) {
  101. val := iter.ReadUint64()
  102. converted := uint(val)
  103. if uint64(converted) != val {
  104. iter.ReportError("ReadUint", "int overflow")
  105. return
  106. }
  107. return converted
  108. }
  109. func (iter *Iterator) ReadUint8() (ret uint8) {
  110. val := iter.ReadUint64()
  111. converted := uint8(val)
  112. if uint64(converted) != val {
  113. iter.ReportError("ReadUint8", "int overflow")
  114. return
  115. }
  116. return converted
  117. }
  118. func (iter *Iterator) ReadUint16() (ret uint16) {
  119. val := iter.ReadUint64()
  120. converted := uint16(val)
  121. if uint64(converted) != val {
  122. iter.ReportError("ReadUint16", "int overflow")
  123. return
  124. }
  125. return converted
  126. }
  127. func (iter *Iterator) ReadUint32() (ret uint32) {
  128. val := iter.ReadUint64()
  129. converted := uint32(val)
  130. if uint64(converted) != val {
  131. iter.ReportError("ReadUint32", "int overflow")
  132. return
  133. }
  134. return converted
  135. }
  136. func (iter *Iterator) ReadUint64() (ret uint64) {
  137. c := iter.readByte()
  138. if iter.Error != nil {
  139. return
  140. }
  141. /* a single zero, or a series of integers */
  142. if c == '0' {
  143. return 0
  144. } else if c >= '1' && c <= '9' {
  145. for c >= '0' && c <= '9' {
  146. var v byte
  147. v = c - '0'
  148. if ret >= cutoffUint64 {
  149. iter.ReportError("ReadUint64", "overflow")
  150. return
  151. }
  152. ret = ret * uint64(10) + uint64(v)
  153. c = iter.readByte()
  154. if iter.Error != nil {
  155. if iter.Error == io.EOF {
  156. break
  157. } else {
  158. return 0
  159. }
  160. }
  161. }
  162. if iter.Error != io.EOF {
  163. iter.unreadByte()
  164. }
  165. } else {
  166. iter.ReportError("ReadUint64", "expects 0~9")
  167. return
  168. }
  169. return ret
  170. }
  171. func (iter *Iterator) ReadInt() (ret int) {
  172. val := iter.ReadInt64()
  173. converted := int(val)
  174. if int64(converted) != val {
  175. iter.ReportError("ReadInt", "int overflow")
  176. return
  177. }
  178. return converted
  179. }
  180. func (iter *Iterator) ReadInt8() (ret int8) {
  181. val := iter.ReadInt64()
  182. converted := int8(val)
  183. if int64(converted) != val {
  184. iter.ReportError("ReadInt8", "int overflow")
  185. return
  186. }
  187. return converted
  188. }
  189. func (iter *Iterator) ReadInt16() (ret int16) {
  190. val := iter.ReadInt64()
  191. converted := int16(val)
  192. if int64(converted) != val {
  193. iter.ReportError("ReadInt16", "int overflow")
  194. return
  195. }
  196. return converted
  197. }
  198. func (iter *Iterator) ReadInt32() (ret int32) {
  199. val := iter.ReadInt64()
  200. converted := int32(val)
  201. if int64(converted) != val {
  202. iter.ReportError("ReadInt32", "int overflow")
  203. return
  204. }
  205. return converted
  206. }
  207. func (iter *Iterator) ReadInt64() (ret int64) {
  208. c := iter.readByte()
  209. if iter.Error != nil {
  210. return
  211. }
  212. /* optional leading minus */
  213. if c == '-' {
  214. n := iter.ReadUint64()
  215. return -int64(n)
  216. } else {
  217. iter.unreadByte()
  218. n := iter.ReadUint64()
  219. return int64(n)
  220. }
  221. }
  222. func (iter *Iterator) ReadString() (ret string) {
  223. str := make([]byte, 0, 10)
  224. c := iter.readByte()
  225. if iter.Error != nil {
  226. return
  227. }
  228. switch c {
  229. case 'n':
  230. iter.skipNull()
  231. if iter.Error != nil {
  232. return
  233. }
  234. return ""
  235. case '"':
  236. // nothing
  237. default:
  238. iter.ReportError("ReadString", `expects " or n`)
  239. return
  240. }
  241. for {
  242. c = iter.readByte()
  243. if iter.Error != nil {
  244. return
  245. }
  246. switch c {
  247. case '\\':
  248. c = iter.readByte()
  249. if iter.Error != nil {
  250. return
  251. }
  252. switch c {
  253. case 'u':
  254. r := iter.readU4()
  255. if iter.Error != nil {
  256. return
  257. }
  258. if utf16.IsSurrogate(r) {
  259. c = iter.readByte()
  260. if iter.Error != nil {
  261. return
  262. }
  263. if c != '\\' {
  264. iter.ReportError("ReadString",
  265. `expects \u after utf16 surrogate, but \ not found`)
  266. return
  267. }
  268. c = iter.readByte()
  269. if iter.Error != nil {
  270. return
  271. }
  272. if c != 'u' {
  273. iter.ReportError("ReadString",
  274. `expects \u after utf16 surrogate, but \u not found`)
  275. return
  276. }
  277. r2 := iter.readU4()
  278. if iter.Error != nil {
  279. return
  280. }
  281. combined := utf16.DecodeRune(r, r2)
  282. str = appendRune(str, combined)
  283. } else {
  284. str = appendRune(str, r)
  285. }
  286. case '"':
  287. str = append(str, '"')
  288. case '\\':
  289. str = append(str, '\\')
  290. case '/':
  291. str = append(str, '/')
  292. case 'b':
  293. str = append(str, '\b')
  294. case 'f':
  295. str = append(str, '\f')
  296. case 'n':
  297. str = append(str, '\n')
  298. case 'r':
  299. str = append(str, '\r')
  300. case 't':
  301. str = append(str, '\t')
  302. default:
  303. iter.ReportError("ReadString",
  304. `invalid escape char after \`)
  305. return
  306. }
  307. case '"':
  308. return string(str)
  309. default:
  310. str = append(str, c)
  311. }
  312. }
  313. }
  314. func (iter *Iterator) readU4() (ret rune) {
  315. for i := 0; i < 4; i++ {
  316. c := iter.readByte()
  317. if iter.Error != nil {
  318. return
  319. }
  320. if (c >= '0' && c <= '9') {
  321. if ret >= cutoffUint32 {
  322. iter.ReportError("readU4", "overflow")
  323. return
  324. }
  325. ret = ret * 16 + rune(c - '0')
  326. } else if ((c >= 'a' && c <= 'f') ) {
  327. if ret >= cutoffUint32 {
  328. iter.ReportError("readU4", "overflow")
  329. return
  330. }
  331. ret = ret * 16 + rune(c - 'a' + 10)
  332. } else {
  333. iter.ReportError("readU4", "expects 0~9 or a~f")
  334. return
  335. }
  336. }
  337. return ret
  338. }
  339. const (
  340. t1 = 0x00 // 0000 0000
  341. tx = 0x80 // 1000 0000
  342. t2 = 0xC0 // 1100 0000
  343. t3 = 0xE0 // 1110 0000
  344. t4 = 0xF0 // 1111 0000
  345. t5 = 0xF8 // 1111 1000
  346. maskx = 0x3F // 0011 1111
  347. mask2 = 0x1F // 0001 1111
  348. mask3 = 0x0F // 0000 1111
  349. mask4 = 0x07 // 0000 0111
  350. rune1Max = 1 << 7 - 1
  351. rune2Max = 1 << 11 - 1
  352. rune3Max = 1 << 16 - 1
  353. surrogateMin = 0xD800
  354. surrogateMax = 0xDFFF
  355. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  356. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  357. )
  358. func appendRune(p []byte, r rune) []byte {
  359. // Negative values are erroneous. Making it unsigned addresses the problem.
  360. switch i := uint32(r); {
  361. case i <= rune1Max:
  362. p = append(p, byte(r))
  363. return p
  364. case i <= rune2Max:
  365. p = append(p, t2 | byte(r >> 6))
  366. p = append(p, tx | byte(r) & maskx)
  367. return p
  368. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  369. r = RuneError
  370. fallthrough
  371. case i <= rune3Max:
  372. p = append(p, t3 | byte(r >> 12))
  373. p = append(p, tx | byte(r >> 6) & maskx)
  374. p = append(p, tx | byte(r) & maskx)
  375. return p
  376. default:
  377. p = append(p, t4 | byte(r >> 18))
  378. p = append(p, tx | byte(r >> 12) & maskx)
  379. p = append(p, tx | byte(r >> 6) & maskx)
  380. p = append(p, tx | byte(r) & maskx)
  381. return p
  382. }
  383. }
  384. func (iter *Iterator) ReadArray() (ret bool) {
  385. iter.skipWhitespaces()
  386. c := iter.readByte()
  387. if iter.Error != nil {
  388. return
  389. }
  390. switch c {
  391. case 'n': {
  392. iter.skipNull()
  393. if iter.Error != nil {
  394. return
  395. }
  396. return false // null
  397. }
  398. case '[': {
  399. iter.skipWhitespaces()
  400. c = iter.readByte()
  401. if iter.Error != nil {
  402. return
  403. }
  404. if c == ']' {
  405. return false
  406. } else {
  407. iter.unreadByte()
  408. return true
  409. }
  410. }
  411. case ']': return false
  412. case ',':
  413. iter.skipWhitespaces()
  414. return true
  415. default:
  416. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  417. return
  418. }
  419. }
  420. func (iter *Iterator) ReadObject() (ret string) {
  421. iter.skipWhitespaces()
  422. c := iter.readByte()
  423. if iter.Error != nil {
  424. return
  425. }
  426. switch c {
  427. case 'n': {
  428. iter.skipNull()
  429. if iter.Error != nil {
  430. return
  431. }
  432. return "" // null
  433. }
  434. case '{': {
  435. iter.skipWhitespaces()
  436. c = iter.readByte()
  437. if iter.Error != nil {
  438. return
  439. }
  440. switch c {
  441. case '}':
  442. return "" // end of object
  443. case '"':
  444. iter.unreadByte()
  445. field := iter.readObjectField()
  446. if iter.Error != nil {
  447. return
  448. }
  449. return field
  450. default:
  451. iter.ReportError("ReadObject", `expect " after {`)
  452. return
  453. }
  454. }
  455. case ',':
  456. iter.skipWhitespaces()
  457. field := iter.readObjectField()
  458. if iter.Error != nil {
  459. return
  460. }
  461. return field
  462. case '}':
  463. return "" // end of object
  464. default:
  465. iter.ReportError("ReadObject", `expect { or , or } or n`)
  466. return
  467. }
  468. }
  469. func (iter *Iterator) readObjectField() (ret string) {
  470. field := iter.ReadString()
  471. if iter.Error != nil {
  472. return
  473. }
  474. iter.skipWhitespaces()
  475. c := iter.readByte()
  476. if iter.Error != nil {
  477. return
  478. }
  479. if c != ':' {
  480. iter.ReportError("ReadObject", "expect : after object field")
  481. return
  482. }
  483. iter.skipWhitespaces()
  484. return field
  485. }
  486. func (iter *Iterator) ReadFloat32() (ret float32) {
  487. str := make([]byte, 0, 10)
  488. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  489. switch c {
  490. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  491. str = append(str, c)
  492. default:
  493. iter.unreadByte()
  494. val, err := strconv.ParseFloat(string(str), 32)
  495. if err != nil {
  496. iter.Error = err
  497. return
  498. }
  499. return float32(val)
  500. }
  501. }
  502. if iter.Error == io.EOF {
  503. val, err := strconv.ParseFloat(string(str), 32)
  504. if err != nil {
  505. iter.Error = err
  506. return
  507. }
  508. return float32(val)
  509. }
  510. return
  511. }
  512. func (iter *Iterator) ReadFloat64() (ret float64) {
  513. str := make([]byte, 0, 10)
  514. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  515. switch c {
  516. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  517. str = append(str, c)
  518. default:
  519. iter.unreadByte()
  520. val, err := strconv.ParseFloat(string(str), 64)
  521. if err != nil {
  522. iter.Error = err
  523. return
  524. }
  525. return val
  526. }
  527. }
  528. if iter.Error == io.EOF {
  529. val, err := strconv.ParseFloat(string(str), 64)
  530. if err != nil {
  531. iter.Error = err
  532. return
  533. }
  534. return val
  535. }
  536. return
  537. }
  538. func (iter *Iterator) ReadBool() (ret bool) {
  539. c := iter.readByte()
  540. if iter.Error != nil {
  541. return
  542. }
  543. switch c {
  544. case 't':
  545. iter.skipTrue()
  546. if iter.Error != nil {
  547. return
  548. }
  549. return true
  550. case 'f':
  551. iter.skipFalse()
  552. if iter.Error != nil {
  553. return
  554. }
  555. return false
  556. default:
  557. iter.ReportError("ReadBool", "expect t or f")
  558. return
  559. }
  560. }
  561. func (iter *Iterator) skipTrue() {
  562. c := iter.readByte()
  563. if iter.Error != nil {
  564. return
  565. }
  566. if c != 'r' {
  567. iter.ReportError("skipTrue", "expect r of true")
  568. return
  569. }
  570. c = iter.readByte()
  571. if iter.Error != nil {
  572. return
  573. }
  574. if c != 'u' {
  575. iter.ReportError("skipTrue", "expect u of true")
  576. return
  577. }
  578. c = iter.readByte()
  579. if iter.Error != nil {
  580. return
  581. }
  582. if c != 'e' {
  583. iter.ReportError("skipTrue", "expect e of true")
  584. return
  585. }
  586. }
  587. func (iter *Iterator) skipFalse() {
  588. c := iter.readByte()
  589. if iter.Error != nil {
  590. return
  591. }
  592. if c != 'a' {
  593. iter.ReportError("skipFalse", "expect a of false")
  594. return
  595. }
  596. c = iter.readByte()
  597. if iter.Error != nil {
  598. return
  599. }
  600. if c != 'l' {
  601. iter.ReportError("skipFalse", "expect l of false")
  602. return
  603. }
  604. c = iter.readByte()
  605. if iter.Error != nil {
  606. return
  607. }
  608. if c != 's' {
  609. iter.ReportError("skipFalse", "expect s of false")
  610. return
  611. }
  612. c = iter.readByte()
  613. if iter.Error != nil {
  614. return
  615. }
  616. if c != 'e' {
  617. iter.ReportError("skipFalse", "expect e of false")
  618. return
  619. }
  620. }
  621. func (iter *Iterator) ReadNull() (ret bool) {
  622. c := iter.readByte()
  623. if iter.Error != nil {
  624. return
  625. }
  626. if c == 'n' {
  627. iter.skipNull()
  628. if iter.Error != nil {
  629. return
  630. }
  631. return true
  632. }
  633. iter.unreadByte()
  634. return false
  635. }
  636. func (iter *Iterator) skipNull() {
  637. c := iter.readByte()
  638. if iter.Error != nil {
  639. return
  640. }
  641. if c != 'u' {
  642. iter.ReportError("skipNull", "expect u of null")
  643. return
  644. }
  645. c = iter.readByte()
  646. if iter.Error != nil {
  647. return
  648. }
  649. if c != 'l' {
  650. iter.ReportError("skipNull", "expect l of null")
  651. return
  652. }
  653. c = iter.readByte()
  654. if iter.Error != nil {
  655. return
  656. }
  657. if c != 'l' {
  658. iter.ReportError("skipNull", "expect l of null")
  659. return
  660. }
  661. }
  662. func (iter *Iterator) Skip() {
  663. c := iter.readByte()
  664. if iter.Error != nil {
  665. return
  666. }
  667. switch c {
  668. case '"':
  669. iter.skipString()
  670. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  671. iter.skipNumber()
  672. case '[':
  673. iter.skipArray()
  674. case '{':
  675. iter.skipObject()
  676. case 't':
  677. iter.skipTrue()
  678. case 'f':
  679. iter.skipFalse()
  680. case 'n':
  681. iter.skipNull()
  682. default:
  683. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  684. return
  685. }
  686. }
  687. func (iter *Iterator) skipString() {
  688. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  689. switch c {
  690. case '"':
  691. return // end of string found
  692. case '\\':
  693. iter.readByte() // " after \\ does not count
  694. if iter.Error != nil {
  695. return
  696. }
  697. }
  698. }
  699. }
  700. func (iter *Iterator) skipNumber() {
  701. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  702. switch c {
  703. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  704. continue
  705. default:
  706. iter.unreadByte()
  707. return
  708. }
  709. }
  710. }
  711. func (iter *Iterator) skipArray() {
  712. for {
  713. iter.skipWhitespaces()
  714. c := iter.readByte()
  715. if iter.Error != nil {
  716. return
  717. }
  718. if c == ']' {
  719. return
  720. }
  721. iter.unreadByte()
  722. iter.Skip()
  723. iter.skipWhitespaces()
  724. c = iter.readByte()
  725. if iter.Error != nil {
  726. return
  727. }
  728. switch c {
  729. case ',':
  730. iter.skipWhitespaces()
  731. continue
  732. case ']':
  733. return
  734. default:
  735. iter.ReportError("skipArray", "expects , or ]")
  736. return
  737. }
  738. }
  739. }
  740. func (iter *Iterator) skipObject() {
  741. iter.skipWhitespaces()
  742. c := iter.readByte()
  743. if iter.Error != nil {
  744. return
  745. }
  746. if c == '}' {
  747. return // end of object
  748. } else {
  749. iter.unreadByte()
  750. }
  751. for {
  752. iter.skipWhitespaces()
  753. c := iter.readByte()
  754. if iter.Error != nil {
  755. return
  756. }
  757. if c != '"' {
  758. iter.ReportError("skipObject", `expects "`)
  759. return
  760. }
  761. iter.skipString()
  762. iter.skipWhitespaces()
  763. c = iter.readByte()
  764. if iter.Error != nil {
  765. return
  766. }
  767. if c != ':' {
  768. iter.ReportError("skipObject", `expects :`)
  769. return
  770. }
  771. iter.skipWhitespaces()
  772. iter.Skip()
  773. iter.skipWhitespaces()
  774. c = iter.readByte()
  775. if iter.Error != nil {
  776. return
  777. }
  778. switch c {
  779. case ',':
  780. iter.skipWhitespaces()
  781. continue
  782. case '}':
  783. return // end of object
  784. default:
  785. iter.ReportError("skipObject", "expects , or }")
  786. return
  787. }
  788. }
  789. }