jsoniter.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. "unsafe"
  8. )
  9. type Iterator struct {
  10. reader io.Reader
  11. buf []byte
  12. head int
  13. tail int
  14. Error error
  15. }
  16. func Parse(reader io.Reader, bufSize int) *Iterator {
  17. iter := &Iterator{
  18. reader: reader,
  19. buf: make([]byte, bufSize),
  20. head: 0,
  21. tail: 0,
  22. }
  23. iter.skipWhitespaces()
  24. return iter
  25. }
  26. func ParseBytes(input []byte) *Iterator {
  27. iter := &Iterator{
  28. reader: nil,
  29. buf: input,
  30. head: 0,
  31. tail: len(input),
  32. }
  33. iter.skipWhitespaces()
  34. return iter
  35. }
  36. func ParseString(input string) *Iterator {
  37. return ParseBytes([]byte(input))
  38. }
  39. func (iter *Iterator) skipWhitespaces() {
  40. c := iter.readByte()
  41. for c == ' ' || c == '\n' || c == '\t' {
  42. c = iter.readByte()
  43. }
  44. iter.unreadByte()
  45. }
  46. func (iter *Iterator) ReportError(operation string, msg string) {
  47. peekStart := iter.head - 10
  48. if peekStart < 0 {
  49. peekStart = 0
  50. }
  51. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  52. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  53. }
  54. func (iter *Iterator) CurrentBuffer() string {
  55. peekStart := iter.head - 10
  56. if peekStart < 0 {
  57. peekStart = 0
  58. }
  59. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  60. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  61. }
  62. func (iter *Iterator) readByte() (ret byte) {
  63. if iter.head == iter.tail {
  64. if iter.reader == nil {
  65. iter.Error = io.EOF
  66. return
  67. }
  68. for {
  69. n, err := iter.reader.Read(iter.buf)
  70. if n == 0 {
  71. if err != nil {
  72. iter.Error = err
  73. return
  74. } else {
  75. // n == 0, err == nil is not EOF
  76. continue
  77. }
  78. } else {
  79. iter.head = 0
  80. iter.tail = n
  81. break
  82. }
  83. }
  84. }
  85. ret = iter.buf[iter.head]
  86. iter.head++
  87. return ret
  88. }
  89. func (iter *Iterator) unreadByte() {
  90. if iter.head == 0 {
  91. iter.ReportError("unreadByte", "unread too many bytes")
  92. return
  93. }
  94. iter.head -= 1
  95. return
  96. }
  97. const maxUint64 = (1 << 64 - 1)
  98. const cutoffUint64 = maxUint64 / 10 + 1
  99. const maxUint32 = (1 << 32 - 1)
  100. const cutoffUint32 = maxUint32 / 10 + 1
  101. func (iter *Iterator) ReadUint() (ret uint) {
  102. val := iter.ReadUint64()
  103. converted := uint(val)
  104. if uint64(converted) != val {
  105. iter.ReportError("ReadUint", "int overflow")
  106. return
  107. }
  108. return converted
  109. }
  110. func (iter *Iterator) ReadUint8() (ret uint8) {
  111. val := iter.ReadUint64()
  112. converted := uint8(val)
  113. if uint64(converted) != val {
  114. iter.ReportError("ReadUint8", "int overflow")
  115. return
  116. }
  117. return converted
  118. }
  119. func (iter *Iterator) ReadUint16() (ret uint16) {
  120. val := iter.ReadUint64()
  121. converted := uint16(val)
  122. if uint64(converted) != val {
  123. iter.ReportError("ReadUint16", "int overflow")
  124. return
  125. }
  126. return converted
  127. }
  128. func (iter *Iterator) ReadUint32() (ret uint32) {
  129. val := iter.ReadUint64()
  130. converted := uint32(val)
  131. if uint64(converted) != val {
  132. iter.ReportError("ReadUint32", "int overflow")
  133. return
  134. }
  135. return converted
  136. }
  137. func (iter *Iterator) ReadUint64() (ret uint64) {
  138. c := iter.readByte()
  139. if iter.Error != nil {
  140. return
  141. }
  142. /* a single zero, or a series of integers */
  143. if c == '0' {
  144. return 0
  145. } else if c >= '1' && c <= '9' {
  146. for c >= '0' && c <= '9' {
  147. var v byte
  148. v = c - '0'
  149. if ret >= cutoffUint64 {
  150. iter.ReportError("ReadUint64", "overflow")
  151. return
  152. }
  153. ret = ret * uint64(10) + uint64(v)
  154. c = iter.readByte()
  155. if iter.Error != nil {
  156. if iter.Error == io.EOF {
  157. break
  158. } else {
  159. return 0
  160. }
  161. }
  162. }
  163. if iter.Error != io.EOF {
  164. iter.unreadByte()
  165. }
  166. } else {
  167. iter.ReportError("ReadUint64", "expects 0~9")
  168. return
  169. }
  170. return ret
  171. }
  172. func (iter *Iterator) ReadInt() (ret int) {
  173. val := iter.ReadInt64()
  174. converted := int(val)
  175. if int64(converted) != val {
  176. iter.ReportError("ReadInt", "int overflow")
  177. return
  178. }
  179. return converted
  180. }
  181. func (iter *Iterator) ReadInt8() (ret int8) {
  182. val := iter.ReadInt64()
  183. converted := int8(val)
  184. if int64(converted) != val {
  185. iter.ReportError("ReadInt8", "int overflow")
  186. return
  187. }
  188. return converted
  189. }
  190. func (iter *Iterator) ReadInt16() (ret int16) {
  191. val := iter.ReadInt64()
  192. converted := int16(val)
  193. if int64(converted) != val {
  194. iter.ReportError("ReadInt16", "int overflow")
  195. return
  196. }
  197. return converted
  198. }
  199. func (iter *Iterator) ReadInt32() (ret int32) {
  200. val := iter.ReadInt64()
  201. converted := int32(val)
  202. if int64(converted) != val {
  203. iter.ReportError("ReadInt32", "int overflow")
  204. return
  205. }
  206. return converted
  207. }
  208. func (iter *Iterator) ReadInt64() (ret int64) {
  209. c := iter.readByte()
  210. if iter.Error != nil {
  211. return
  212. }
  213. /* optional leading minus */
  214. if c == '-' {
  215. n := iter.ReadUint64()
  216. return -int64(n)
  217. } else {
  218. iter.unreadByte()
  219. n := iter.ReadUint64()
  220. return int64(n)
  221. }
  222. }
  223. func (iter *Iterator) ReadString() (ret string) {
  224. str := make([]byte, 0, 10)
  225. c := iter.readByte()
  226. if iter.Error != nil {
  227. return
  228. }
  229. switch c {
  230. case 'n':
  231. iter.skipNull()
  232. if iter.Error != nil {
  233. return
  234. }
  235. return ""
  236. case '"':
  237. // nothing
  238. default:
  239. iter.ReportError("ReadString", `expects " or n`)
  240. return
  241. }
  242. for {
  243. c = iter.readByte()
  244. if iter.Error != nil {
  245. return
  246. }
  247. switch c {
  248. case '\\':
  249. c = iter.readByte()
  250. if iter.Error != nil {
  251. return
  252. }
  253. switch c {
  254. case 'u':
  255. r := iter.readU4()
  256. if iter.Error != nil {
  257. return
  258. }
  259. if utf16.IsSurrogate(r) {
  260. c = iter.readByte()
  261. if iter.Error != nil {
  262. return
  263. }
  264. if c != '\\' {
  265. iter.ReportError("ReadString",
  266. `expects \u after utf16 surrogate, but \ not found`)
  267. return
  268. }
  269. c = iter.readByte()
  270. if iter.Error != nil {
  271. return
  272. }
  273. if c != 'u' {
  274. iter.ReportError("ReadString",
  275. `expects \u after utf16 surrogate, but \u not found`)
  276. return
  277. }
  278. r2 := iter.readU4()
  279. if iter.Error != nil {
  280. return
  281. }
  282. combined := utf16.DecodeRune(r, r2)
  283. str = appendRune(str, combined)
  284. } else {
  285. str = appendRune(str, r)
  286. }
  287. case '"':
  288. str = append(str, '"')
  289. case '\\':
  290. str = append(str, '\\')
  291. case '/':
  292. str = append(str, '/')
  293. case 'b':
  294. str = append(str, '\b')
  295. case 'f':
  296. str = append(str, '\f')
  297. case 'n':
  298. str = append(str, '\n')
  299. case 'r':
  300. str = append(str, '\r')
  301. case 't':
  302. str = append(str, '\t')
  303. default:
  304. iter.ReportError("ReadString",
  305. `invalid escape char after \`)
  306. return
  307. }
  308. case '"':
  309. return string(str)
  310. default:
  311. str = append(str, c)
  312. }
  313. }
  314. }
  315. func (iter *Iterator) readU4() (ret rune) {
  316. for i := 0; i < 4; i++ {
  317. c := iter.readByte()
  318. if iter.Error != nil {
  319. return
  320. }
  321. if (c >= '0' && c <= '9') {
  322. if ret >= cutoffUint32 {
  323. iter.ReportError("readU4", "overflow")
  324. return
  325. }
  326. ret = ret * 16 + rune(c - '0')
  327. } else if ((c >= 'a' && c <= 'f') ) {
  328. if ret >= cutoffUint32 {
  329. iter.ReportError("readU4", "overflow")
  330. return
  331. }
  332. ret = ret * 16 + rune(c - 'a' + 10)
  333. } else {
  334. iter.ReportError("readU4", "expects 0~9 or a~f")
  335. return
  336. }
  337. }
  338. return ret
  339. }
  340. const (
  341. t1 = 0x00 // 0000 0000
  342. tx = 0x80 // 1000 0000
  343. t2 = 0xC0 // 1100 0000
  344. t3 = 0xE0 // 1110 0000
  345. t4 = 0xF0 // 1111 0000
  346. t5 = 0xF8 // 1111 1000
  347. maskx = 0x3F // 0011 1111
  348. mask2 = 0x1F // 0001 1111
  349. mask3 = 0x0F // 0000 1111
  350. mask4 = 0x07 // 0000 0111
  351. rune1Max = 1 << 7 - 1
  352. rune2Max = 1 << 11 - 1
  353. rune3Max = 1 << 16 - 1
  354. surrogateMin = 0xD800
  355. surrogateMax = 0xDFFF
  356. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  357. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  358. )
  359. func appendRune(p []byte, r rune) []byte {
  360. // Negative values are erroneous. Making it unsigned addresses the problem.
  361. switch i := uint32(r); {
  362. case i <= rune1Max:
  363. p = append(p, byte(r))
  364. return p
  365. case i <= rune2Max:
  366. p = append(p, t2 | byte(r >> 6))
  367. p = append(p, tx | byte(r) & maskx)
  368. return p
  369. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  370. r = RuneError
  371. fallthrough
  372. case i <= rune3Max:
  373. p = append(p, t3 | byte(r >> 12))
  374. p = append(p, tx | byte(r >> 6) & maskx)
  375. p = append(p, tx | byte(r) & maskx)
  376. return p
  377. default:
  378. p = append(p, t4 | byte(r >> 18))
  379. p = append(p, tx | byte(r >> 12) & maskx)
  380. p = append(p, tx | byte(r >> 6) & maskx)
  381. p = append(p, tx | byte(r) & maskx)
  382. return p
  383. }
  384. }
  385. func (iter *Iterator) ReadArray() (ret bool) {
  386. iter.skipWhitespaces()
  387. c := iter.readByte()
  388. if iter.Error != nil {
  389. return
  390. }
  391. switch c {
  392. case 'n': {
  393. iter.skipNull()
  394. if iter.Error != nil {
  395. return
  396. }
  397. return false // null
  398. }
  399. case '[': {
  400. iter.skipWhitespaces()
  401. c = iter.readByte()
  402. if iter.Error != nil {
  403. return
  404. }
  405. if c == ']' {
  406. return false
  407. } else {
  408. iter.unreadByte()
  409. return true
  410. }
  411. }
  412. case ']': return false
  413. case ',':
  414. iter.skipWhitespaces()
  415. return true
  416. default:
  417. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  418. return
  419. }
  420. }
  421. func (iter *Iterator) ReadObject() (ret string) {
  422. iter.skipWhitespaces()
  423. c := iter.readByte()
  424. if iter.Error != nil {
  425. return
  426. }
  427. switch c {
  428. case 'n': {
  429. iter.skipNull()
  430. if iter.Error != nil {
  431. return
  432. }
  433. return "" // null
  434. }
  435. case '{': {
  436. iter.skipWhitespaces()
  437. c = iter.readByte()
  438. if iter.Error != nil {
  439. return
  440. }
  441. switch c {
  442. case '}':
  443. return "" // end of object
  444. case '"':
  445. iter.unreadByte()
  446. field := iter.readObjectField()
  447. if iter.Error != nil {
  448. return
  449. }
  450. return field
  451. default:
  452. iter.ReportError("ReadObject", `expect " after {`)
  453. return
  454. }
  455. }
  456. case ',':
  457. iter.skipWhitespaces()
  458. field := iter.readObjectField()
  459. if iter.Error != nil {
  460. return
  461. }
  462. return field
  463. case '}':
  464. return "" // end of object
  465. default:
  466. iter.ReportError("ReadObject", `expect { or , or } or n`)
  467. return
  468. }
  469. }
  470. func (iter *Iterator) readObjectField() (ret string) {
  471. field := iter.ReadString()
  472. if iter.Error != nil {
  473. return
  474. }
  475. iter.skipWhitespaces()
  476. c := iter.readByte()
  477. if iter.Error != nil {
  478. return
  479. }
  480. if c != ':' {
  481. iter.ReportError("ReadObject", "expect : after object field")
  482. return
  483. }
  484. iter.skipWhitespaces()
  485. return field
  486. }
  487. func (iter *Iterator) ReadFloat32() (ret float32) {
  488. str := make([]byte, 0, 4)
  489. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  490. switch c {
  491. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  492. str = append(str, c)
  493. continue
  494. default:
  495. iter.unreadByte()
  496. }
  497. break
  498. }
  499. if iter.Error != nil && iter.Error != io.EOF {
  500. return
  501. }
  502. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
  503. if err != nil {
  504. iter.Error = err
  505. return
  506. }
  507. return float32(val)
  508. }
  509. func (iter *Iterator) ReadFloat64() (ret float64) {
  510. str := make([]byte, 0, 4)
  511. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  512. switch c {
  513. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  514. str = append(str, c)
  515. continue
  516. default:
  517. iter.unreadByte()
  518. }
  519. break
  520. }
  521. if iter.Error != nil && iter.Error != io.EOF {
  522. return
  523. }
  524. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
  525. if err != nil {
  526. iter.Error = err
  527. return
  528. }
  529. return val
  530. }
  531. func (iter *Iterator) ReadBool() (ret bool) {
  532. c := iter.readByte()
  533. if iter.Error != nil {
  534. return
  535. }
  536. switch c {
  537. case 't':
  538. iter.skipTrue()
  539. if iter.Error != nil {
  540. return
  541. }
  542. return true
  543. case 'f':
  544. iter.skipFalse()
  545. if iter.Error != nil {
  546. return
  547. }
  548. return false
  549. default:
  550. iter.ReportError("ReadBool", "expect t or f")
  551. return
  552. }
  553. }
  554. func (iter *Iterator) skipTrue() {
  555. c := iter.readByte()
  556. if iter.Error != nil {
  557. return
  558. }
  559. if c != 'r' {
  560. iter.ReportError("skipTrue", "expect r of true")
  561. return
  562. }
  563. c = iter.readByte()
  564. if iter.Error != nil {
  565. return
  566. }
  567. if c != 'u' {
  568. iter.ReportError("skipTrue", "expect u of true")
  569. return
  570. }
  571. c = iter.readByte()
  572. if iter.Error != nil {
  573. return
  574. }
  575. if c != 'e' {
  576. iter.ReportError("skipTrue", "expect e of true")
  577. return
  578. }
  579. }
  580. func (iter *Iterator) skipFalse() {
  581. c := iter.readByte()
  582. if iter.Error != nil {
  583. return
  584. }
  585. if c != 'a' {
  586. iter.ReportError("skipFalse", "expect a of false")
  587. return
  588. }
  589. c = iter.readByte()
  590. if iter.Error != nil {
  591. return
  592. }
  593. if c != 'l' {
  594. iter.ReportError("skipFalse", "expect l of false")
  595. return
  596. }
  597. c = iter.readByte()
  598. if iter.Error != nil {
  599. return
  600. }
  601. if c != 's' {
  602. iter.ReportError("skipFalse", "expect s of false")
  603. return
  604. }
  605. c = iter.readByte()
  606. if iter.Error != nil {
  607. return
  608. }
  609. if c != 'e' {
  610. iter.ReportError("skipFalse", "expect e of false")
  611. return
  612. }
  613. }
  614. func (iter *Iterator) ReadNull() (ret bool) {
  615. c := iter.readByte()
  616. if iter.Error != nil {
  617. return
  618. }
  619. if c == 'n' {
  620. iter.skipNull()
  621. if iter.Error != nil {
  622. return
  623. }
  624. return true
  625. }
  626. iter.unreadByte()
  627. return false
  628. }
  629. func (iter *Iterator) skipNull() {
  630. c := iter.readByte()
  631. if iter.Error != nil {
  632. return
  633. }
  634. if c != 'u' {
  635. iter.ReportError("skipNull", "expect u of null")
  636. return
  637. }
  638. c = iter.readByte()
  639. if iter.Error != nil {
  640. return
  641. }
  642. if c != 'l' {
  643. iter.ReportError("skipNull", "expect l of null")
  644. return
  645. }
  646. c = iter.readByte()
  647. if iter.Error != nil {
  648. return
  649. }
  650. if c != 'l' {
  651. iter.ReportError("skipNull", "expect l of null")
  652. return
  653. }
  654. }
  655. func (iter *Iterator) Skip() {
  656. c := iter.readByte()
  657. if iter.Error != nil {
  658. return
  659. }
  660. switch c {
  661. case '"':
  662. iter.skipString()
  663. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  664. iter.skipNumber()
  665. case '[':
  666. iter.skipArray()
  667. case '{':
  668. iter.skipObject()
  669. case 't':
  670. iter.skipTrue()
  671. case 'f':
  672. iter.skipFalse()
  673. case 'n':
  674. iter.skipNull()
  675. default:
  676. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  677. return
  678. }
  679. }
  680. func (iter *Iterator) skipString() {
  681. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  682. switch c {
  683. case '"':
  684. return // end of string found
  685. case '\\':
  686. iter.readByte() // " after \\ does not count
  687. if iter.Error != nil {
  688. return
  689. }
  690. }
  691. }
  692. }
  693. func (iter *Iterator) skipNumber() {
  694. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  695. switch c {
  696. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  697. continue
  698. default:
  699. iter.unreadByte()
  700. return
  701. }
  702. }
  703. }
  704. func (iter *Iterator) skipArray() {
  705. for {
  706. iter.skipWhitespaces()
  707. c := iter.readByte()
  708. if iter.Error != nil {
  709. return
  710. }
  711. if c == ']' {
  712. return
  713. }
  714. iter.unreadByte()
  715. iter.Skip()
  716. iter.skipWhitespaces()
  717. c = iter.readByte()
  718. if iter.Error != nil {
  719. return
  720. }
  721. switch c {
  722. case ',':
  723. iter.skipWhitespaces()
  724. continue
  725. case ']':
  726. return
  727. default:
  728. iter.ReportError("skipArray", "expects , or ]")
  729. return
  730. }
  731. }
  732. }
  733. func (iter *Iterator) skipObject() {
  734. iter.skipWhitespaces()
  735. c := iter.readByte()
  736. if iter.Error != nil {
  737. return
  738. }
  739. if c == '}' {
  740. return // end of object
  741. } else {
  742. iter.unreadByte()
  743. }
  744. for {
  745. iter.skipWhitespaces()
  746. c := iter.readByte()
  747. if iter.Error != nil {
  748. return
  749. }
  750. if c != '"' {
  751. iter.ReportError("skipObject", `expects "`)
  752. return
  753. }
  754. iter.skipString()
  755. iter.skipWhitespaces()
  756. c = iter.readByte()
  757. if iter.Error != nil {
  758. return
  759. }
  760. if c != ':' {
  761. iter.ReportError("skipObject", `expects :`)
  762. return
  763. }
  764. iter.skipWhitespaces()
  765. iter.Skip()
  766. iter.skipWhitespaces()
  767. c = iter.readByte()
  768. if iter.Error != nil {
  769. return
  770. }
  771. switch c {
  772. case ',':
  773. iter.skipWhitespaces()
  774. continue
  775. case '}':
  776. return // end of object
  777. default:
  778. iter.ReportError("skipObject", "expects , or }")
  779. return
  780. }
  781. }
  782. }