jsoniter.go 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. "unsafe"
  8. )
  // digits maps a byte to its value as a hexadecimal digit: '0'-'9' give 0-9,
  // 'a'-'f' and 'A'-'F' give 10-15. Every other byte maps to 255.
  var digits []byte

  // init builds the digits lookup table.
  func init() {
  	const invalid = 255
  	table := make([]byte, 256)
  	for i := range table {
  		table[i] = invalid
  	}
  	for d := byte(0); d < 10; d++ {
  		table['0'+d] = d
  	}
  	for d := byte(0); d < 6; d++ {
  		table['a'+d] = 10 + d
  		table['A'+d] = 10 + d
  	}
  	digits = table
  }
  // Iterator reads JSON tokens one at a time from a byte buffer that is
  // optionally refilled from an io.Reader.
  type Iterator struct {
  	// reader supplies more input when the buffer is exhausted; it is nil
  	// when the iterator works on an in-memory slice only.
  	reader io.Reader
  	// buf holds the bytes currently available for parsing.
  	buf []byte
  	// head is the index in buf of the next byte to be read.
  	head int
  	// tail is the index one past the last valid byte in buf.
  	tail int
  	// Error holds the first error recorded while parsing.
  	Error error
  }
  32. func Parse(reader io.Reader, bufSize int) *Iterator {
  33. iter := &Iterator{
  34. reader: reader,
  35. buf: make([]byte, bufSize),
  36. head: 0,
  37. tail: 0,
  38. }
  39. iter.skipWhitespaces()
  40. return iter
  41. }
  42. func ParseBytes(input []byte) *Iterator {
  43. iter := &Iterator{
  44. reader: nil,
  45. buf: input,
  46. head: 0,
  47. tail: len(input),
  48. }
  49. iter.skipWhitespaces()
  50. return iter
  51. }
  52. func (iter *Iterator) Reuse(input []byte) *Iterator {
  53. // only for benchmarking
  54. iter.reader = nil
  55. iter.Error = nil
  56. iter.buf = input
  57. iter.head = 0
  58. iter.tail = len(input)
  59. iter.skipWhitespaces()
  60. return iter
  61. }
  62. func ParseString(input string) *Iterator {
  63. return ParseBytes([]byte(input))
  64. }
  65. func (iter *Iterator) skipWhitespaces() {
  66. c := iter.readByte()
  67. for {
  68. switch c {
  69. case ' ', '\n', '\t', 'r':
  70. c = iter.readByte()
  71. continue
  72. }
  73. break
  74. }
  75. iter.unreadByte()
  76. }
  77. func (iter *Iterator) ReportError(operation string, msg string) {
  78. if iter.Error != nil {
  79. return
  80. }
  81. peekStart := iter.head - 10
  82. if peekStart < 0 {
  83. peekStart = 0
  84. }
  85. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  86. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  87. }
  88. func (iter *Iterator) CurrentBuffer() string {
  89. peekStart := iter.head - 10
  90. if peekStart < 0 {
  91. peekStart = 0
  92. }
  93. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  94. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  95. }
  96. func (iter *Iterator) readByte() (ret byte) {
  97. if iter.head == iter.tail {
  98. if iter.reader == nil {
  99. iter.Error = io.EOF
  100. return
  101. }
  102. for {
  103. n, err := iter.reader.Read(iter.buf)
  104. if n == 0 {
  105. if err != nil {
  106. iter.Error = err
  107. return
  108. } else {
  109. // n == 0, err == nil is not EOF
  110. continue
  111. }
  112. } else {
  113. iter.head = 0
  114. iter.tail = n
  115. break
  116. }
  117. }
  118. }
  119. ret = iter.buf[iter.head]
  120. iter.head++
  121. return ret
  122. }
  123. func (iter *Iterator) unreadByte() {
  124. if iter.head == 0 {
  125. iter.ReportError("unreadByte", "unread too many bytes")
  126. return
  127. }
  128. iter.head -= 1
  129. return
  130. }
  // Limits used by the integer parsers. Each cutoff is the smallest value
  // that exceeds the matching maximum once multiplied by ten.
  const (
  	maxUint64    = 1<<64 - 1
  	cutoffUint64 = maxUint64/10 + 1
  	maxUint32    = 1<<32 - 1
  	cutoffUint32 = maxUint32/10 + 1
  )
  135. func (iter *Iterator) ReadUint() (ret uint) {
  136. val := iter.ReadUint64()
  137. converted := uint(val)
  138. if uint64(converted) != val {
  139. iter.ReportError("ReadUint", "int overflow")
  140. return
  141. }
  142. return converted
  143. }
  144. func (iter *Iterator) ReadUint8() (ret uint8) {
  145. val := iter.ReadUint64()
  146. converted := uint8(val)
  147. if uint64(converted) != val {
  148. iter.ReportError("ReadUint8", "int overflow")
  149. return
  150. }
  151. return converted
  152. }
  153. func (iter *Iterator) ReadUint16() (ret uint16) {
  154. val := iter.ReadUint64()
  155. converted := uint16(val)
  156. if uint64(converted) != val {
  157. iter.ReportError("ReadUint16", "int overflow")
  158. return
  159. }
  160. return converted
  161. }
  162. func (iter *Iterator) ReadUint32() (ret uint32) {
  163. val := iter.ReadUint64()
  164. converted := uint32(val)
  165. if uint64(converted) != val {
  166. iter.ReportError("ReadUint32", "int overflow")
  167. return
  168. }
  169. return converted
  170. }
  171. func (iter *Iterator) ReadUint64() (ret uint64) {
  172. c := iter.readByte()
  173. v := digits[c]
  174. if v == 0 {
  175. return 0 // single zero
  176. }
  177. if v == 255 {
  178. iter.ReportError("ReadUint64", "unexpected character")
  179. return
  180. }
  181. for {
  182. if ret >= cutoffUint64 {
  183. iter.ReportError("ReadUint64", "overflow")
  184. return
  185. }
  186. ret = ret * 10 + uint64(v)
  187. c = iter.readByte()
  188. v = digits[c]
  189. if v == 255 {
  190. iter.unreadByte()
  191. break
  192. }
  193. }
  194. return ret
  195. }
  196. func (iter *Iterator) ReadInt() (ret int) {
  197. val := iter.ReadInt64()
  198. converted := int(val)
  199. if int64(converted) != val {
  200. iter.ReportError("ReadInt", "int overflow")
  201. return
  202. }
  203. return converted
  204. }
  205. func (iter *Iterator) ReadInt8() (ret int8) {
  206. val := iter.ReadInt64()
  207. converted := int8(val)
  208. if int64(converted) != val {
  209. iter.ReportError("ReadInt8", "int overflow")
  210. return
  211. }
  212. return converted
  213. }
  214. func (iter *Iterator) ReadInt16() (ret int16) {
  215. val := iter.ReadInt64()
  216. converted := int16(val)
  217. if int64(converted) != val {
  218. iter.ReportError("ReadInt16", "int overflow")
  219. return
  220. }
  221. return converted
  222. }
  223. func (iter *Iterator) ReadInt32() (ret int32) {
  224. val := iter.ReadInt64()
  225. converted := int32(val)
  226. if int64(converted) != val {
  227. iter.ReportError("ReadInt32", "int overflow")
  228. return
  229. }
  230. return converted
  231. }
  232. func (iter *Iterator) ReadInt64() (ret int64) {
  233. c := iter.readByte()
  234. if iter.Error != nil {
  235. return
  236. }
  237. /* optional leading minus */
  238. if c == '-' {
  239. n := iter.ReadUint64()
  240. return -int64(n)
  241. } else {
  242. iter.unreadByte()
  243. n := iter.ReadUint64()
  244. return int64(n)
  245. }
  246. }
  247. func (iter *Iterator) ReadString() (ret string) {
  248. str := make([]byte, 0, 8)
  249. c := iter.readByte()
  250. if c == 'n' {
  251. iter.skipNull()
  252. return
  253. }
  254. if c != '"' {
  255. iter.ReportError("ReadString", `expects " or n`)
  256. return
  257. }
  258. for iter.Error == nil {
  259. c = iter.readByte()
  260. if c == '"' {
  261. return string(str)
  262. }
  263. if c == '\\' {
  264. c = iter.readByte()
  265. if iter.Error != nil {
  266. return
  267. }
  268. switch c {
  269. case 'u':
  270. r := iter.readU4()
  271. if iter.Error != nil {
  272. return
  273. }
  274. if utf16.IsSurrogate(r) {
  275. c = iter.readByte()
  276. if iter.Error != nil {
  277. return
  278. }
  279. if c != '\\' {
  280. iter.ReportError("ReadString",
  281. `expects \u after utf16 surrogate, but \ not found`)
  282. return
  283. }
  284. c = iter.readByte()
  285. if iter.Error != nil {
  286. return
  287. }
  288. if c != 'u' {
  289. iter.ReportError("ReadString",
  290. `expects \u after utf16 surrogate, but \u not found`)
  291. return
  292. }
  293. r2 := iter.readU4()
  294. if iter.Error != nil {
  295. return
  296. }
  297. combined := utf16.DecodeRune(r, r2)
  298. str = appendRune(str, combined)
  299. } else {
  300. str = appendRune(str, r)
  301. }
  302. case '"':
  303. str = append(str, '"')
  304. case '\\':
  305. str = append(str, '\\')
  306. case '/':
  307. str = append(str, '/')
  308. case 'b':
  309. str = append(str, '\b')
  310. case 'f':
  311. str = append(str, '\f')
  312. case 'n':
  313. str = append(str, '\n')
  314. case 'r':
  315. str = append(str, '\r')
  316. case 't':
  317. str = append(str, '\t')
  318. default:
  319. iter.ReportError("ReadString",
  320. `invalid escape char after \`)
  321. return
  322. }
  323. } else {
  324. str = append(str, c)
  325. }
  326. }
  327. return
  328. }
  329. func (iter *Iterator) readU4() (ret rune) {
  330. for i := 0; i < 4; i++ {
  331. c := iter.readByte()
  332. if iter.Error != nil {
  333. return
  334. }
  335. if (c >= '0' && c <= '9') {
  336. if ret >= cutoffUint32 {
  337. iter.ReportError("readU4", "overflow")
  338. return
  339. }
  340. ret = ret * 16 + rune(c - '0')
  341. } else if ((c >= 'a' && c <= 'f') ) {
  342. if ret >= cutoffUint32 {
  343. iter.ReportError("readU4", "overflow")
  344. return
  345. }
  346. ret = ret * 16 + rune(c - 'a' + 10)
  347. } else {
  348. iter.ReportError("readU4", "expects 0~9 or a~f")
  349. return
  350. }
  351. }
  352. return ret
  353. }
  // Bit patterns and limits used for UTF-8 encoding.
  const (
  	t1 = 0x00 // 0000 0000
  	tx = 0x80 // 1000 0000
  	t2 = 0xC0 // 1100 0000
  	t3 = 0xE0 // 1110 0000
  	t4 = 0xF0 // 1111 0000
  	t5 = 0xF8 // 1111 1000

  	maskx = 0x3F // 0011 1111
  	mask2 = 0x1F // 0001 1111
  	mask3 = 0x0F // 0000 1111
  	mask4 = 0x07 // 0000 0111

  	rune1Max = 1<<7 - 1
  	rune2Max = 1<<11 - 1
  	rune3Max = 1<<16 - 1

  	surrogateMin = 0xD800
  	surrogateMax = 0xDFFF

  	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
  	RuneError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
  )

  // appendRune appends the UTF-8 encoding of r to p and returns the extended
  // slice. Runes that are negative, above MaxRune, or inside the surrogate
  // range are encoded as RuneError.
  func appendRune(p []byte, r rune) []byte {
  	// Treating the rune as unsigned makes negative values fall into the
  	// "too large" case below.
  	u := uint32(r)
  	if u <= rune1Max {
  		return append(p, byte(r))
  	}
  	if u <= rune2Max {
  		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
  	}
  	invalid := u > MaxRune || (surrogateMin <= u && u <= surrogateMax)
  	if invalid {
  		r = RuneError
  	}
  	if invalid || u <= rune3Max {
  		return append(p,
  			t3|byte(r>>12),
  			tx|byte(r>>6)&maskx,
  			tx|byte(r)&maskx)
  	}
  	return append(p,
  		t4|byte(r>>18),
  		tx|byte(r>>12)&maskx,
  		tx|byte(r>>6)&maskx,
  		tx|byte(r)&maskx)
  }
  399. func (iter *Iterator) ReadArray() (ret bool) {
  400. iter.skipWhitespaces()
  401. c := iter.readByte()
  402. if iter.Error != nil {
  403. return
  404. }
  405. switch c {
  406. case 'n': {
  407. iter.skipNull()
  408. return false // null
  409. }
  410. case '[': {
  411. iter.skipWhitespaces()
  412. c = iter.readByte()
  413. if iter.Error != nil {
  414. return
  415. }
  416. if c == ']' {
  417. return false
  418. } else {
  419. iter.unreadByte()
  420. return true
  421. }
  422. }
  423. case ']': return false
  424. case ',':
  425. iter.skipWhitespaces()
  426. return true
  427. default:
  428. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  429. return
  430. }
  431. }
  432. func (iter *Iterator) ReadArrayCB(cb func()) {
  433. iter.skipWhitespaces()
  434. c := iter.readByte()
  435. if c == 'n' {
  436. iter.skipNull()
  437. return // null
  438. }
  439. if c != '[' {
  440. iter.ReportError("ReadArray", "expect [ or n")
  441. return
  442. }
  443. iter.skipWhitespaces()
  444. c = iter.readByte()
  445. if c == ']' {
  446. return // []
  447. } else {
  448. iter.unreadByte()
  449. }
  450. for {
  451. if iter.Error != nil {
  452. return
  453. }
  454. cb()
  455. iter.skipWhitespaces()
  456. c = iter.readByte()
  457. if c == ']' {
  458. return
  459. }
  460. if c != ',' {
  461. iter.ReportError("ReadArray", "expect , or ]")
  462. return
  463. }
  464. iter.skipWhitespaces()
  465. }
  466. }
  467. func (iter *Iterator) ReadObject() (ret string) {
  468. iter.skipWhitespaces()
  469. c := iter.readByte()
  470. if iter.Error != nil {
  471. return
  472. }
  473. switch c {
  474. case 'n': {
  475. iter.skipNull()
  476. if iter.Error != nil {
  477. return
  478. }
  479. return "" // null
  480. }
  481. case '{': {
  482. iter.skipWhitespaces()
  483. c = iter.readByte()
  484. if iter.Error != nil {
  485. return
  486. }
  487. switch c {
  488. case '}':
  489. return "" // end of object
  490. case '"':
  491. iter.unreadByte()
  492. return iter.readObjectField()
  493. default:
  494. iter.ReportError("ReadObject", `expect " after {`)
  495. return
  496. }
  497. }
  498. case ',':
  499. iter.skipWhitespaces()
  500. return iter.readObjectField()
  501. case '}':
  502. return "" // end of object
  503. default:
  504. iter.ReportError("ReadObject", `expect { or , or } or n`)
  505. return
  506. }
  507. }
  508. func (iter *Iterator) readObjectField() (ret string) {
  509. field := iter.ReadString()
  510. if iter.Error != nil {
  511. return
  512. }
  513. iter.skipWhitespaces()
  514. c := iter.readByte()
  515. if iter.Error != nil {
  516. return
  517. }
  518. if c != ':' {
  519. iter.ReportError("ReadObject", "expect : after object field")
  520. return
  521. }
  522. iter.skipWhitespaces()
  523. return field
  524. }
  525. func (iter *Iterator) ReadFloat32() (ret float32) {
  526. str := make([]byte, 0, 4)
  527. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  528. switch c {
  529. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  530. str = append(str, c)
  531. continue
  532. default:
  533. iter.unreadByte()
  534. }
  535. break
  536. }
  537. if iter.Error != nil && iter.Error != io.EOF {
  538. return
  539. }
  540. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
  541. if err != nil {
  542. iter.Error = err
  543. return
  544. }
  545. return float32(val)
  546. }
  547. func (iter *Iterator) ReadFloat64() (ret float64) {
  548. str := make([]byte, 0, 4)
  549. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  550. switch c {
  551. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  552. str = append(str, c)
  553. continue
  554. default:
  555. iter.unreadByte()
  556. }
  557. break
  558. }
  559. if iter.Error != nil && iter.Error != io.EOF {
  560. return
  561. }
  562. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
  563. if err != nil {
  564. iter.Error = err
  565. return
  566. }
  567. return val
  568. }
  569. func (iter *Iterator) ReadBool() (ret bool) {
  570. c := iter.readByte()
  571. if iter.Error != nil {
  572. return
  573. }
  574. switch c {
  575. case 't':
  576. iter.skipTrue()
  577. if iter.Error != nil {
  578. return
  579. }
  580. return true
  581. case 'f':
  582. iter.skipFalse()
  583. if iter.Error != nil {
  584. return
  585. }
  586. return false
  587. default:
  588. iter.ReportError("ReadBool", "expect t or f")
  589. return
  590. }
  591. }
  592. func (iter *Iterator) skipTrue() {
  593. c := iter.readByte()
  594. if c != 'r' {
  595. iter.ReportError("skipTrue", "expect r of true")
  596. return
  597. }
  598. c = iter.readByte()
  599. if c != 'u' {
  600. iter.ReportError("skipTrue", "expect u of true")
  601. return
  602. }
  603. c = iter.readByte()
  604. if c != 'e' {
  605. iter.ReportError("skipTrue", "expect e of true")
  606. return
  607. }
  608. }
  609. func (iter *Iterator) skipFalse() {
  610. c := iter.readByte()
  611. if c != 'a' {
  612. iter.ReportError("skipFalse", "expect a of false")
  613. return
  614. }
  615. c = iter.readByte()
  616. if c != 'l' {
  617. iter.ReportError("skipFalse", "expect l of false")
  618. return
  619. }
  620. c = iter.readByte()
  621. if c != 's' {
  622. iter.ReportError("skipFalse", "expect s of false")
  623. return
  624. }
  625. c = iter.readByte()
  626. if c != 'e' {
  627. iter.ReportError("skipFalse", "expect e of false")
  628. return
  629. }
  630. }
  631. func (iter *Iterator) ReadNull() (ret bool) {
  632. c := iter.readByte()
  633. if c == 'n' {
  634. iter.skipNull()
  635. return true
  636. }
  637. iter.unreadByte()
  638. return false
  639. }
  640. func (iter *Iterator) skipNull() {
  641. c := iter.readByte()
  642. if c != 'u' {
  643. iter.ReportError("skipNull", "expect u of null")
  644. return
  645. }
  646. c = iter.readByte()
  647. if c != 'l' {
  648. iter.ReportError("skipNull", "expect l of null")
  649. return
  650. }
  651. c = iter.readByte()
  652. if c != 'l' {
  653. iter.ReportError("skipNull", "expect l of null")
  654. return
  655. }
  656. }
  657. func (iter *Iterator) Skip() {
  658. c := iter.readByte()
  659. if iter.Error != nil {
  660. return
  661. }
  662. switch c {
  663. case '"':
  664. iter.skipString()
  665. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  666. iter.skipNumber()
  667. case '[':
  668. iter.skipArray()
  669. case '{':
  670. iter.skipObject()
  671. case 't':
  672. iter.skipTrue()
  673. case 'f':
  674. iter.skipFalse()
  675. case 'n':
  676. iter.skipNull()
  677. default:
  678. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  679. return
  680. }
  681. }
  682. func (iter *Iterator) skipString() {
  683. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  684. switch c {
  685. case '"':
  686. return // end of string found
  687. case '\\':
  688. iter.readByte() // " after \\ does not count
  689. }
  690. }
  691. }
  692. func (iter *Iterator) skipNumber() {
  693. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  694. switch c {
  695. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  696. continue
  697. default:
  698. iter.unreadByte()
  699. return
  700. }
  701. }
  702. }
  703. func (iter *Iterator) skipArray() {
  704. for {
  705. iter.skipWhitespaces()
  706. c := iter.readByte()
  707. if iter.Error != nil {
  708. return
  709. }
  710. if c == ']' {
  711. return
  712. }
  713. iter.unreadByte()
  714. iter.Skip()
  715. iter.skipWhitespaces()
  716. c = iter.readByte()
  717. switch c {
  718. case ',':
  719. iter.skipWhitespaces()
  720. continue
  721. case ']':
  722. return
  723. default:
  724. iter.ReportError("skipArray", "expects , or ]")
  725. return
  726. }
  727. }
  728. }
  729. func (iter *Iterator) skipObject() {
  730. iter.skipWhitespaces()
  731. c := iter.readByte()
  732. if iter.Error != nil {
  733. return
  734. }
  735. if c == '}' {
  736. return // end of object
  737. } else {
  738. iter.unreadByte()
  739. }
  740. for {
  741. iter.skipWhitespaces()
  742. c := iter.readByte()
  743. if c != '"' {
  744. iter.ReportError("skipObject", `expects "`)
  745. return
  746. }
  747. iter.skipString()
  748. iter.skipWhitespaces()
  749. c = iter.readByte()
  750. if c != ':' {
  751. iter.ReportError("skipObject", `expects :`)
  752. return
  753. }
  754. iter.skipWhitespaces()
  755. iter.Skip()
  756. iter.skipWhitespaces()
  757. c = iter.readByte()
  758. switch c {
  759. case ',':
  760. iter.skipWhitespaces()
  761. continue
  762. case '}':
  763. return // end of object
  764. default:
  765. iter.ReportError("skipObject", "expects , or }")
  766. return
  767. }
  768. }
  769. }