jsoniter.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. "unsafe"
  8. )
  9. var digits []byte
  10. func init() {
  11. digits = make([]byte, 256)
  12. for i := 0; i < len(digits); i++ {
  13. digits[i] = 255
  14. }
  15. for i := '0'; i <= '9'; i++ {
  16. digits[i] = byte(i - '0');
  17. }
  18. for i := 'a'; i <= 'f'; i++ {
  19. digits[i] = byte((i - 'a') + 10);
  20. }
  21. for i := 'A'; i <= 'F'; i++ {
  22. digits[i] = byte((i - 'A') + 10);
  23. }
  24. }
  25. type Iterator struct {
  26. reader io.Reader
  27. buf []byte
  28. head int
  29. tail int
  30. Error error
  31. }
  32. func Parse(reader io.Reader, bufSize int) *Iterator {
  33. iter := &Iterator{
  34. reader: reader,
  35. buf: make([]byte, bufSize),
  36. head: 0,
  37. tail: 0,
  38. }
  39. iter.skipWhitespaces()
  40. return iter
  41. }
  42. func ParseBytes(input []byte) *Iterator {
  43. iter := &Iterator{
  44. reader: nil,
  45. buf: input,
  46. head: 0,
  47. tail: len(input),
  48. }
  49. iter.skipWhitespaces()
  50. return iter
  51. }
  52. func ParseString(input string) *Iterator {
  53. return ParseBytes([]byte(input))
  54. }
  55. func (iter *Iterator) skipWhitespaces() {
  56. c := iter.readByte()
  57. for c == ' ' || c == '\n' || c == '\t' {
  58. c = iter.readByte()
  59. }
  60. iter.unreadByte()
  61. }
  62. func (iter *Iterator) ReportError(operation string, msg string) {
  63. if iter.Error != nil {
  64. return
  65. }
  66. peekStart := iter.head - 10
  67. if peekStart < 0 {
  68. peekStart = 0
  69. }
  70. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  71. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  72. }
  73. func (iter *Iterator) CurrentBuffer() string {
  74. peekStart := iter.head - 10
  75. if peekStart < 0 {
  76. peekStart = 0
  77. }
  78. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  79. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  80. }
  81. func (iter *Iterator) readByte() (ret byte) {
  82. if iter.head == iter.tail {
  83. if iter.reader == nil {
  84. iter.Error = io.EOF
  85. return
  86. }
  87. for {
  88. n, err := iter.reader.Read(iter.buf)
  89. if n == 0 {
  90. if err != nil {
  91. iter.Error = err
  92. return
  93. } else {
  94. // n == 0, err == nil is not EOF
  95. continue
  96. }
  97. } else {
  98. iter.head = 0
  99. iter.tail = n
  100. break
  101. }
  102. }
  103. }
  104. ret = iter.buf[iter.head]
  105. iter.head++
  106. return ret
  107. }
  108. func (iter *Iterator) unreadByte() {
  109. if iter.head == 0 {
  110. iter.ReportError("unreadByte", "unread too many bytes")
  111. return
  112. }
  113. iter.head -= 1
  114. return
  115. }
  116. const maxUint64 = (1 << 64 - 1)
  117. const cutoffUint64 = maxUint64 / 10 + 1
  118. const maxUint32 = (1 << 32 - 1)
  119. const cutoffUint32 = maxUint32 / 10 + 1
  120. func (iter *Iterator) ReadUint() (ret uint) {
  121. val := iter.ReadUint64()
  122. converted := uint(val)
  123. if uint64(converted) != val {
  124. iter.ReportError("ReadUint", "int overflow")
  125. return
  126. }
  127. return converted
  128. }
  129. func (iter *Iterator) ReadUint8() (ret uint8) {
  130. val := iter.ReadUint64()
  131. converted := uint8(val)
  132. if uint64(converted) != val {
  133. iter.ReportError("ReadUint8", "int overflow")
  134. return
  135. }
  136. return converted
  137. }
  138. func (iter *Iterator) ReadUint16() (ret uint16) {
  139. val := iter.ReadUint64()
  140. converted := uint16(val)
  141. if uint64(converted) != val {
  142. iter.ReportError("ReadUint16", "int overflow")
  143. return
  144. }
  145. return converted
  146. }
  147. func (iter *Iterator) ReadUint32() (ret uint32) {
  148. val := iter.ReadUint64()
  149. converted := uint32(val)
  150. if uint64(converted) != val {
  151. iter.ReportError("ReadUint32", "int overflow")
  152. return
  153. }
  154. return converted
  155. }
  156. func (iter *Iterator) ReadUint64() (ret uint64) {
  157. c := iter.readByte()
  158. v := digits[c]
  159. if v == 0 {
  160. return 0 // single zero
  161. }
  162. if v == 255 {
  163. iter.ReportError("ReadUint64", "unexpected character")
  164. return
  165. }
  166. for {
  167. if ret >= cutoffUint64 {
  168. iter.ReportError("ReadUint64", "overflow")
  169. return
  170. }
  171. ret = ret * 10 + uint64(v)
  172. c = iter.readByte()
  173. v = digits[c]
  174. if v == 255 {
  175. iter.unreadByte()
  176. break
  177. }
  178. }
  179. return ret
  180. }
  181. func (iter *Iterator) ReadInt() (ret int) {
  182. val := iter.ReadInt64()
  183. converted := int(val)
  184. if int64(converted) != val {
  185. iter.ReportError("ReadInt", "int overflow")
  186. return
  187. }
  188. return converted
  189. }
  190. func (iter *Iterator) ReadInt8() (ret int8) {
  191. val := iter.ReadInt64()
  192. converted := int8(val)
  193. if int64(converted) != val {
  194. iter.ReportError("ReadInt8", "int overflow")
  195. return
  196. }
  197. return converted
  198. }
  199. func (iter *Iterator) ReadInt16() (ret int16) {
  200. val := iter.ReadInt64()
  201. converted := int16(val)
  202. if int64(converted) != val {
  203. iter.ReportError("ReadInt16", "int overflow")
  204. return
  205. }
  206. return converted
  207. }
  208. func (iter *Iterator) ReadInt32() (ret int32) {
  209. val := iter.ReadInt64()
  210. converted := int32(val)
  211. if int64(converted) != val {
  212. iter.ReportError("ReadInt32", "int overflow")
  213. return
  214. }
  215. return converted
  216. }
  217. func (iter *Iterator) ReadInt64() (ret int64) {
  218. c := iter.readByte()
  219. if iter.Error != nil {
  220. return
  221. }
  222. /* optional leading minus */
  223. if c == '-' {
  224. n := iter.ReadUint64()
  225. return -int64(n)
  226. } else {
  227. iter.unreadByte()
  228. n := iter.ReadUint64()
  229. return int64(n)
  230. }
  231. }
  232. func (iter *Iterator) ReadString() (ret string) {
  233. str := make([]byte, 0, 8)
  234. c := iter.readByte()
  235. if iter.Error != nil {
  236. return
  237. }
  238. switch c {
  239. case 'n':
  240. iter.skipNull()
  241. if iter.Error != nil {
  242. return
  243. }
  244. return ""
  245. case '"':
  246. // nothing
  247. default:
  248. iter.ReportError("ReadString", `expects " or n`)
  249. return
  250. }
  251. for {
  252. c = iter.readByte()
  253. if iter.Error != nil {
  254. return
  255. }
  256. switch c {
  257. case '\\':
  258. c = iter.readByte()
  259. if iter.Error != nil {
  260. return
  261. }
  262. switch c {
  263. case 'u':
  264. r := iter.readU4()
  265. if iter.Error != nil {
  266. return
  267. }
  268. if utf16.IsSurrogate(r) {
  269. c = iter.readByte()
  270. if iter.Error != nil {
  271. return
  272. }
  273. if c != '\\' {
  274. iter.ReportError("ReadString",
  275. `expects \u after utf16 surrogate, but \ not found`)
  276. return
  277. }
  278. c = iter.readByte()
  279. if iter.Error != nil {
  280. return
  281. }
  282. if c != 'u' {
  283. iter.ReportError("ReadString",
  284. `expects \u after utf16 surrogate, but \u not found`)
  285. return
  286. }
  287. r2 := iter.readU4()
  288. if iter.Error != nil {
  289. return
  290. }
  291. combined := utf16.DecodeRune(r, r2)
  292. str = appendRune(str, combined)
  293. } else {
  294. str = appendRune(str, r)
  295. }
  296. case '"':
  297. str = append(str, '"')
  298. case '\\':
  299. str = append(str, '\\')
  300. case '/':
  301. str = append(str, '/')
  302. case 'b':
  303. str = append(str, '\b')
  304. case 'f':
  305. str = append(str, '\f')
  306. case 'n':
  307. str = append(str, '\n')
  308. case 'r':
  309. str = append(str, '\r')
  310. case 't':
  311. str = append(str, '\t')
  312. default:
  313. iter.ReportError("ReadString",
  314. `invalid escape char after \`)
  315. return
  316. }
  317. case '"':
  318. return *(*string)(unsafe.Pointer(&str))
  319. default:
  320. str = append(str, c)
  321. }
  322. }
  323. }
  324. func (iter *Iterator) readU4() (ret rune) {
  325. for i := 0; i < 4; i++ {
  326. c := iter.readByte()
  327. if iter.Error != nil {
  328. return
  329. }
  330. if (c >= '0' && c <= '9') {
  331. if ret >= cutoffUint32 {
  332. iter.ReportError("readU4", "overflow")
  333. return
  334. }
  335. ret = ret * 16 + rune(c - '0')
  336. } else if ((c >= 'a' && c <= 'f') ) {
  337. if ret >= cutoffUint32 {
  338. iter.ReportError("readU4", "overflow")
  339. return
  340. }
  341. ret = ret * 16 + rune(c - 'a' + 10)
  342. } else {
  343. iter.ReportError("readU4", "expects 0~9 or a~f")
  344. return
  345. }
  346. }
  347. return ret
  348. }
  349. const (
  350. t1 = 0x00 // 0000 0000
  351. tx = 0x80 // 1000 0000
  352. t2 = 0xC0 // 1100 0000
  353. t3 = 0xE0 // 1110 0000
  354. t4 = 0xF0 // 1111 0000
  355. t5 = 0xF8 // 1111 1000
  356. maskx = 0x3F // 0011 1111
  357. mask2 = 0x1F // 0001 1111
  358. mask3 = 0x0F // 0000 1111
  359. mask4 = 0x07 // 0000 0111
  360. rune1Max = 1 << 7 - 1
  361. rune2Max = 1 << 11 - 1
  362. rune3Max = 1 << 16 - 1
  363. surrogateMin = 0xD800
  364. surrogateMax = 0xDFFF
  365. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  366. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  367. )
  368. func appendRune(p []byte, r rune) []byte {
  369. // Negative values are erroneous. Making it unsigned addresses the problem.
  370. switch i := uint32(r); {
  371. case i <= rune1Max:
  372. p = append(p, byte(r))
  373. return p
  374. case i <= rune2Max:
  375. p = append(p, t2 | byte(r >> 6))
  376. p = append(p, tx | byte(r) & maskx)
  377. return p
  378. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  379. r = RuneError
  380. fallthrough
  381. case i <= rune3Max:
  382. p = append(p, t3 | byte(r >> 12))
  383. p = append(p, tx | byte(r >> 6) & maskx)
  384. p = append(p, tx | byte(r) & maskx)
  385. return p
  386. default:
  387. p = append(p, t4 | byte(r >> 18))
  388. p = append(p, tx | byte(r >> 12) & maskx)
  389. p = append(p, tx | byte(r >> 6) & maskx)
  390. p = append(p, tx | byte(r) & maskx)
  391. return p
  392. }
  393. }
  394. func (iter *Iterator) ReadArray() (ret bool) {
  395. iter.skipWhitespaces()
  396. c := iter.readByte()
  397. if iter.Error != nil {
  398. return
  399. }
  400. switch c {
  401. case 'n': {
  402. iter.skipNull()
  403. if iter.Error != nil {
  404. return
  405. }
  406. return false // null
  407. }
  408. case '[': {
  409. iter.skipWhitespaces()
  410. c = iter.readByte()
  411. if iter.Error != nil {
  412. return
  413. }
  414. if c == ']' {
  415. return false
  416. } else {
  417. iter.unreadByte()
  418. return true
  419. }
  420. }
  421. case ']': return false
  422. case ',':
  423. iter.skipWhitespaces()
  424. return true
  425. default:
  426. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  427. return
  428. }
  429. }
  430. func (iter *Iterator) ReadObject() (ret string) {
  431. iter.skipWhitespaces()
  432. c := iter.readByte()
  433. if iter.Error != nil {
  434. return
  435. }
  436. switch c {
  437. case 'n': {
  438. iter.skipNull()
  439. if iter.Error != nil {
  440. return
  441. }
  442. return "" // null
  443. }
  444. case '{': {
  445. iter.skipWhitespaces()
  446. c = iter.readByte()
  447. if iter.Error != nil {
  448. return
  449. }
  450. switch c {
  451. case '}':
  452. return "" // end of object
  453. case '"':
  454. iter.unreadByte()
  455. return iter.readObjectField()
  456. default:
  457. iter.ReportError("ReadObject", `expect " after {`)
  458. return
  459. }
  460. }
  461. case ',':
  462. iter.skipWhitespaces()
  463. return iter.readObjectField()
  464. case '}':
  465. return "" // end of object
  466. default:
  467. iter.ReportError("ReadObject", `expect { or , or } or n`)
  468. return
  469. }
  470. }
  471. func (iter *Iterator) readObjectField() (ret string) {
  472. field := iter.ReadString()
  473. if iter.Error != nil {
  474. return
  475. }
  476. iter.skipWhitespaces()
  477. c := iter.readByte()
  478. if iter.Error != nil {
  479. return
  480. }
  481. if c != ':' {
  482. iter.ReportError("ReadObject", "expect : after object field")
  483. return
  484. }
  485. iter.skipWhitespaces()
  486. return field
  487. }
  488. func (iter *Iterator) ReadFloat32() (ret float32) {
  489. str := make([]byte, 0, 4)
  490. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  491. switch c {
  492. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  493. str = append(str, c)
  494. continue
  495. default:
  496. iter.unreadByte()
  497. }
  498. break
  499. }
  500. if iter.Error != nil && iter.Error != io.EOF {
  501. return
  502. }
  503. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
  504. if err != nil {
  505. iter.Error = err
  506. return
  507. }
  508. return float32(val)
  509. }
  510. func (iter *Iterator) ReadFloat64() (ret float64) {
  511. str := make([]byte, 0, 4)
  512. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  513. switch c {
  514. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  515. str = append(str, c)
  516. continue
  517. default:
  518. iter.unreadByte()
  519. }
  520. break
  521. }
  522. if iter.Error != nil && iter.Error != io.EOF {
  523. return
  524. }
  525. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
  526. if err != nil {
  527. iter.Error = err
  528. return
  529. }
  530. return val
  531. }
  532. func (iter *Iterator) ReadBool() (ret bool) {
  533. c := iter.readByte()
  534. if iter.Error != nil {
  535. return
  536. }
  537. switch c {
  538. case 't':
  539. iter.skipTrue()
  540. if iter.Error != nil {
  541. return
  542. }
  543. return true
  544. case 'f':
  545. iter.skipFalse()
  546. if iter.Error != nil {
  547. return
  548. }
  549. return false
  550. default:
  551. iter.ReportError("ReadBool", "expect t or f")
  552. return
  553. }
  554. }
  555. func (iter *Iterator) skipTrue() {
  556. c := iter.readByte()
  557. if c != 'r' {
  558. iter.ReportError("skipTrue", "expect r of true")
  559. return
  560. }
  561. c = iter.readByte()
  562. if c != 'u' {
  563. iter.ReportError("skipTrue", "expect u of true")
  564. return
  565. }
  566. c = iter.readByte()
  567. if c != 'e' {
  568. iter.ReportError("skipTrue", "expect e of true")
  569. return
  570. }
  571. }
  572. func (iter *Iterator) skipFalse() {
  573. c := iter.readByte()
  574. if c != 'a' {
  575. iter.ReportError("skipFalse", "expect a of false")
  576. return
  577. }
  578. c = iter.readByte()
  579. if c != 'l' {
  580. iter.ReportError("skipFalse", "expect l of false")
  581. return
  582. }
  583. c = iter.readByte()
  584. if c != 's' {
  585. iter.ReportError("skipFalse", "expect s of false")
  586. return
  587. }
  588. c = iter.readByte()
  589. if c != 'e' {
  590. iter.ReportError("skipFalse", "expect e of false")
  591. return
  592. }
  593. }
  594. func (iter *Iterator) ReadNull() (ret bool) {
  595. c := iter.readByte()
  596. if c == 'n' {
  597. iter.skipNull()
  598. return true
  599. }
  600. iter.unreadByte()
  601. return false
  602. }
  603. func (iter *Iterator) skipNull() {
  604. c := iter.readByte()
  605. if c != 'u' {
  606. iter.ReportError("skipNull", "expect u of null")
  607. return
  608. }
  609. c = iter.readByte()
  610. if c != 'l' {
  611. iter.ReportError("skipNull", "expect l of null")
  612. return
  613. }
  614. c = iter.readByte()
  615. if c != 'l' {
  616. iter.ReportError("skipNull", "expect l of null")
  617. return
  618. }
  619. }
  620. func (iter *Iterator) Skip() {
  621. c := iter.readByte()
  622. if iter.Error != nil {
  623. return
  624. }
  625. switch c {
  626. case '"':
  627. iter.skipString()
  628. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  629. iter.skipNumber()
  630. case '[':
  631. iter.skipArray()
  632. case '{':
  633. iter.skipObject()
  634. case 't':
  635. iter.skipTrue()
  636. case 'f':
  637. iter.skipFalse()
  638. case 'n':
  639. iter.skipNull()
  640. default:
  641. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  642. return
  643. }
  644. }
  645. func (iter *Iterator) skipString() {
  646. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  647. switch c {
  648. case '"':
  649. return // end of string found
  650. case '\\':
  651. iter.readByte() // " after \\ does not count
  652. }
  653. }
  654. }
  655. func (iter *Iterator) skipNumber() {
  656. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  657. switch c {
  658. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  659. continue
  660. default:
  661. iter.unreadByte()
  662. return
  663. }
  664. }
  665. }
  666. func (iter *Iterator) skipArray() {
  667. for {
  668. iter.skipWhitespaces()
  669. c := iter.readByte()
  670. if iter.Error != nil {
  671. return
  672. }
  673. if c == ']' {
  674. return
  675. }
  676. iter.unreadByte()
  677. iter.Skip()
  678. iter.skipWhitespaces()
  679. c = iter.readByte()
  680. switch c {
  681. case ',':
  682. iter.skipWhitespaces()
  683. continue
  684. case ']':
  685. return
  686. default:
  687. iter.ReportError("skipArray", "expects , or ]")
  688. return
  689. }
  690. }
  691. }
  692. func (iter *Iterator) skipObject() {
  693. iter.skipWhitespaces()
  694. c := iter.readByte()
  695. if iter.Error != nil {
  696. return
  697. }
  698. if c == '}' {
  699. return // end of object
  700. } else {
  701. iter.unreadByte()
  702. }
  703. for {
  704. iter.skipWhitespaces()
  705. c := iter.readByte()
  706. if c != '"' {
  707. iter.ReportError("skipObject", `expects "`)
  708. return
  709. }
  710. iter.skipString()
  711. iter.skipWhitespaces()
  712. c = iter.readByte()
  713. if c != ':' {
  714. iter.ReportError("skipObject", `expects :`)
  715. return
  716. }
  717. iter.skipWhitespaces()
  718. iter.Skip()
  719. iter.skipWhitespaces()
  720. c = iter.readByte()
  721. switch c {
  722. case ',':
  723. iter.skipWhitespaces()
  724. continue
  725. case '}':
  726. return // end of object
  727. default:
  728. iter.ReportError("skipObject", "expects , or }")
  729. return
  730. }
  731. }
  732. }