jsoniter.go 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. "unsafe"
  8. )
  // digits maps a byte to its value as a hexadecimal digit: '0'-'9' give 0-9,
  // 'a'-'f' and 'A'-'F' give 10-15. Every other byte maps to 255.
  var digits []byte

  // init fills the digits lookup table.
  func init() {
  	digits = make([]byte, 256)
  	for i := range digits {
  		digits[i] = 255
  	}
  	for d := byte(0); d < 10; d++ {
  		digits['0'+d] = d
  	}
  	for d := byte(0); d < 6; d++ {
  		digits['a'+d] = 10 + d
  		digits['A'+d] = 10 + d
  	}
  }
  // Iterator is a forward-only JSON reader. Unread input is buf[head:tail].
  // When reader is non-nil, buf is refilled from it once it is exhausted.
  // Failures are recorded in Error instead of being returned by each method.
  type Iterator struct {
  	reader     io.Reader // source used to refill buf; nil for in-memory input
  	buf        []byte    // current window of input bytes
  	head, tail int       // next byte to read, and end of valid data in buf
  	Error      error     // last failure recorded (io.EOF at end of input)
  }
  32. func Parse(reader io.Reader, bufSize int) *Iterator {
  33. iter := &Iterator{
  34. reader: reader,
  35. buf: make([]byte, bufSize),
  36. head: 0,
  37. tail: 0,
  38. }
  39. iter.skipWhitespaces()
  40. return iter
  41. }
  42. func ParseBytes(input []byte) *Iterator {
  43. iter := &Iterator{
  44. reader: nil,
  45. buf: input,
  46. head: 0,
  47. tail: len(input),
  48. }
  49. iter.skipWhitespaces()
  50. return iter
  51. }
  52. func ParseString(input string) *Iterator {
  53. return ParseBytes([]byte(input))
  54. }
  55. func (iter *Iterator) skipWhitespaces() {
  56. c := iter.readByte()
  57. for c == ' ' || c == '\n' || c == '\t' {
  58. c = iter.readByte()
  59. }
  60. iter.unreadByte()
  61. }
  62. func (iter *Iterator) ReportError(operation string, msg string) {
  63. peekStart := iter.head - 10
  64. if peekStart < 0 {
  65. peekStart = 0
  66. }
  67. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  68. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  69. }
  70. func (iter *Iterator) CurrentBuffer() string {
  71. peekStart := iter.head - 10
  72. if peekStart < 0 {
  73. peekStart = 0
  74. }
  75. return fmt.Sprintf("parsing %v ...%s... at %s", iter.head,
  76. string(iter.buf[peekStart: iter.head]), string(iter.buf[0:iter.tail]))
  77. }
  78. func (iter *Iterator) readByte() (ret byte) {
  79. if iter.head == iter.tail {
  80. if iter.reader == nil {
  81. iter.Error = io.EOF
  82. return
  83. }
  84. for {
  85. n, err := iter.reader.Read(iter.buf)
  86. if n == 0 {
  87. if err != nil {
  88. iter.Error = err
  89. return
  90. } else {
  91. // n == 0, err == nil is not EOF
  92. continue
  93. }
  94. } else {
  95. iter.head = 0
  96. iter.tail = n
  97. break
  98. }
  99. }
  100. }
  101. ret = iter.buf[iter.head]
  102. iter.head++
  103. return ret
  104. }
  105. func (iter *Iterator) unreadByte() {
  106. if iter.head == 0 {
  107. iter.ReportError("unreadByte", "unread too many bytes")
  108. return
  109. }
  110. iter.head -= 1
  111. return
  112. }
  // Limits used for overflow detection while accumulating decimal digits.
  // A cutoff is the smallest value that overflows when multiplied by ten.
  const (
  	maxUint64    = 1<<64 - 1
  	cutoffUint64 = maxUint64/10 + 1
  	maxUint32    = 1<<32 - 1
  	cutoffUint32 = maxUint32/10 + 1
  )
  117. func (iter *Iterator) ReadUint() (ret uint) {
  118. val := iter.ReadUint64()
  119. converted := uint(val)
  120. if uint64(converted) != val {
  121. iter.ReportError("ReadUint", "int overflow")
  122. return
  123. }
  124. return converted
  125. }
  126. func (iter *Iterator) ReadUint8() (ret uint8) {
  127. val := iter.ReadUint64()
  128. converted := uint8(val)
  129. if uint64(converted) != val {
  130. iter.ReportError("ReadUint8", "int overflow")
  131. return
  132. }
  133. return converted
  134. }
  135. func (iter *Iterator) ReadUint16() (ret uint16) {
  136. val := iter.ReadUint64()
  137. converted := uint16(val)
  138. if uint64(converted) != val {
  139. iter.ReportError("ReadUint16", "int overflow")
  140. return
  141. }
  142. return converted
  143. }
  144. func (iter *Iterator) ReadUint32() (ret uint32) {
  145. val := iter.ReadUint64()
  146. converted := uint32(val)
  147. if uint64(converted) != val {
  148. iter.ReportError("ReadUint32", "int overflow")
  149. return
  150. }
  151. return converted
  152. }
  153. func (iter *Iterator) ReadUint64() (ret uint64) {
  154. c := iter.readByte()
  155. v := digits[c]
  156. if v == 0 {
  157. return 0 // single zero
  158. }
  159. if v == 255 {
  160. iter.ReportError("ReadUint64", "unexpected character")
  161. return
  162. }
  163. for {
  164. if ret >= cutoffUint64 {
  165. iter.ReportError("ReadUint64", "overflow")
  166. return
  167. }
  168. ret = ret * 10 + uint64(v)
  169. c = iter.readByte()
  170. v = digits[c]
  171. if v == 255 {
  172. iter.unreadByte()
  173. break
  174. }
  175. }
  176. return ret
  177. }
  178. func (iter *Iterator) ReadInt() (ret int) {
  179. val := iter.ReadInt64()
  180. converted := int(val)
  181. if int64(converted) != val {
  182. iter.ReportError("ReadInt", "int overflow")
  183. return
  184. }
  185. return converted
  186. }
  187. func (iter *Iterator) ReadInt8() (ret int8) {
  188. val := iter.ReadInt64()
  189. converted := int8(val)
  190. if int64(converted) != val {
  191. iter.ReportError("ReadInt8", "int overflow")
  192. return
  193. }
  194. return converted
  195. }
  196. func (iter *Iterator) ReadInt16() (ret int16) {
  197. val := iter.ReadInt64()
  198. converted := int16(val)
  199. if int64(converted) != val {
  200. iter.ReportError("ReadInt16", "int overflow")
  201. return
  202. }
  203. return converted
  204. }
  205. func (iter *Iterator) ReadInt32() (ret int32) {
  206. val := iter.ReadInt64()
  207. converted := int32(val)
  208. if int64(converted) != val {
  209. iter.ReportError("ReadInt32", "int overflow")
  210. return
  211. }
  212. return converted
  213. }
  214. func (iter *Iterator) ReadInt64() (ret int64) {
  215. c := iter.readByte()
  216. if iter.Error != nil {
  217. return
  218. }
  219. /* optional leading minus */
  220. if c == '-' {
  221. n := iter.ReadUint64()
  222. return -int64(n)
  223. } else {
  224. iter.unreadByte()
  225. n := iter.ReadUint64()
  226. return int64(n)
  227. }
  228. }
  229. func (iter *Iterator) ReadString() (ret string) {
  230. str := make([]byte, 0, 10)
  231. c := iter.readByte()
  232. if iter.Error != nil {
  233. return
  234. }
  235. switch c {
  236. case 'n':
  237. iter.skipNull()
  238. if iter.Error != nil {
  239. return
  240. }
  241. return ""
  242. case '"':
  243. // nothing
  244. default:
  245. iter.ReportError("ReadString", `expects " or n`)
  246. return
  247. }
  248. for {
  249. c = iter.readByte()
  250. if iter.Error != nil {
  251. return
  252. }
  253. switch c {
  254. case '\\':
  255. c = iter.readByte()
  256. if iter.Error != nil {
  257. return
  258. }
  259. switch c {
  260. case 'u':
  261. r := iter.readU4()
  262. if iter.Error != nil {
  263. return
  264. }
  265. if utf16.IsSurrogate(r) {
  266. c = iter.readByte()
  267. if iter.Error != nil {
  268. return
  269. }
  270. if c != '\\' {
  271. iter.ReportError("ReadString",
  272. `expects \u after utf16 surrogate, but \ not found`)
  273. return
  274. }
  275. c = iter.readByte()
  276. if iter.Error != nil {
  277. return
  278. }
  279. if c != 'u' {
  280. iter.ReportError("ReadString",
  281. `expects \u after utf16 surrogate, but \u not found`)
  282. return
  283. }
  284. r2 := iter.readU4()
  285. if iter.Error != nil {
  286. return
  287. }
  288. combined := utf16.DecodeRune(r, r2)
  289. str = appendRune(str, combined)
  290. } else {
  291. str = appendRune(str, r)
  292. }
  293. case '"':
  294. str = append(str, '"')
  295. case '\\':
  296. str = append(str, '\\')
  297. case '/':
  298. str = append(str, '/')
  299. case 'b':
  300. str = append(str, '\b')
  301. case 'f':
  302. str = append(str, '\f')
  303. case 'n':
  304. str = append(str, '\n')
  305. case 'r':
  306. str = append(str, '\r')
  307. case 't':
  308. str = append(str, '\t')
  309. default:
  310. iter.ReportError("ReadString",
  311. `invalid escape char after \`)
  312. return
  313. }
  314. case '"':
  315. return string(str)
  316. default:
  317. str = append(str, c)
  318. }
  319. }
  320. }
  321. func (iter *Iterator) readU4() (ret rune) {
  322. for i := 0; i < 4; i++ {
  323. c := iter.readByte()
  324. if iter.Error != nil {
  325. return
  326. }
  327. if (c >= '0' && c <= '9') {
  328. if ret >= cutoffUint32 {
  329. iter.ReportError("readU4", "overflow")
  330. return
  331. }
  332. ret = ret * 16 + rune(c - '0')
  333. } else if ((c >= 'a' && c <= 'f') ) {
  334. if ret >= cutoffUint32 {
  335. iter.ReportError("readU4", "overflow")
  336. return
  337. }
  338. ret = ret * 16 + rune(c - 'a' + 10)
  339. } else {
  340. iter.ReportError("readU4", "expects 0~9 or a~f")
  341. return
  342. }
  343. }
  344. return ret
  345. }
  // Bit patterns and limits for UTF-8 encoding.
  const (
  	// Lead-byte and continuation-byte markers.
  	t1 = 0x00 // 0000 0000
  	tx = 0x80 // 1000 0000
  	t2 = 0xC0 // 1100 0000
  	t3 = 0xE0 // 1110 0000
  	t4 = 0xF0 // 1111 0000
  	t5 = 0xF8 // 1111 1000

  	// Payload masks.
  	maskx = 0x3F // 0011 1111
  	mask2 = 0x1F // 0001 1111
  	mask3 = 0x0F // 0000 1111
  	mask4 = 0x07 // 0000 0111

  	// Largest code point encodable in one, two and three bytes.
  	rune1Max = 1<<7 - 1
  	rune2Max = 1<<11 - 1
  	rune3Max = 1<<16 - 1

  	// UTF-16 surrogate range; these code points are not valid runes.
  	surrogateMin = 0xD800
  	surrogateMax = 0xDFFF

  	MaxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
  	RuneError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
  )

  // appendRune appends the UTF-8 encoding of r to p and returns the extended
  // slice. Negative values, values above MaxRune and surrogate code points are
  // encoded as RuneError.
  func appendRune(p []byte, r rune) []byte {
  	// Viewing r as unsigned makes negative values compare as too large.
  	u := uint32(r)
  	if u <= rune1Max {
  		return append(p, byte(r))
  	}
  	if u <= rune2Max {
  		return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
  	}
  	invalid := u > MaxRune || (surrogateMin <= u && u <= surrogateMax)
  	if invalid {
  		r = RuneError
  	}
  	if invalid || u <= rune3Max {
  		return append(p,
  			t3|byte(r>>12),
  			tx|byte(r>>6)&maskx,
  			tx|byte(r)&maskx)
  	}
  	return append(p,
  		t4|byte(r>>18),
  		tx|byte(r>>12)&maskx,
  		tx|byte(r>>6)&maskx,
  		tx|byte(r)&maskx)
  }
  391. func (iter *Iterator) ReadArray() (ret bool) {
  392. iter.skipWhitespaces()
  393. c := iter.readByte()
  394. if iter.Error != nil {
  395. return
  396. }
  397. switch c {
  398. case 'n': {
  399. iter.skipNull()
  400. if iter.Error != nil {
  401. return
  402. }
  403. return false // null
  404. }
  405. case '[': {
  406. iter.skipWhitespaces()
  407. c = iter.readByte()
  408. if iter.Error != nil {
  409. return
  410. }
  411. if c == ']' {
  412. return false
  413. } else {
  414. iter.unreadByte()
  415. return true
  416. }
  417. }
  418. case ']': return false
  419. case ',':
  420. iter.skipWhitespaces()
  421. return true
  422. default:
  423. iter.ReportError("ReadArray", "expect [ or , or ] or n")
  424. return
  425. }
  426. }
  427. func (iter *Iterator) ReadObject() (ret string) {
  428. iter.skipWhitespaces()
  429. c := iter.readByte()
  430. if iter.Error != nil {
  431. return
  432. }
  433. switch c {
  434. case 'n': {
  435. iter.skipNull()
  436. if iter.Error != nil {
  437. return
  438. }
  439. return "" // null
  440. }
  441. case '{': {
  442. iter.skipWhitespaces()
  443. c = iter.readByte()
  444. if iter.Error != nil {
  445. return
  446. }
  447. switch c {
  448. case '}':
  449. return "" // end of object
  450. case '"':
  451. iter.unreadByte()
  452. field := iter.readObjectField()
  453. if iter.Error != nil {
  454. return
  455. }
  456. return field
  457. default:
  458. iter.ReportError("ReadObject", `expect " after {`)
  459. return
  460. }
  461. }
  462. case ',':
  463. iter.skipWhitespaces()
  464. field := iter.readObjectField()
  465. if iter.Error != nil {
  466. return
  467. }
  468. return field
  469. case '}':
  470. return "" // end of object
  471. default:
  472. iter.ReportError("ReadObject", `expect { or , or } or n`)
  473. return
  474. }
  475. }
  476. func (iter *Iterator) readObjectField() (ret string) {
  477. field := iter.ReadString()
  478. if iter.Error != nil {
  479. return
  480. }
  481. iter.skipWhitespaces()
  482. c := iter.readByte()
  483. if iter.Error != nil {
  484. return
  485. }
  486. if c != ':' {
  487. iter.ReportError("ReadObject", "expect : after object field")
  488. return
  489. }
  490. iter.skipWhitespaces()
  491. return field
  492. }
  493. func (iter *Iterator) ReadFloat32() (ret float32) {
  494. str := make([]byte, 0, 4)
  495. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  496. switch c {
  497. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  498. str = append(str, c)
  499. continue
  500. default:
  501. iter.unreadByte()
  502. }
  503. break
  504. }
  505. if iter.Error != nil && iter.Error != io.EOF {
  506. return
  507. }
  508. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 32)
  509. if err != nil {
  510. iter.Error = err
  511. return
  512. }
  513. return float32(val)
  514. }
  515. func (iter *Iterator) ReadFloat64() (ret float64) {
  516. str := make([]byte, 0, 4)
  517. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  518. switch c {
  519. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  520. str = append(str, c)
  521. continue
  522. default:
  523. iter.unreadByte()
  524. }
  525. break
  526. }
  527. if iter.Error != nil && iter.Error != io.EOF {
  528. return
  529. }
  530. val, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&str)), 64)
  531. if err != nil {
  532. iter.Error = err
  533. return
  534. }
  535. return val
  536. }
  537. func (iter *Iterator) ReadBool() (ret bool) {
  538. c := iter.readByte()
  539. if iter.Error != nil {
  540. return
  541. }
  542. switch c {
  543. case 't':
  544. iter.skipTrue()
  545. if iter.Error != nil {
  546. return
  547. }
  548. return true
  549. case 'f':
  550. iter.skipFalse()
  551. if iter.Error != nil {
  552. return
  553. }
  554. return false
  555. default:
  556. iter.ReportError("ReadBool", "expect t or f")
  557. return
  558. }
  559. }
  560. func (iter *Iterator) skipTrue() {
  561. c := iter.readByte()
  562. if iter.Error != nil {
  563. return
  564. }
  565. if c != 'r' {
  566. iter.ReportError("skipTrue", "expect r of true")
  567. return
  568. }
  569. c = iter.readByte()
  570. if iter.Error != nil {
  571. return
  572. }
  573. if c != 'u' {
  574. iter.ReportError("skipTrue", "expect u of true")
  575. return
  576. }
  577. c = iter.readByte()
  578. if iter.Error != nil {
  579. return
  580. }
  581. if c != 'e' {
  582. iter.ReportError("skipTrue", "expect e of true")
  583. return
  584. }
  585. }
  586. func (iter *Iterator) skipFalse() {
  587. c := iter.readByte()
  588. if iter.Error != nil {
  589. return
  590. }
  591. if c != 'a' {
  592. iter.ReportError("skipFalse", "expect a of false")
  593. return
  594. }
  595. c = iter.readByte()
  596. if iter.Error != nil {
  597. return
  598. }
  599. if c != 'l' {
  600. iter.ReportError("skipFalse", "expect l of false")
  601. return
  602. }
  603. c = iter.readByte()
  604. if iter.Error != nil {
  605. return
  606. }
  607. if c != 's' {
  608. iter.ReportError("skipFalse", "expect s of false")
  609. return
  610. }
  611. c = iter.readByte()
  612. if iter.Error != nil {
  613. return
  614. }
  615. if c != 'e' {
  616. iter.ReportError("skipFalse", "expect e of false")
  617. return
  618. }
  619. }
  620. func (iter *Iterator) ReadNull() (ret bool) {
  621. c := iter.readByte()
  622. if iter.Error != nil {
  623. return
  624. }
  625. if c == 'n' {
  626. iter.skipNull()
  627. if iter.Error != nil {
  628. return
  629. }
  630. return true
  631. }
  632. iter.unreadByte()
  633. return false
  634. }
  635. func (iter *Iterator) skipNull() {
  636. c := iter.readByte()
  637. if iter.Error != nil {
  638. return
  639. }
  640. if c != 'u' {
  641. iter.ReportError("skipNull", "expect u of null")
  642. return
  643. }
  644. c = iter.readByte()
  645. if iter.Error != nil {
  646. return
  647. }
  648. if c != 'l' {
  649. iter.ReportError("skipNull", "expect l of null")
  650. return
  651. }
  652. c = iter.readByte()
  653. if iter.Error != nil {
  654. return
  655. }
  656. if c != 'l' {
  657. iter.ReportError("skipNull", "expect l of null")
  658. return
  659. }
  660. }
  661. func (iter *Iterator) Skip() {
  662. c := iter.readByte()
  663. if iter.Error != nil {
  664. return
  665. }
  666. switch c {
  667. case '"':
  668. iter.skipString()
  669. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  670. iter.skipNumber()
  671. case '[':
  672. iter.skipArray()
  673. case '{':
  674. iter.skipObject()
  675. case 't':
  676. iter.skipTrue()
  677. case 'f':
  678. iter.skipFalse()
  679. case 'n':
  680. iter.skipNull()
  681. default:
  682. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  683. return
  684. }
  685. }
  686. func (iter *Iterator) skipString() {
  687. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  688. switch c {
  689. case '"':
  690. return // end of string found
  691. case '\\':
  692. iter.readByte() // " after \\ does not count
  693. if iter.Error != nil {
  694. return
  695. }
  696. }
  697. }
  698. }
  699. func (iter *Iterator) skipNumber() {
  700. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  701. switch c {
  702. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  703. continue
  704. default:
  705. iter.unreadByte()
  706. return
  707. }
  708. }
  709. }
  710. func (iter *Iterator) skipArray() {
  711. for {
  712. iter.skipWhitespaces()
  713. c := iter.readByte()
  714. if iter.Error != nil {
  715. return
  716. }
  717. if c == ']' {
  718. return
  719. }
  720. iter.unreadByte()
  721. iter.Skip()
  722. iter.skipWhitespaces()
  723. c = iter.readByte()
  724. if iter.Error != nil {
  725. return
  726. }
  727. switch c {
  728. case ',':
  729. iter.skipWhitespaces()
  730. continue
  731. case ']':
  732. return
  733. default:
  734. iter.ReportError("skipArray", "expects , or ]")
  735. return
  736. }
  737. }
  738. }
  739. func (iter *Iterator) skipObject() {
  740. iter.skipWhitespaces()
  741. c := iter.readByte()
  742. if iter.Error != nil {
  743. return
  744. }
  745. if c == '}' {
  746. return // end of object
  747. } else {
  748. iter.unreadByte()
  749. }
  750. for {
  751. iter.skipWhitespaces()
  752. c := iter.readByte()
  753. if iter.Error != nil {
  754. return
  755. }
  756. if c != '"' {
  757. iter.ReportError("skipObject", `expects "`)
  758. return
  759. }
  760. iter.skipString()
  761. iter.skipWhitespaces()
  762. c = iter.readByte()
  763. if iter.Error != nil {
  764. return
  765. }
  766. if c != ':' {
  767. iter.ReportError("skipObject", `expects :`)
  768. return
  769. }
  770. iter.skipWhitespaces()
  771. iter.Skip()
  772. iter.skipWhitespaces()
  773. c = iter.readByte()
  774. if iter.Error != nil {
  775. return
  776. }
  777. switch c {
  778. case ',':
  779. iter.skipWhitespaces()
  780. continue
  781. case '}':
  782. return // end of object
  783. default:
  784. iter.ReportError("skipObject", "expects , or }")
  785. return
  786. }
  787. }
  788. }