jsoniter.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. "strconv"
  7. )
// Iterator is a pull-style JSON reader. It consumes bytes either from an
// io.Reader through a refillable buffer (see Parse) or directly from a
// caller-supplied byte slice (see ParseBytes).
type Iterator struct {
	reader io.Reader // source used by readByte to refill buf; nil when buf holds the whole input
	buf    []byte    // window of input bytes currently available
	head   int       // index in buf of the next byte readByte returns
	tail   int       // index one past the last valid byte in buf
	Error  error     // set by readByte and ReportError on failure; io.EOF when input runs out
}
  15. func Parse(reader io.Reader, bufSize int) *Iterator {
  16. iter := &Iterator{
  17. reader: reader,
  18. buf: make([]byte, bufSize),
  19. head: 0,
  20. tail: 0,
  21. }
  22. iter.skipWhitespaces()
  23. return iter
  24. }
  25. func ParseBytes(input []byte) *Iterator {
  26. iter := &Iterator{
  27. reader: nil,
  28. buf: input,
  29. head: 0,
  30. tail: len(input),
  31. }
  32. iter.skipWhitespaces()
  33. return iter
  34. }
  35. func ParseString(input string) *Iterator {
  36. return ParseBytes([]byte(input))
  37. }
  38. func (iter *Iterator) skipWhitespaces() {
  39. c := iter.readByte()
  40. for c == ' ' || c == '\n' {
  41. c = iter.readByte()
  42. }
  43. iter.unreadByte()
  44. }
  45. func (iter *Iterator) ReportError(operation string, msg string) {
  46. iter.Error = fmt.Errorf("%s: %s, parsing %v at %s", operation, msg, iter.head, string(iter.buf[0:iter.tail]))
  47. }
  48. func (iter *Iterator) readByte() (ret byte) {
  49. if iter.head == iter.tail {
  50. if iter.reader == nil {
  51. iter.Error = io.EOF
  52. return
  53. }
  54. n, err := iter.reader.Read(iter.buf)
  55. if err != nil {
  56. iter.Error = err
  57. return
  58. }
  59. if n == 0 {
  60. iter.Error = io.EOF
  61. return
  62. }
  63. iter.head = 0
  64. iter.tail = n
  65. }
  66. ret = iter.buf[iter.head]
  67. iter.head += 1
  68. return ret
  69. }
  70. func (iter *Iterator) unreadByte() {
  71. if iter.head == 0 {
  72. iter.ReportError("unreadByte", "unread too many bytes")
  73. return
  74. }
  75. iter.head -= 1
  76. return
  77. }
  78. const maxUint64 = (1 << 64 - 1)
  79. const cutoffUint64 = maxUint64 / 10 + 1
  80. const maxUint32 = (1 << 32 - 1)
  81. const cutoffUint32 = maxUint32 / 10 + 1
  82. func (iter *Iterator) ReadUint64() (ret uint64) {
  83. c := iter.readByte()
  84. if iter.Error != nil {
  85. return
  86. }
  87. /* a single zero, or a series of integers */
  88. if c == '0' {
  89. return 0
  90. } else if c >= '1' && c <= '9' {
  91. for c >= '0' && c <= '9' {
  92. var v byte
  93. v = c - '0'
  94. if ret >= cutoffUint64 {
  95. iter.ReportError("ReadUint64", "overflow")
  96. return
  97. }
  98. ret = ret * uint64(10) + uint64(v)
  99. c = iter.readByte()
  100. if iter.Error != nil {
  101. if iter.Error == io.EOF {
  102. break
  103. } else {
  104. return 0
  105. }
  106. }
  107. }
  108. if iter.Error != io.EOF {
  109. iter.unreadByte()
  110. }
  111. } else {
  112. iter.ReportError("ReadUint64", "expects 0~9")
  113. return
  114. }
  115. return ret
  116. }
  117. func (iter *Iterator) ReadInt64() (ret int64) {
  118. c := iter.readByte()
  119. if iter.Error != nil {
  120. return
  121. }
  122. /* optional leading minus */
  123. if c == '-' {
  124. n := iter.ReadUint64()
  125. return -int64(n)
  126. } else {
  127. iter.unreadByte()
  128. n := iter.ReadUint64()
  129. return int64(n)
  130. }
  131. }
  132. func (iter *Iterator) ReadString() (ret string) {
  133. str := make([]byte, 0, 10)
  134. c := iter.readByte()
  135. if iter.Error != nil {
  136. return
  137. }
  138. if c != '"' {
  139. iter.ReportError("ReadString", "expects quote")
  140. return
  141. }
  142. for {
  143. c = iter.readByte()
  144. if iter.Error != nil {
  145. return
  146. }
  147. switch c {
  148. case '\\':
  149. c = iter.readByte()
  150. if iter.Error != nil {
  151. return
  152. }
  153. switch c {
  154. case 'u':
  155. r := iter.readU4()
  156. if iter.Error != nil {
  157. return
  158. }
  159. if utf16.IsSurrogate(r) {
  160. c = iter.readByte()
  161. if iter.Error != nil {
  162. return
  163. }
  164. if c != '\\' {
  165. iter.ReportError("ReadString",
  166. `expects \u after utf16 surrogate, but \ not found`)
  167. return
  168. }
  169. c = iter.readByte()
  170. if iter.Error != nil {
  171. return
  172. }
  173. if c != 'u' {
  174. iter.ReportError("ReadString",
  175. `expects \u after utf16 surrogate, but \u not found`)
  176. return
  177. }
  178. r2 := iter.readU4()
  179. if iter.Error != nil {
  180. return
  181. }
  182. combined := utf16.DecodeRune(r, r2)
  183. str = appendRune(str, combined)
  184. } else {
  185. str = appendRune(str, r)
  186. }
  187. case '"':
  188. str = append(str, '"')
  189. case '\\':
  190. str = append(str, '\\')
  191. case '/':
  192. str = append(str, '/')
  193. case 'b':
  194. str = append(str, '\b')
  195. case 'f':
  196. str = append(str, '\f')
  197. case 'n':
  198. str = append(str, '\n')
  199. case 'r':
  200. str = append(str, '\r')
  201. case 't':
  202. str = append(str, '\t')
  203. default:
  204. iter.ReportError("ReadString",
  205. `invalid escape char after \`)
  206. return
  207. }
  208. case '"':
  209. return string(str)
  210. default:
  211. str = append(str, c)
  212. }
  213. }
  214. }
  215. func (iter *Iterator) readU4() (ret rune) {
  216. for i := 0; i < 4; i++ {
  217. c := iter.readByte()
  218. if iter.Error != nil {
  219. return
  220. }
  221. if (c >= '0' && c <= '9') {
  222. if ret >= cutoffUint32 {
  223. iter.ReportError("readU4", "overflow")
  224. return
  225. }
  226. ret = ret * 16 + rune(c - '0')
  227. } else if ((c >= 'a' && c <= 'f') ) {
  228. if ret >= cutoffUint32 {
  229. iter.ReportError("readU4", "overflow")
  230. return
  231. }
  232. ret = ret * 16 + rune(c - 'a' + 10)
  233. } else {
  234. iter.ReportError("readU4", "expects 0~9 or a~f")
  235. return
  236. }
  237. }
  238. return ret
  239. }
  240. const (
  241. t1 = 0x00 // 0000 0000
  242. tx = 0x80 // 1000 0000
  243. t2 = 0xC0 // 1100 0000
  244. t3 = 0xE0 // 1110 0000
  245. t4 = 0xF0 // 1111 0000
  246. t5 = 0xF8 // 1111 1000
  247. maskx = 0x3F // 0011 1111
  248. mask2 = 0x1F // 0001 1111
  249. mask3 = 0x0F // 0000 1111
  250. mask4 = 0x07 // 0000 0111
  251. rune1Max = 1 << 7 - 1
  252. rune2Max = 1 << 11 - 1
  253. rune3Max = 1 << 16 - 1
  254. surrogateMin = 0xD800
  255. surrogateMax = 0xDFFF
  256. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  257. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  258. )
  259. func appendRune(p []byte, r rune) []byte {
  260. // Negative values are erroneous. Making it unsigned addresses the problem.
  261. switch i := uint32(r); {
  262. case i <= rune1Max:
  263. p = append(p, byte(r))
  264. return p
  265. case i <= rune2Max:
  266. p = append(p, t2 | byte(r >> 6))
  267. p = append(p, tx | byte(r) & maskx)
  268. return p
  269. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  270. r = RuneError
  271. fallthrough
  272. case i <= rune3Max:
  273. p = append(p, t3 | byte(r >> 12))
  274. p = append(p, tx | byte(r >> 6) & maskx)
  275. p = append(p, tx | byte(r) & maskx)
  276. return p
  277. default:
  278. p = append(p, t4 | byte(r >> 18))
  279. p = append(p, tx | byte(r >> 12) & maskx)
  280. p = append(p, tx | byte(r >> 6) & maskx)
  281. p = append(p, tx | byte(r) & maskx)
  282. return p
  283. }
  284. }
  285. func (iter *Iterator) ReadArray() (ret bool) {
  286. iter.skipWhitespaces()
  287. c := iter.readByte()
  288. if iter.Error != nil {
  289. return
  290. }
  291. switch c {
  292. case '[': {
  293. iter.skipWhitespaces()
  294. c = iter.readByte()
  295. if iter.Error != nil {
  296. return
  297. }
  298. if c == ']' {
  299. return false
  300. } else {
  301. iter.unreadByte()
  302. return true
  303. }
  304. }
  305. case ']': return false
  306. case ',':
  307. iter.skipWhitespaces()
  308. return true
  309. default:
  310. iter.ReportError("ReadArray", "expect [ or , or ]")
  311. return
  312. }
  313. }
  314. func (iter *Iterator) ReadObject() (ret string) {
  315. iter.skipWhitespaces()
  316. c := iter.readByte()
  317. if iter.Error != nil {
  318. return
  319. }
  320. switch c {
  321. case '{': {
  322. iter.skipWhitespaces()
  323. c = iter.readByte()
  324. if iter.Error != nil {
  325. return
  326. }
  327. switch c {
  328. case '}':
  329. return "" // end of object
  330. case '"':
  331. iter.unreadByte()
  332. field := iter.readObjectField()
  333. if iter.Error != nil {
  334. return
  335. }
  336. return field
  337. default:
  338. iter.ReportError("ReadObject", `expect " after {`)
  339. return
  340. }
  341. }
  342. case ',':
  343. iter.skipWhitespaces()
  344. field := iter.readObjectField()
  345. if iter.Error != nil {
  346. return
  347. }
  348. return field
  349. case '}':
  350. return "" // end of object
  351. default:
  352. iter.ReportError("ReadObject", `expect { or , or }`)
  353. return
  354. }
  355. }
  356. func (iter *Iterator) readObjectField() (ret string) {
  357. field := iter.ReadString()
  358. if iter.Error != nil {
  359. return
  360. }
  361. iter.skipWhitespaces()
  362. c := iter.readByte()
  363. if iter.Error != nil {
  364. return
  365. }
  366. if c != ':' {
  367. iter.ReportError("ReadObject", "expect : after object field")
  368. return
  369. }
  370. iter.skipWhitespaces()
  371. return field
  372. }
  373. func (iter *Iterator) ReadFloat64() (ret float64) {
  374. str := make([]byte, 0, 10)
  375. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  376. switch c {
  377. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  378. str = append(str, c)
  379. default:
  380. iter.unreadByte()
  381. val, err := strconv.ParseFloat(string(str), 64)
  382. if err != nil {
  383. iter.Error = err
  384. return
  385. }
  386. return val
  387. }
  388. }
  389. if iter.Error == io.EOF {
  390. val, err := strconv.ParseFloat(string(str), 64)
  391. if err != nil {
  392. iter.Error = err
  393. return
  394. }
  395. return val
  396. }
  397. return
  398. }
  399. func (iter *Iterator) Skip() {
  400. c := iter.readByte()
  401. if iter.Error != nil {
  402. return
  403. }
  404. switch c {
  405. case '"':
  406. iter.skipString()
  407. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  408. iter.skipNumber()
  409. case '[':
  410. iter.skipArray()
  411. case '{':
  412. iter.skipObject()
  413. default:
  414. iter.ReportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  415. return
  416. }
  417. }
  418. func (iter *Iterator) skipString() {
  419. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  420. switch c {
  421. case '"':
  422. return // end of string found
  423. case '\\':
  424. iter.readByte() // " after \\ does not count
  425. if iter.Error != nil {
  426. return
  427. }
  428. }
  429. }
  430. }
  431. func (iter *Iterator) skipNumber() {
  432. for c := iter.readByte(); iter.Error == nil; c = iter.readByte() {
  433. switch c {
  434. case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  435. continue
  436. default:
  437. iter.unreadByte()
  438. return
  439. }
  440. }
  441. }
  442. func (iter *Iterator) skipArray() {
  443. for {
  444. iter.skipWhitespaces()
  445. c := iter.readByte()
  446. if iter.Error != nil {
  447. return
  448. }
  449. if c == ']' {
  450. return
  451. }
  452. iter.unreadByte()
  453. iter.Skip()
  454. iter.skipWhitespaces()
  455. c = iter.readByte()
  456. if iter.Error != nil {
  457. return
  458. }
  459. switch c {
  460. case ',':
  461. iter.skipWhitespaces()
  462. continue
  463. case ']':
  464. return
  465. default:
  466. iter.ReportError("skipArray", "expects , or ]")
  467. return
  468. }
  469. }
  470. }
  471. func (iter *Iterator) skipObject() {
  472. for {
  473. iter.skipWhitespaces()
  474. c := iter.readByte()
  475. if c != '"' {
  476. iter.ReportError("skipObject", `expects "`)
  477. return
  478. }
  479. iter.skipString()
  480. iter.skipWhitespaces()
  481. c = iter.readByte()
  482. if iter.Error != nil {
  483. return
  484. }
  485. if c != ':' {
  486. iter.ReportError("skipObject", `expects :`)
  487. return
  488. }
  489. iter.skipWhitespaces()
  490. iter.Skip()
  491. iter.skipWhitespaces()
  492. c = iter.readByte()
  493. if iter.Error != nil {
  494. return
  495. }
  496. switch c {
  497. case ',':
  498. iter.skipWhitespaces()
  499. continue
  500. case '}':
  501. return // end of object
  502. default:
  503. iter.ReportError("skipObject", "expects , or }")
  504. return
  505. }
  506. }
  507. }