iterator.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. package jsoniter
  2. import (
  3. "encoding/base64"
  4. "fmt"
  5. "io"
  6. )
  7. type ValueType int
  8. const (
  9. Invalid ValueType = iota
  10. String
  11. Number
  12. Null
  13. Bool
  14. Array
  15. Object
  16. )
  17. var hexDigits []byte
  18. var valueTypes []ValueType
  19. func init() {
  20. hexDigits = make([]byte, 256)
  21. for i := 0; i < len(hexDigits); i++ {
  22. hexDigits[i] = 255
  23. }
  24. for i := '0'; i <= '9'; i++ {
  25. hexDigits[i] = byte(i - '0')
  26. }
  27. for i := 'a'; i <= 'f'; i++ {
  28. hexDigits[i] = byte((i - 'a') + 10)
  29. }
  30. for i := 'A'; i <= 'F'; i++ {
  31. hexDigits[i] = byte((i - 'A') + 10)
  32. }
  33. valueTypes = make([]ValueType, 256)
  34. for i := 0; i < len(valueTypes); i++ {
  35. valueTypes[i] = Invalid
  36. }
  37. valueTypes['"'] = String
  38. valueTypes['-'] = Number
  39. valueTypes['0'] = Number
  40. valueTypes['1'] = Number
  41. valueTypes['2'] = Number
  42. valueTypes['3'] = Number
  43. valueTypes['4'] = Number
  44. valueTypes['5'] = Number
  45. valueTypes['6'] = Number
  46. valueTypes['7'] = Number
  47. valueTypes['8'] = Number
  48. valueTypes['9'] = Number
  49. valueTypes['t'] = Bool
  50. valueTypes['f'] = Bool
  51. valueTypes['n'] = Null
  52. valueTypes['['] = Array
  53. valueTypes['{'] = Object
  54. }
  55. // Iterator is a fast and flexible JSON parser
  56. type Iterator struct {
  57. reader io.Reader
  58. buf []byte
  59. head int
  60. tail int
  61. Error error
  62. }
  63. // Create creates an empty Iterator instance
  64. func NewIterator() *Iterator {
  65. return &Iterator{
  66. reader: nil,
  67. buf: nil,
  68. head: 0,
  69. tail: 0,
  70. }
  71. }
  72. // Parse parses a json buffer in io.Reader into an Iterator instance
  73. func Parse(reader io.Reader, bufSize int) *Iterator {
  74. return &Iterator{
  75. reader: reader,
  76. buf: make([]byte, bufSize),
  77. head: 0,
  78. tail: 0,
  79. }
  80. }
  81. // ParseBytes parses a json byte slice into an Iterator instance
  82. func ParseBytes(input []byte) *Iterator {
  83. return &Iterator{
  84. reader: nil,
  85. buf: input,
  86. head: 0,
  87. tail: len(input),
  88. }
  89. }
  90. // ParseString parses a json string into an Iterator instance
  91. func ParseString(input string) *Iterator {
  92. return ParseBytes([]byte(input))
  93. }
  94. // Reset can reset an Iterator instance for another json buffer in io.Reader
  95. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  96. iter.reader = reader
  97. iter.head = 0
  98. iter.tail = 0
  99. return iter
  100. }
  101. // ResetBytes can reset an Iterator instance for another json byte slice
  102. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  103. iter.reader = nil
  104. iter.Error = nil
  105. iter.buf = input
  106. iter.head = 0
  107. iter.tail = len(input)
  108. return iter
  109. }
  110. // WhatIsNext gets ValueType of relatively next json object
  111. func (iter *Iterator) WhatIsNext() ValueType {
  112. valueType := valueTypes[iter.nextToken()]
  113. iter.unreadByte()
  114. return valueType
  115. }
  116. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  117. for i := iter.head; i < iter.tail; i++ {
  118. c := iter.buf[i]
  119. switch c {
  120. case ' ', '\n', '\t', '\r':
  121. continue
  122. }
  123. iter.head = i
  124. return false
  125. }
  126. return true
  127. }
  128. func (iter *Iterator) nextToken() byte {
  129. // a variation of skip whitespaces, returning the next non-whitespace token
  130. for {
  131. for i := iter.head; i < iter.tail; i++ {
  132. c := iter.buf[i]
  133. switch c {
  134. case ' ', '\n', '\t', '\r':
  135. continue
  136. }
  137. iter.head = i + 1
  138. return c
  139. }
  140. if !iter.loadMore() {
  141. return 0
  142. }
  143. }
  144. }
  145. func (iter *Iterator) reportError(operation string, msg string) {
  146. if iter.Error != nil {
  147. return
  148. }
  149. peekStart := iter.head - 10
  150. if peekStart < 0 {
  151. peekStart = 0
  152. }
  153. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  154. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  155. }
  156. // CurrentBuffer gets current buffer as string
  157. func (iter *Iterator) CurrentBuffer() string {
  158. peekStart := iter.head - 10
  159. if peekStart < 0 {
  160. peekStart = 0
  161. }
  162. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  163. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  164. }
  165. func (iter *Iterator) readByte() (ret byte) {
  166. if iter.head == iter.tail {
  167. if iter.loadMore() {
  168. ret = iter.buf[iter.head]
  169. iter.head++
  170. return ret
  171. }
  172. return 0
  173. }
  174. ret = iter.buf[iter.head]
  175. iter.head++
  176. return ret
  177. }
  178. func (iter *Iterator) loadMore() bool {
  179. if iter.reader == nil {
  180. iter.Error = io.EOF
  181. return false
  182. }
  183. for {
  184. n, err := iter.reader.Read(iter.buf)
  185. if n == 0 {
  186. if err != nil {
  187. iter.Error = err
  188. return false
  189. }
  190. } else {
  191. iter.head = 0
  192. iter.tail = n
  193. return true
  194. }
  195. }
  196. }
  197. func (iter *Iterator) unreadByte() {
  198. if iter.head == 0 {
  199. iter.reportError("unreadByte", "unread too many bytes")
  200. return
  201. }
  202. iter.head--
  203. return
  204. }
  205. // ReadArray reads a json object as Array
  206. func (iter *Iterator) ReadArray() (ret bool) {
  207. c := iter.nextToken()
  208. if iter.Error != nil {
  209. return
  210. }
  211. switch c {
  212. case 'n':
  213. iter.skipUntilBreak()
  214. return false // null
  215. case '[':
  216. c = iter.nextToken()
  217. if iter.Error != nil {
  218. return
  219. }
  220. if c == ']' {
  221. return false
  222. }
  223. iter.unreadByte()
  224. return true
  225. case ']':
  226. return false
  227. case ',':
  228. return true
  229. default:
  230. iter.reportError("ReadArray", "expect [ or , or ] or n, but found: " + string([]byte{c}))
  231. return
  232. }
  233. }
  234. // ReadBool reads a json object as Bool
  235. func (iter *Iterator) ReadBool() (ret bool) {
  236. c := iter.nextToken()
  237. if iter.Error != nil {
  238. return
  239. }
  240. switch c {
  241. case 't':
  242. iter.skipUntilBreak()
  243. return true
  244. case 'f':
  245. iter.skipUntilBreak()
  246. return false
  247. default:
  248. iter.reportError("ReadBool", "expect t or f")
  249. return
  250. }
  251. }
  252. // ReadBase64 reads a json object as Base64 in byte slice
  253. func (iter *Iterator) ReadBase64() (ret []byte) {
  254. src := iter.ReadStringAsSlice()
  255. if iter.Error != nil {
  256. return
  257. }
  258. b64 := base64.StdEncoding
  259. ret = make([]byte, b64.DecodedLen(len(src)))
  260. n, err := b64.Decode(ret, src)
  261. if err != nil {
  262. iter.Error = err
  263. return
  264. }
  265. return ret[:n]
  266. }
  267. // ReadNil reads a json object as nil and
  268. // returns whether it's a nil or not
  269. func (iter *Iterator) ReadNil() (ret bool) {
  270. c := iter.nextToken()
  271. if c == 'n' {
  272. iter.skipUntilBreak()
  273. return true
  274. }
  275. iter.unreadByte()
  276. return false
  277. }
  278. // Skip skips a json object and positions to relatively the next json object
  279. func (iter *Iterator) Skip() {
  280. c := iter.nextToken()
  281. switch c {
  282. case '"':
  283. iter.skipString()
  284. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't', 'f', 'n':
  285. iter.skipUntilBreak()
  286. case '[':
  287. iter.skipArray()
  288. case '{':
  289. iter.skipObject()
  290. default:
  291. iter.reportError("Skip", fmt.Sprintf("do not know how to skip: %v", c))
  292. return
  293. }
  294. }
  295. func (iter *Iterator) skipString() {
  296. for {
  297. end, escaped := iter.findStringEnd()
  298. if end == -1 {
  299. if !iter.loadMore() {
  300. return
  301. }
  302. if escaped {
  303. iter.head = 1 // skip the first char as last char read is \
  304. }
  305. } else {
  306. iter.head = end
  307. return
  308. }
  309. }
  310. }
  311. // adapted from: https://github.com/buger/jsonparser/blob/master/parser.go
  312. // Tries to find the end of string
  313. // Support if string contains escaped quote symbols.
  314. func (iter *Iterator) findStringEnd() (int, bool) {
  315. escaped := false
  316. for i := iter.head; i < iter.tail; i++ {
  317. c := iter.buf[i]
  318. if c == '"' {
  319. if !escaped {
  320. return i + 1, false
  321. }
  322. j := i - 1
  323. for {
  324. if j < iter.head || iter.buf[j] != '\\' {
  325. // even number of backslashes
  326. // either end of buffer, or " found
  327. return i + 1, true
  328. }
  329. j--
  330. if j < iter.head || iter.buf[j] != '\\' {
  331. // odd number of backslashes
  332. // it is \" or \\\"
  333. break
  334. }
  335. j--
  336. }
  337. } else if c == '\\' {
  338. escaped = true
  339. }
  340. }
  341. j := iter.tail - 1
  342. for {
  343. if j < iter.head || iter.buf[j] != '\\' {
  344. // even number of backslashes
  345. // either end of buffer, or " found
  346. return -1, false // do not end with \
  347. }
  348. j--
  349. if j < iter.head || iter.buf[j] != '\\' {
  350. // odd number of backslashes
  351. // it is \" or \\\"
  352. break
  353. }
  354. j--
  355. }
  356. return -1, true // end with \
  357. }
  358. func (iter *Iterator) skipArray() {
  359. level := 1
  360. for {
  361. for i := iter.head; i < iter.tail; i++ {
  362. switch iter.buf[i] {
  363. case '"': // If inside string, skip it
  364. iter.head = i + 1
  365. iter.skipString()
  366. i = iter.head - 1 // it will be i++ soon
  367. case '[': // If open symbol, increase level
  368. level++
  369. case ']': // If close symbol, increase level
  370. level--
  371. // If we have returned to the original level, we're done
  372. if level == 0 {
  373. iter.head = i + 1
  374. return
  375. }
  376. }
  377. }
  378. if !iter.loadMore() {
  379. return
  380. }
  381. }
  382. }
  383. func (iter *Iterator) skipObject() {
  384. level := 1
  385. for {
  386. for i := iter.head; i < iter.tail; i++ {
  387. switch iter.buf[i] {
  388. case '"': // If inside string, skip it
  389. iter.head = i + 1
  390. iter.skipString()
  391. i = iter.head - 1 // it will be i++ soon
  392. case '{': // If open symbol, increase level
  393. level++
  394. case '}': // If close symbol, increase level
  395. level--
  396. // If we have returned to the original level, we're done
  397. if level == 0 {
  398. iter.head = i + 1
  399. return
  400. }
  401. }
  402. }
  403. if !iter.loadMore() {
  404. return
  405. }
  406. }
  407. }
  408. func (iter *Iterator) skipUntilBreak() {
  409. // true, false, null, number
  410. for {
  411. for i := iter.head; i < iter.tail; i++ {
  412. c := iter.buf[i]
  413. switch c {
  414. case ' ', '\n', '\r', '\t', ',', '}', ']':
  415. iter.head = i
  416. return
  417. }
  418. }
  419. if !iter.loadMore() {
  420. return
  421. }
  422. }
  423. }