iter.go 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349
  1. package jsoniter
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. )
  // ValueType names the kind of a JSON element.
  type ValueType int

  const (
  	// InvalidValue is the kind used for anything that is not a valid JSON element.
  	InvalidValue ValueType = iota
  	// StringValue is a JSON string, e.g. "string".
  	StringValue
  	// NumberValue is a JSON number, e.g. 100 or 0.10.
  	NumberValue
  	// NilValue is the JSON literal null.
  	NilValue
  	// BoolValue is the JSON literal true or false.
  	BoolValue
  	// ArrayValue is a JSON array, e.g. [].
  	ArrayValue
  	// ObjectValue is a JSON object, e.g. {}.
  	ObjectValue
  )
  25. var hexDigits []byte
  26. var valueTypes []ValueType
  27. func init() {
  28. hexDigits = make([]byte, 256)
  29. for i := 0; i < len(hexDigits); i++ {
  30. hexDigits[i] = 255
  31. }
  32. for i := '0'; i <= '9'; i++ {
  33. hexDigits[i] = byte(i - '0')
  34. }
  35. for i := 'a'; i <= 'f'; i++ {
  36. hexDigits[i] = byte((i - 'a') + 10)
  37. }
  38. for i := 'A'; i <= 'F'; i++ {
  39. hexDigits[i] = byte((i - 'A') + 10)
  40. }
  41. valueTypes = make([]ValueType, 256)
  42. for i := 0; i < len(valueTypes); i++ {
  43. valueTypes[i] = InvalidValue
  44. }
  45. valueTypes['"'] = StringValue
  46. valueTypes['-'] = NumberValue
  47. valueTypes['0'] = NumberValue
  48. valueTypes['1'] = NumberValue
  49. valueTypes['2'] = NumberValue
  50. valueTypes['3'] = NumberValue
  51. valueTypes['4'] = NumberValue
  52. valueTypes['5'] = NumberValue
  53. valueTypes['6'] = NumberValue
  54. valueTypes['7'] = NumberValue
  55. valueTypes['8'] = NumberValue
  56. valueTypes['9'] = NumberValue
  57. valueTypes['t'] = BoolValue
  58. valueTypes['f'] = BoolValue
  59. valueTypes['n'] = NilValue
  60. valueTypes['['] = ArrayValue
  61. valueTypes['{'] = ObjectValue
  62. }
  63. // Iterator is a io.Reader like object, with JSON specific read functions.
  64. // Error is not returned as return value, but stored as Error member on this iterator instance.
  65. type Iterator struct {
  66. cfg *frozenConfig
  67. reader io.Reader
  68. buf []byte
  69. head int
  70. tail int
  71. depth int
  72. captureStartedAt int
  73. captured []byte
  74. Error error
  75. Attachment interface{} // open for customized decoder
  76. }
  77. // NewIterator creates an empty Iterator instance
  78. func NewIterator(cfg API) *Iterator {
  79. return &Iterator{
  80. cfg: cfg.(*frozenConfig),
  81. reader: nil,
  82. buf: nil,
  83. head: 0,
  84. tail: 0,
  85. depth: 0,
  86. }
  87. }
  88. // Parse creates an Iterator instance from io.Reader
  89. func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
  90. return &Iterator{
  91. cfg: cfg.(*frozenConfig),
  92. reader: reader,
  93. buf: make([]byte, bufSize),
  94. head: 0,
  95. tail: 0,
  96. depth: 0,
  97. }
  98. }
  99. // ParseBytes creates an Iterator instance from byte array
  100. func ParseBytes(cfg API, input []byte) *Iterator {
  101. return &Iterator{
  102. cfg: cfg.(*frozenConfig),
  103. reader: nil,
  104. buf: input,
  105. head: 0,
  106. tail: len(input),
  107. depth: 0,
  108. }
  109. }
  110. // ParseString creates an Iterator instance from string
  111. func ParseString(cfg API, input string) *Iterator {
  112. return ParseBytes(cfg, []byte(input))
  113. }
  114. // Pool returns a pool can provide more iterator with same configuration
  115. func (iter *Iterator) Pool() IteratorPool {
  116. return iter.cfg
  117. }
  118. // Reset reuse iterator instance by specifying another reader
  119. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  120. iter.reader = reader
  121. iter.head = 0
  122. iter.tail = 0
  123. iter.depth = 0
  124. return iter
  125. }
  126. // ResetBytes reuse iterator instance by specifying another byte array as input
  127. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  128. iter.reader = nil
  129. iter.buf = input
  130. iter.head = 0
  131. iter.tail = len(input)
  132. iter.depth = 0
  133. return iter
  134. }
  135. // WhatIsNext gets ValueType of relatively next json element
  136. func (iter *Iterator) WhatIsNext() ValueType {
  137. valueType := valueTypes[iter.nextToken()]
  138. iter.unreadByte()
  139. return valueType
  140. }
  141. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  142. for i := iter.head; i < iter.tail; i++ {
  143. c := iter.buf[i]
  144. switch c {
  145. case ' ', '\n', '\t', '\r':
  146. continue
  147. }
  148. iter.head = i
  149. return false
  150. }
  151. return true
  152. }
  153. func (iter *Iterator) isObjectEnd() bool {
  154. c := iter.nextToken()
  155. if c == ',' {
  156. return false
  157. }
  158. if c == '}' {
  159. return true
  160. }
  161. iter.ReportError("isObjectEnd", "object ended prematurely, unexpected char "+string([]byte{c}))
  162. return true
  163. }
  164. func (iter *Iterator) nextToken() byte {
  165. // a variation of skip whitespaces, returning the next non-whitespace token
  166. for {
  167. for i := iter.head; i < iter.tail; i++ {
  168. c := iter.buf[i]
  169. switch c {
  170. case ' ', '\n', '\t', '\r':
  171. continue
  172. }
  173. iter.head = i + 1
  174. return c
  175. }
  176. if !iter.loadMore() {
  177. return 0
  178. }
  179. }
  180. }
  181. // ReportError record a error in iterator instance with current position.
  182. func (iter *Iterator) ReportError(operation string, msg string) {
  183. if iter.Error != nil {
  184. if iter.Error != io.EOF {
  185. return
  186. }
  187. }
  188. peekStart := iter.head - 10
  189. if peekStart < 0 {
  190. peekStart = 0
  191. }
  192. peekEnd := iter.head + 10
  193. if peekEnd > iter.tail {
  194. peekEnd = iter.tail
  195. }
  196. parsing := string(iter.buf[peekStart:peekEnd])
  197. contextStart := iter.head - 50
  198. if contextStart < 0 {
  199. contextStart = 0
  200. }
  201. contextEnd := iter.head + 50
  202. if contextEnd > iter.tail {
  203. contextEnd = iter.tail
  204. }
  205. context := string(iter.buf[contextStart:contextEnd])
  206. iter.Error = fmt.Errorf("%s: %s, error found in #%v byte of ...|%s|..., bigger context ...|%s|...",
  207. operation, msg, iter.head-peekStart, parsing, context)
  208. }
  209. // CurrentBuffer gets current buffer as string for debugging purpose
  210. func (iter *Iterator) CurrentBuffer() string {
  211. peekStart := iter.head - 10
  212. if peekStart < 0 {
  213. peekStart = 0
  214. }
  215. return fmt.Sprintf("parsing #%v byte, around ...|%s|..., whole buffer ...|%s|...", iter.head,
  216. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  217. }
  218. func (iter *Iterator) readByte() (ret byte) {
  219. if iter.head == iter.tail {
  220. if iter.loadMore() {
  221. ret = iter.buf[iter.head]
  222. iter.head++
  223. return ret
  224. }
  225. return 0
  226. }
  227. ret = iter.buf[iter.head]
  228. iter.head++
  229. return ret
  230. }
  231. func (iter *Iterator) loadMore() bool {
  232. if iter.reader == nil {
  233. if iter.Error == nil {
  234. iter.head = iter.tail
  235. iter.Error = io.EOF
  236. }
  237. return false
  238. }
  239. if iter.captured != nil {
  240. iter.captured = append(iter.captured,
  241. iter.buf[iter.captureStartedAt:iter.tail]...)
  242. iter.captureStartedAt = 0
  243. }
  244. for {
  245. n, err := iter.reader.Read(iter.buf)
  246. if n == 0 {
  247. if err != nil {
  248. if iter.Error == nil {
  249. iter.Error = err
  250. }
  251. return false
  252. }
  253. } else {
  254. iter.head = 0
  255. iter.tail = n
  256. return true
  257. }
  258. }
  259. }
  260. func (iter *Iterator) unreadByte() {
  261. if iter.Error != nil {
  262. return
  263. }
  264. iter.head--
  265. return
  266. }
  267. // Read read the next JSON element as generic interface{}.
  268. func (iter *Iterator) Read() interface{} {
  269. valueType := iter.WhatIsNext()
  270. switch valueType {
  271. case StringValue:
  272. return iter.ReadString()
  273. case NumberValue:
  274. if iter.cfg.configBeforeFrozen.UseNumber {
  275. return json.Number(iter.readNumberAsString())
  276. }
  277. return iter.ReadFloat64()
  278. case NilValue:
  279. iter.skipFourBytes('n', 'u', 'l', 'l')
  280. return nil
  281. case BoolValue:
  282. return iter.ReadBool()
  283. case ArrayValue:
  284. arr := []interface{}{}
  285. iter.ReadArrayCB(func(iter *Iterator) bool {
  286. var elem interface{}
  287. iter.ReadVal(&elem)
  288. arr = append(arr, elem)
  289. return true
  290. })
  291. return arr
  292. case ObjectValue:
  293. obj := map[string]interface{}{}
  294. iter.ReadMapCB(func(Iter *Iterator, field string) bool {
  295. var elem interface{}
  296. iter.ReadVal(&elem)
  297. obj[field] = elem
  298. return true
  299. })
  300. return obj
  301. default:
  302. iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  303. return nil
  304. }
  305. }
  306. // limit maximum depth of nesting, as allowed by https://tools.ietf.org/html/rfc7159#section-9
  307. const maxDepth = 10000
  308. func (iter *Iterator) incrementDepth() (success bool) {
  309. iter.depth++
  310. if iter.depth <= maxDepth {
  311. return true
  312. }
  313. iter.ReportError("incrementDepth", "exceeded max depth")
  314. return false
  315. }
  316. func (iter *Iterator) decrementDepth() (success bool) {
  317. iter.depth--
  318. if iter.depth >= 0 {
  319. return true
  320. }
  321. iter.ReportError("decrementDepth", "unexpected negative nesting")
  322. return false
  323. }