// Source file: feature_iter.go

  1. package jsoniter
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. )
  7. // ValueType the type for JSON element
  8. type ValueType int
  9. const (
  10. // Invalid invalid JSON element
  11. Invalid ValueType = iota
  12. // String JSON element "string"
  13. String
  14. // Number JSON element 100 or 0.10
  15. Number
  16. // Nil JSON element null
  17. Nil
  18. // Bool JSON element true or false
  19. Bool
  20. // Array JSON element []
  21. Array
  22. // Object JSON element {}
  23. Object
  24. )
  25. var hexDigits []byte
  26. var valueTypes []ValueType
  27. func init() {
  28. hexDigits = make([]byte, 256)
  29. for i := 0; i < len(hexDigits); i++ {
  30. hexDigits[i] = 255
  31. }
  32. for i := '0'; i <= '9'; i++ {
  33. hexDigits[i] = byte(i - '0')
  34. }
  35. for i := 'a'; i <= 'f'; i++ {
  36. hexDigits[i] = byte((i - 'a') + 10)
  37. }
  38. for i := 'A'; i <= 'F'; i++ {
  39. hexDigits[i] = byte((i - 'A') + 10)
  40. }
  41. valueTypes = make([]ValueType, 256)
  42. for i := 0; i < len(valueTypes); i++ {
  43. valueTypes[i] = Invalid
  44. }
  45. valueTypes['"'] = String
  46. valueTypes['-'] = Number
  47. valueTypes['0'] = Number
  48. valueTypes['1'] = Number
  49. valueTypes['2'] = Number
  50. valueTypes['3'] = Number
  51. valueTypes['4'] = Number
  52. valueTypes['5'] = Number
  53. valueTypes['6'] = Number
  54. valueTypes['7'] = Number
  55. valueTypes['8'] = Number
  56. valueTypes['9'] = Number
  57. valueTypes['t'] = Bool
  58. valueTypes['f'] = Bool
  59. valueTypes['n'] = Nil
  60. valueTypes['['] = Array
  61. valueTypes['{'] = Object
  62. }
  63. // Iterator is a io.Reader like object, with JSON specific read functions.
  64. // Error is not returned as return value, but stored as Error member on this iterator instance.
  65. type Iterator struct {
  66. cfg *frozenConfig
  67. reader io.Reader
  68. buf []byte
  69. head int
  70. tail int
  71. captureStartedAt int
  72. captured []byte
  73. Error error
  74. }
  75. // NewIterator creates an empty Iterator instance
  76. func NewIterator(cfg API) *Iterator {
  77. return &Iterator{
  78. cfg: cfg.(*frozenConfig),
  79. reader: nil,
  80. buf: nil,
  81. head: 0,
  82. tail: 0,
  83. }
  84. }
  85. // Parse creates an Iterator instance from io.Reader
  86. func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
  87. return &Iterator{
  88. cfg: cfg.(*frozenConfig),
  89. reader: reader,
  90. buf: make([]byte, bufSize),
  91. head: 0,
  92. tail: 0,
  93. }
  94. }
  95. // ParseBytes creates an Iterator instance from byte array
  96. func ParseBytes(cfg API, input []byte) *Iterator {
  97. return &Iterator{
  98. cfg: cfg.(*frozenConfig),
  99. reader: nil,
  100. buf: input,
  101. head: 0,
  102. tail: len(input),
  103. }
  104. }
  105. // ParseString creates an Iterator instance from string
  106. func ParseString(cfg API, input string) *Iterator {
  107. return ParseBytes(cfg, []byte(input))
  108. }
  109. // Pool returns a pool can provide more iterator with same configuration
  110. func (iter *Iterator) Pool() IteratorPool {
  111. return iter.cfg
  112. }
  113. // Reset reuse iterator instance by specifying another reader
  114. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  115. iter.reader = reader
  116. iter.head = 0
  117. iter.tail = 0
  118. return iter
  119. }
  120. // ResetBytes reuse iterator instance by specifying another byte array as input
  121. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  122. iter.reader = nil
  123. iter.buf = input
  124. iter.head = 0
  125. iter.tail = len(input)
  126. return iter
  127. }
  128. // WhatIsNext gets ValueType of relatively next json element
  129. func (iter *Iterator) WhatIsNext() ValueType {
  130. valueType := valueTypes[iter.nextToken()]
  131. iter.unreadByte()
  132. return valueType
  133. }
  134. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  135. for i := iter.head; i < iter.tail; i++ {
  136. c := iter.buf[i]
  137. switch c {
  138. case ' ', '\n', '\t', '\r':
  139. continue
  140. }
  141. iter.head = i
  142. return false
  143. }
  144. return true
  145. }
  146. func (iter *Iterator) isObjectEnd() bool {
  147. c := iter.nextToken()
  148. if c == ',' {
  149. return false
  150. }
  151. if c == '}' {
  152. return true
  153. }
  154. iter.ReportError("isObjectEnd", "object ended prematurely")
  155. return true
  156. }
  157. func (iter *Iterator) nextToken() byte {
  158. // a variation of skip whitespaces, returning the next non-whitespace token
  159. for {
  160. for i := iter.head; i < iter.tail; i++ {
  161. c := iter.buf[i]
  162. switch c {
  163. case ' ', '\n', '\t', '\r':
  164. continue
  165. }
  166. iter.head = i + 1
  167. return c
  168. }
  169. if !iter.loadMore() {
  170. return 0
  171. }
  172. }
  173. }
  174. // ReportError record a error in iterator instance with current position.
  175. func (iter *Iterator) ReportError(operation string, msg string) {
  176. if iter.Error != nil {
  177. if iter.Error != io.EOF {
  178. return
  179. }
  180. }
  181. peekStart := iter.head - 10
  182. if peekStart < 0 {
  183. peekStart = 0
  184. }
  185. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  186. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  187. }
  188. // CurrentBuffer gets current buffer as string for debugging purpose
  189. func (iter *Iterator) CurrentBuffer() string {
  190. peekStart := iter.head - 10
  191. if peekStart < 0 {
  192. peekStart = 0
  193. }
  194. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  195. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  196. }
  197. func (iter *Iterator) readByte() (ret byte) {
  198. if iter.head == iter.tail {
  199. if iter.loadMore() {
  200. ret = iter.buf[iter.head]
  201. iter.head++
  202. return ret
  203. }
  204. return 0
  205. }
  206. ret = iter.buf[iter.head]
  207. iter.head++
  208. return ret
  209. }
  210. func (iter *Iterator) loadMore() bool {
  211. if iter.reader == nil {
  212. if iter.Error == nil {
  213. iter.head = iter.tail
  214. iter.Error = io.EOF
  215. }
  216. return false
  217. }
  218. if iter.captureStartedAt != -1 {
  219. iter.captured = append(iter.captured,
  220. iter.buf[iter.captureStartedAt:iter.tail]...)
  221. iter.captureStartedAt = 0
  222. }
  223. for {
  224. n, err := iter.reader.Read(iter.buf)
  225. if n == 0 {
  226. if err != nil {
  227. if iter.Error == nil {
  228. iter.Error = err
  229. }
  230. return false
  231. }
  232. } else {
  233. iter.head = 0
  234. iter.tail = n
  235. return true
  236. }
  237. }
  238. }
  239. func (iter *Iterator) unreadByte() {
  240. if iter.head == 0 {
  241. iter.ReportError("unreadByte", "unread too many bytes")
  242. return
  243. }
  244. iter.head--
  245. return
  246. }
  247. // Read read the next JSON element as generic interface{}.
  248. func (iter *Iterator) Read() interface{} {
  249. valueType := iter.WhatIsNext()
  250. switch valueType {
  251. case String:
  252. return iter.ReadString()
  253. case Number:
  254. if iter.cfg.configBeforeFrozen.UseNumber {
  255. return json.Number(iter.readNumberAsString())
  256. }
  257. return iter.ReadFloat64()
  258. case Nil:
  259. iter.skipFourBytes('n', 'u', 'l', 'l')
  260. return nil
  261. case Bool:
  262. return iter.ReadBool()
  263. case Array:
  264. arr := []interface{}{}
  265. iter.ReadArrayCB(func(iter *Iterator) bool {
  266. arr = append(arr, iter.Read())
  267. return true
  268. })
  269. return arr
  270. case Object:
  271. obj := map[string]interface{}{}
  272. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  273. obj[field] = iter.Read()
  274. return true
  275. })
  276. return obj
  277. default:
  278. iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  279. return nil
  280. }
  281. }