feature_iter.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. package jsoniter
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "io"
  6. "strconv"
  7. )
  8. // ValueType the type for JSON element
  9. type ValueType int
  10. const (
  11. // Invalid invalid JSON element
  12. Invalid ValueType = iota
  13. // String JSON element "string"
  14. String
  15. // Number JSON element 100 or 0.10
  16. Number
  17. // Nil JSON element null
  18. Nil
  19. // Bool JSON element true or false
  20. Bool
  21. // Array JSON element []
  22. Array
  23. // Object JSON element {}
  24. Object
  25. )
  26. var hexDigits []byte
  27. var valueTypes []ValueType
  28. func init() {
  29. hexDigits = make([]byte, 256)
  30. for i := 0; i < len(hexDigits); i++ {
  31. hexDigits[i] = 255
  32. }
  33. for i := '0'; i <= '9'; i++ {
  34. hexDigits[i] = byte(i - '0')
  35. }
  36. for i := 'a'; i <= 'f'; i++ {
  37. hexDigits[i] = byte((i - 'a') + 10)
  38. }
  39. for i := 'A'; i <= 'F'; i++ {
  40. hexDigits[i] = byte((i - 'A') + 10)
  41. }
  42. valueTypes = make([]ValueType, 256)
  43. for i := 0; i < len(valueTypes); i++ {
  44. valueTypes[i] = Invalid
  45. }
  46. valueTypes['"'] = String
  47. valueTypes['-'] = Number
  48. valueTypes['0'] = Number
  49. valueTypes['1'] = Number
  50. valueTypes['2'] = Number
  51. valueTypes['3'] = Number
  52. valueTypes['4'] = Number
  53. valueTypes['5'] = Number
  54. valueTypes['6'] = Number
  55. valueTypes['7'] = Number
  56. valueTypes['8'] = Number
  57. valueTypes['9'] = Number
  58. valueTypes['t'] = Bool
  59. valueTypes['f'] = Bool
  60. valueTypes['n'] = Nil
  61. valueTypes['['] = Array
  62. valueTypes['{'] = Object
  63. }
  64. // Iterator is a io.Reader like object, with JSON specific read functions.
  65. // Error is not returned as return value, but stored as Error member on this iterator instance.
  66. type Iterator struct {
  67. cfg *frozenConfig
  68. reader io.Reader
  69. buf []byte
  70. head int
  71. tail int
  72. captureStartedAt int
  73. captured []byte
  74. Error error
  75. }
  76. // NewIterator creates an empty Iterator instance
  77. func NewIterator(cfg API) *Iterator {
  78. return &Iterator{
  79. cfg: cfg.(*frozenConfig),
  80. reader: nil,
  81. buf: nil,
  82. head: 0,
  83. tail: 0,
  84. }
  85. }
  86. // Parse creates an Iterator instance from io.Reader
  87. func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
  88. return &Iterator{
  89. cfg: cfg.(*frozenConfig),
  90. reader: reader,
  91. buf: make([]byte, bufSize),
  92. head: 0,
  93. tail: 0,
  94. }
  95. }
  96. // ParseBytes creates an Iterator instance from byte array
  97. func ParseBytes(cfg API, input []byte) *Iterator {
  98. return &Iterator{
  99. cfg: cfg.(*frozenConfig),
  100. reader: nil,
  101. buf: input,
  102. head: 0,
  103. tail: len(input),
  104. }
  105. }
  106. // ParseString creates an Iterator instance from string
  107. func ParseString(cfg API, input string) *Iterator {
  108. return ParseBytes(cfg, []byte(input))
  109. }
  110. // Pool returns a pool can provide more iterator with same configuration
  111. func (iter *Iterator) Pool() IteratorPool {
  112. return iter.cfg
  113. }
  114. // Reset reuse iterator instance by specifying another reader
  115. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  116. iter.reader = reader
  117. iter.head = 0
  118. iter.tail = 0
  119. return iter
  120. }
  121. // ResetBytes reuse iterator instance by specifying another byte array as input
  122. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  123. iter.reader = nil
  124. iter.buf = input
  125. iter.head = 0
  126. iter.tail = len(input)
  127. return iter
  128. }
  129. // WhatIsNext gets ValueType of relatively next json element
  130. func (iter *Iterator) WhatIsNext() ValueType {
  131. valueType := valueTypes[iter.nextToken()]
  132. iter.unreadByte()
  133. return valueType
  134. }
  135. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  136. for i := iter.head; i < iter.tail; i++ {
  137. c := iter.buf[i]
  138. switch c {
  139. case ' ', '\n', '\t', '\r':
  140. continue
  141. }
  142. iter.head = i
  143. return false
  144. }
  145. return true
  146. }
  147. func (iter *Iterator) isObjectEnd() bool {
  148. c := iter.nextToken()
  149. if c == ',' {
  150. return false
  151. }
  152. if c == '}' {
  153. return true
  154. }
  155. iter.ReportError("isObjectEnd", "object ended prematurely")
  156. return true
  157. }
  158. func (iter *Iterator) nextToken() byte {
  159. // a variation of skip whitespaces, returning the next non-whitespace token
  160. for {
  161. for i := iter.head; i < iter.tail; i++ {
  162. c := iter.buf[i]
  163. switch c {
  164. case ' ', '\n', '\t', '\r':
  165. continue
  166. }
  167. iter.head = i + 1
  168. return c
  169. }
  170. if !iter.loadMore() {
  171. return 0
  172. }
  173. }
  174. }
  175. // ReportError record a error in iterator instance with current position.
  176. func (iter *Iterator) ReportError(operation string, msg string) {
  177. if iter.Error != nil {
  178. if iter.Error != io.EOF {
  179. return
  180. }
  181. }
  182. peekStart := iter.head - 10
  183. if peekStart < 0 {
  184. peekStart = 0
  185. }
  186. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  187. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  188. }
  189. // CurrentBuffer gets current buffer as string for debugging purpose
  190. func (iter *Iterator) CurrentBuffer() string {
  191. peekStart := iter.head - 10
  192. if peekStart < 0 {
  193. peekStart = 0
  194. }
  195. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  196. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  197. }
  198. func (iter *Iterator) readByte() (ret byte) {
  199. if iter.head == iter.tail {
  200. if iter.loadMore() {
  201. ret = iter.buf[iter.head]
  202. iter.head++
  203. return ret
  204. }
  205. return 0
  206. }
  207. ret = iter.buf[iter.head]
  208. iter.head++
  209. return ret
  210. }
  211. func (iter *Iterator) loadMore() bool {
  212. if iter.reader == nil {
  213. if iter.Error == nil {
  214. iter.head = iter.tail
  215. iter.Error = io.EOF
  216. }
  217. return false
  218. }
  219. if iter.captureStartedAt != -1 {
  220. iter.captured = append(iter.captured,
  221. iter.buf[iter.captureStartedAt:iter.tail]...)
  222. iter.captureStartedAt = 0
  223. }
  224. for {
  225. n, err := iter.reader.Read(iter.buf)
  226. if n == 0 {
  227. if err != nil {
  228. if iter.Error == nil {
  229. iter.Error = err
  230. }
  231. return false
  232. }
  233. } else {
  234. iter.head = 0
  235. iter.tail = n
  236. return true
  237. }
  238. }
  239. }
  240. func (iter *Iterator) unreadByte() {
  241. if iter.head == 0 {
  242. iter.ReportError("unreadByte", "unread too many bytes")
  243. return
  244. }
  245. iter.head--
  246. return
  247. }
  248. // Read read the next JSON element as generic interface{}.
  249. func (iter *Iterator) Read() interface{} {
  250. valueType := iter.WhatIsNext()
  251. switch valueType {
  252. case String:
  253. return iter.ReadString()
  254. case Number:
  255. if iter.cfg.configBeforeFrozen.UseNumber {
  256. num := iter.ReadInt64()
  257. return json.Number(strconv.FormatInt(num, 10))
  258. }
  259. return iter.ReadFloat64()
  260. case Nil:
  261. iter.skipFourBytes('n', 'u', 'l', 'l')
  262. return nil
  263. case Bool:
  264. return iter.ReadBool()
  265. case Array:
  266. arr := []interface{}{}
  267. iter.ReadArrayCB(func(iter *Iterator) bool {
  268. arr = append(arr, iter.Read())
  269. return true
  270. })
  271. return arr
  272. case Object:
  273. obj := map[string]interface{}{}
  274. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  275. obj[field] = iter.Read()
  276. return true
  277. })
  278. return obj
  279. default:
  280. iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  281. return nil
  282. }
  283. }