feature_iter.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. package jsoniter
  2. import (
  3. "fmt"
  4. "io"
  5. )
  // ValueType classifies a JSON element by its kind.
  type ValueType int

  const (
  	// Invalid marks input that does not start any JSON element.
  	Invalid ValueType = iota
  	// String is a JSON string, e.g. "string".
  	String
  	// Number is a JSON number, e.g. 100 or 0.10.
  	Number
  	// Nil is the JSON literal null.
  	Nil
  	// Bool is the JSON literal true or false.
  	Bool
  	// Array is a JSON array, e.g. [].
  	Array
  	// Object is a JSON object, e.g. {}.
  	Object
  )

  // Byte-indexed lookup tables; both hold 256 entries and are filled by init.
  var (
  	// hexDigits maps an ASCII hex digit (0-9, a-f, A-F) to its numeric value
  	// and every other byte to 255.
  	hexDigits []byte
  	// valueTypes maps the first byte of a JSON element to its ValueType and
  	// every byte that cannot start an element to Invalid.
  	valueTypes []ValueType
  )

  // init builds the hexDigits and valueTypes lookup tables.
  func init() {
  	hexDigits = make([]byte, 256)
  	for i := range hexDigits {
  		hexDigits[i] = 255 // sentinel: not a hex digit
  	}
  	for c := byte('0'); c <= '9'; c++ {
  		hexDigits[c] = c - '0'
  	}
  	for c := byte('a'); c <= 'f'; c++ {
  		hexDigits[c] = c - 'a' + 10
  	}
  	for c := byte('A'); c <= 'F'; c++ {
  		hexDigits[c] = c - 'A' + 10
  	}

  	valueTypes = make([]ValueType, 256)
  	for i := range valueTypes {
  		valueTypes[i] = Invalid
  	}
  	// A number starts with a minus sign or a decimal digit.
  	for _, c := range []byte("-0123456789") {
  		valueTypes[c] = Number
  	}
  	valueTypes['"'] = String
  	valueTypes['t'] = Bool // true
  	valueTypes['f'] = Bool // false
  	valueTypes['n'] = Nil  // null
  	valueTypes['['] = Array
  	valueTypes['{'] = Object
  }
  62. // Iterator is a io.Reader like object, with JSON specific read functions.
  63. // Error is not returned as return value, but stored as Error member on this iterator instance.
  64. type Iterator struct {
  65. cfg *frozenConfig
  66. reader io.Reader
  67. buf []byte
  68. head int
  69. tail int
  70. captureStartedAt int
  71. captured []byte
  72. Error error
  73. }
  74. // NewIterator creates an empty Iterator instance
  75. func NewIterator(cfg API) *Iterator {
  76. return &Iterator{
  77. cfg: cfg.(*frozenConfig),
  78. reader: nil,
  79. buf: nil,
  80. head: 0,
  81. tail: 0,
  82. }
  83. }
  84. // Parse creates an Iterator instance from io.Reader
  85. func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
  86. return &Iterator{
  87. cfg: cfg.(*frozenConfig),
  88. reader: reader,
  89. buf: make([]byte, bufSize),
  90. head: 0,
  91. tail: 0,
  92. }
  93. }
  94. // ParseBytes creates an Iterator instance from byte array
  95. func ParseBytes(cfg API, input []byte) *Iterator {
  96. return &Iterator{
  97. cfg: cfg.(*frozenConfig),
  98. reader: nil,
  99. buf: input,
  100. head: 0,
  101. tail: len(input),
  102. }
  103. }
  104. // ParseString creates an Iterator instance from string
  105. func ParseString(cfg API, input string) *Iterator {
  106. return ParseBytes(cfg, []byte(input))
  107. }
  108. // Pool returns a pool can provide more iterator with same configuration
  109. func (iter *Iterator) Pool() IteratorPool {
  110. return iter.cfg
  111. }
  112. // Reset reuse iterator instance by specifying another reader
  113. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  114. iter.reader = reader
  115. iter.head = 0
  116. iter.tail = 0
  117. return iter
  118. }
  119. // ResetBytes reuse iterator instance by specifying another byte array as input
  120. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  121. iter.reader = nil
  122. iter.buf = input
  123. iter.head = 0
  124. iter.tail = len(input)
  125. return iter
  126. }
  127. // WhatIsNext gets ValueType of relatively next json element
  128. func (iter *Iterator) WhatIsNext() ValueType {
  129. valueType := valueTypes[iter.nextToken()]
  130. iter.unreadByte()
  131. return valueType
  132. }
  133. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  134. for i := iter.head; i < iter.tail; i++ {
  135. c := iter.buf[i]
  136. switch c {
  137. case ' ', '\n', '\t', '\r':
  138. continue
  139. }
  140. iter.head = i
  141. return false
  142. }
  143. return true
  144. }
  145. func (iter *Iterator) isObjectEnd() bool {
  146. c := iter.nextToken()
  147. if c == ',' {
  148. return false
  149. }
  150. if c == '}' {
  151. return true
  152. }
  153. iter.ReportError("isObjectEnd", "object ended prematurely")
  154. return true
  155. }
  156. func (iter *Iterator) nextToken() byte {
  157. // a variation of skip whitespaces, returning the next non-whitespace token
  158. for {
  159. for i := iter.head; i < iter.tail; i++ {
  160. c := iter.buf[i]
  161. switch c {
  162. case ' ', '\n', '\t', '\r':
  163. continue
  164. }
  165. iter.head = i + 1
  166. return c
  167. }
  168. if !iter.loadMore() {
  169. return 0
  170. }
  171. }
  172. }
  173. // ReportError record a error in iterator instance with current position.
  174. func (iter *Iterator) ReportError(operation string, msg string) {
  175. if iter.Error != nil {
  176. if iter.Error != io.EOF {
  177. return
  178. }
  179. }
  180. peekStart := iter.head - 10
  181. if peekStart < 0 {
  182. peekStart = 0
  183. }
  184. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  185. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  186. }
  187. // CurrentBuffer gets current buffer as string for debugging purpose
  188. func (iter *Iterator) CurrentBuffer() string {
  189. peekStart := iter.head - 10
  190. if peekStart < 0 {
  191. peekStart = 0
  192. }
  193. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  194. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  195. }
  196. func (iter *Iterator) readByte() (ret byte) {
  197. if iter.head == iter.tail {
  198. if iter.loadMore() {
  199. ret = iter.buf[iter.head]
  200. iter.head++
  201. return ret
  202. }
  203. return 0
  204. }
  205. ret = iter.buf[iter.head]
  206. iter.head++
  207. return ret
  208. }
  209. func (iter *Iterator) loadMore() bool {
  210. if iter.reader == nil {
  211. if iter.Error == nil {
  212. iter.head = iter.tail
  213. iter.Error = io.EOF
  214. }
  215. return false
  216. }
  217. if iter.captureStartedAt != -1 {
  218. iter.captured = append(iter.captured,
  219. iter.buf[iter.captureStartedAt:iter.tail]...)
  220. iter.captureStartedAt = 0
  221. }
  222. for {
  223. n, err := iter.reader.Read(iter.buf)
  224. if n == 0 {
  225. if err != nil {
  226. if iter.Error == nil {
  227. iter.Error = err
  228. }
  229. return false
  230. }
  231. } else {
  232. iter.head = 0
  233. iter.tail = n
  234. return true
  235. }
  236. }
  237. }
  238. func (iter *Iterator) unreadByte() {
  239. if iter.head == 0 {
  240. iter.ReportError("unreadByte", "unread too many bytes")
  241. return
  242. }
  243. iter.head--
  244. return
  245. }
  246. // Read read the next JSON element as generic interface{}.
  247. func (iter *Iterator) Read() interface{} {
  248. valueType := iter.WhatIsNext()
  249. switch valueType {
  250. case String:
  251. return iter.ReadString()
  252. case Number:
  253. return iter.ReadFloat64()
  254. case Nil:
  255. iter.skipFourBytes('n', 'u', 'l', 'l')
  256. return nil
  257. case Bool:
  258. return iter.ReadBool()
  259. case Array:
  260. arr := []interface{}{}
  261. iter.ReadArrayCB(func(iter *Iterator) bool {
  262. arr = append(arr, iter.Read())
  263. return true
  264. })
  265. return arr
  266. case Object:
  267. obj := map[string]interface{}{}
  268. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  269. obj[field] = iter.Read()
  270. return true
  271. })
  272. return obj
  273. default:
  274. iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  275. return nil
  276. }
  277. }