feature_iter.go 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. //
  2. // Besides, jsoniter.Iterator provides a different set of interfaces
  3. // iterating given bytes/string/reader
  4. // and yielding parsed elements one by one.
  5. // This set of interfaces reads input as required and gives
  6. // better performance.
  7. package jsoniter
  8. import (
  9. "fmt"
  10. "io"
  11. )
  12. type ValueType int
  13. const (
  14. Invalid ValueType = iota
  15. String
  16. Number
  17. Nil
  18. Bool
  19. Array
  20. Object
  21. )
  22. var hexDigits []byte
  23. var valueTypes []ValueType
  24. func init() {
  25. hexDigits = make([]byte, 256)
  26. for i := 0; i < len(hexDigits); i++ {
  27. hexDigits[i] = 255
  28. }
  29. for i := '0'; i <= '9'; i++ {
  30. hexDigits[i] = byte(i - '0')
  31. }
  32. for i := 'a'; i <= 'f'; i++ {
  33. hexDigits[i] = byte((i - 'a') + 10)
  34. }
  35. for i := 'A'; i <= 'F'; i++ {
  36. hexDigits[i] = byte((i - 'A') + 10)
  37. }
  38. valueTypes = make([]ValueType, 256)
  39. for i := 0; i < len(valueTypes); i++ {
  40. valueTypes[i] = Invalid
  41. }
  42. valueTypes['"'] = String
  43. valueTypes['-'] = Number
  44. valueTypes['0'] = Number
  45. valueTypes['1'] = Number
  46. valueTypes['2'] = Number
  47. valueTypes['3'] = Number
  48. valueTypes['4'] = Number
  49. valueTypes['5'] = Number
  50. valueTypes['6'] = Number
  51. valueTypes['7'] = Number
  52. valueTypes['8'] = Number
  53. valueTypes['9'] = Number
  54. valueTypes['t'] = Bool
  55. valueTypes['f'] = Bool
  56. valueTypes['n'] = Nil
  57. valueTypes['['] = Array
  58. valueTypes['{'] = Object
  59. }
  60. // Iterator is a fast and flexible JSON parser
  61. type Iterator struct {
  62. cfg *frozenConfig
  63. reader io.Reader
  64. buf []byte
  65. head int
  66. tail int
  67. captureStartedAt int
  68. captured []byte
  69. Error error
  70. }
  71. // Create creates an empty Iterator instance
  72. func NewIterator(cfg *frozenConfig) *Iterator {
  73. return &Iterator{
  74. cfg: cfg,
  75. reader: nil,
  76. buf: nil,
  77. head: 0,
  78. tail: 0,
  79. }
  80. }
  81. // Parse parses a json buffer in io.Reader into an Iterator instance
  82. func Parse(cfg *frozenConfig, reader io.Reader, bufSize int) *Iterator {
  83. return &Iterator{
  84. cfg: cfg,
  85. reader: reader,
  86. buf: make([]byte, bufSize),
  87. head: 0,
  88. tail: 0,
  89. }
  90. }
  91. // ParseBytes parses a json byte slice into an Iterator instance
  92. func ParseBytes(cfg *frozenConfig, input []byte) *Iterator {
  93. return &Iterator{
  94. cfg: cfg,
  95. reader: nil,
  96. buf: input,
  97. head: 0,
  98. tail: len(input),
  99. }
  100. }
  101. // ParseString parses a json string into an Iterator instance
  102. func ParseString(cfg *frozenConfig, input string) *Iterator {
  103. return ParseBytes(cfg, []byte(input))
  104. }
  105. func (iter *Iterator) Config() *frozenConfig {
  106. return iter.cfg
  107. }
  108. // Reset can reset an Iterator instance for another json buffer in io.Reader
  109. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  110. iter.reader = reader
  111. iter.head = 0
  112. iter.tail = 0
  113. return iter
  114. }
  115. // ResetBytes can reset an Iterator instance for another json byte slice
  116. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  117. iter.reader = nil
  118. iter.buf = input
  119. iter.head = 0
  120. iter.tail = len(input)
  121. return iter
  122. }
  123. // WhatIsNext gets ValueType of relatively next json object
  124. func (iter *Iterator) WhatIsNext() ValueType {
  125. valueType := valueTypes[iter.nextToken()]
  126. iter.unreadByte()
  127. return valueType
  128. }
  129. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  130. for i := iter.head; i < iter.tail; i++ {
  131. c := iter.buf[i]
  132. switch c {
  133. case ' ', '\n', '\t', '\r':
  134. continue
  135. }
  136. iter.head = i
  137. return false
  138. }
  139. return true
  140. }
  141. func (iter *Iterator) isObjectEnd() bool {
  142. c := iter.nextToken()
  143. if c == ',' {
  144. return false
  145. }
  146. if c == '}' {
  147. return true
  148. }
  149. iter.ReportError("isObjectEnd", "object ended prematurely")
  150. return true
  151. }
  152. func (iter *Iterator) nextToken() byte {
  153. // a variation of skip whitespaces, returning the next non-whitespace token
  154. for {
  155. for i := iter.head; i < iter.tail; i++ {
  156. c := iter.buf[i]
  157. switch c {
  158. case ' ', '\n', '\t', '\r':
  159. continue
  160. }
  161. iter.head = i + 1
  162. return c
  163. }
  164. if !iter.loadMore() {
  165. return 0
  166. }
  167. }
  168. }
  169. func (iter *Iterator) ReportError(operation string, msg string) {
  170. if iter.Error != nil {
  171. if iter.Error != io.EOF {
  172. return
  173. }
  174. }
  175. peekStart := iter.head - 10
  176. if peekStart < 0 {
  177. peekStart = 0
  178. }
  179. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  180. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  181. }
  182. // CurrentBuffer gets current buffer as string
  183. func (iter *Iterator) CurrentBuffer() string {
  184. peekStart := iter.head - 10
  185. if peekStart < 0 {
  186. peekStart = 0
  187. }
  188. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  189. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  190. }
  191. func (iter *Iterator) readByte() (ret byte) {
  192. if iter.head == iter.tail {
  193. if iter.loadMore() {
  194. ret = iter.buf[iter.head]
  195. iter.head++
  196. return ret
  197. }
  198. return 0
  199. }
  200. ret = iter.buf[iter.head]
  201. iter.head++
  202. return ret
  203. }
  204. func (iter *Iterator) loadMore() bool {
  205. if iter.reader == nil {
  206. if iter.Error == nil {
  207. iter.head = iter.tail
  208. iter.Error = io.EOF
  209. }
  210. return false
  211. }
  212. if iter.captureStartedAt != -1 {
  213. iter.captured = append(iter.captured,
  214. iter.buf[iter.captureStartedAt:iter.tail]...)
  215. iter.captureStartedAt = 0
  216. }
  217. for {
  218. n, err := iter.reader.Read(iter.buf)
  219. if n == 0 {
  220. if err != nil {
  221. if iter.Error == nil {
  222. iter.Error = err
  223. }
  224. return false
  225. }
  226. } else {
  227. iter.head = 0
  228. iter.tail = n
  229. return true
  230. }
  231. }
  232. }
  233. func (iter *Iterator) unreadByte() {
  234. if iter.head == 0 {
  235. iter.ReportError("unreadByte", "unread too many bytes")
  236. return
  237. }
  238. iter.head--
  239. return
  240. }
  241. func (iter *Iterator) Read() interface{} {
  242. valueType := iter.WhatIsNext()
  243. switch valueType {
  244. case String:
  245. return iter.ReadString()
  246. case Number:
  247. return iter.ReadFloat64()
  248. case Nil:
  249. iter.skipFixedBytes(4) // null
  250. return nil
  251. case Bool:
  252. return iter.ReadBool()
  253. case Array:
  254. arr := []interface{}{}
  255. iter.ReadArrayCB(func(iter *Iterator) bool {
  256. arr = append(arr, iter.Read())
  257. return true
  258. })
  259. return arr
  260. case Object:
  261. obj := map[string]interface{}{}
  262. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  263. obj[field] = iter.Read()
  264. return true
  265. })
  266. return obj
  267. default:
  268. iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  269. return nil
  270. }
  271. }