feature_iter.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
//
// In addition, jsoniter.Iterator provides a different set of interfaces
// that iterate over the given bytes, string, or reader
// and yield the parsed elements one by one.
// These interfaces read input only as it is required, which gives
// better performance.
  7. package jsoniter
  8. import (
  9. "encoding/base64"
  10. "fmt"
  11. "io"
  12. )
  13. type ValueType int
  14. const (
  15. Invalid ValueType = iota
  16. String
  17. Number
  18. Nil
  19. Bool
  20. Array
  21. Object
  22. )
  23. var hexDigits []byte
  24. var valueTypes []ValueType
  25. func init() {
  26. hexDigits = make([]byte, 256)
  27. for i := 0; i < len(hexDigits); i++ {
  28. hexDigits[i] = 255
  29. }
  30. for i := '0'; i <= '9'; i++ {
  31. hexDigits[i] = byte(i - '0')
  32. }
  33. for i := 'a'; i <= 'f'; i++ {
  34. hexDigits[i] = byte((i - 'a') + 10)
  35. }
  36. for i := 'A'; i <= 'F'; i++ {
  37. hexDigits[i] = byte((i - 'A') + 10)
  38. }
  39. valueTypes = make([]ValueType, 256)
  40. for i := 0; i < len(valueTypes); i++ {
  41. valueTypes[i] = Invalid
  42. }
  43. valueTypes['"'] = String
  44. valueTypes['-'] = Number
  45. valueTypes['0'] = Number
  46. valueTypes['1'] = Number
  47. valueTypes['2'] = Number
  48. valueTypes['3'] = Number
  49. valueTypes['4'] = Number
  50. valueTypes['5'] = Number
  51. valueTypes['6'] = Number
  52. valueTypes['7'] = Number
  53. valueTypes['8'] = Number
  54. valueTypes['9'] = Number
  55. valueTypes['t'] = Bool
  56. valueTypes['f'] = Bool
  57. valueTypes['n'] = Nil
  58. valueTypes['['] = Array
  59. valueTypes['{'] = Object
  60. }
  61. // Iterator is a fast and flexible JSON parser
  62. type Iterator struct {
  63. reader io.Reader
  64. buf []byte
  65. head int
  66. tail int
  67. Error error
  68. }
  69. // Create creates an empty Iterator instance
  70. func NewIterator() *Iterator {
  71. return &Iterator{
  72. reader: nil,
  73. buf: nil,
  74. head: 0,
  75. tail: 0,
  76. }
  77. }
  78. // Parse parses a json buffer in io.Reader into an Iterator instance
  79. func Parse(reader io.Reader, bufSize int) *Iterator {
  80. return &Iterator{
  81. reader: reader,
  82. buf: make([]byte, bufSize),
  83. head: 0,
  84. tail: 0,
  85. }
  86. }
  87. // ParseBytes parses a json byte slice into an Iterator instance
  88. func ParseBytes(input []byte) *Iterator {
  89. return &Iterator{
  90. reader: nil,
  91. buf: input,
  92. head: 0,
  93. tail: len(input),
  94. }
  95. }
  96. // ParseString parses a json string into an Iterator instance
  97. func ParseString(input string) *Iterator {
  98. return ParseBytes([]byte(input))
  99. }
  100. // Reset can reset an Iterator instance for another json buffer in io.Reader
  101. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  102. iter.reader = reader
  103. iter.head = 0
  104. iter.tail = 0
  105. return iter
  106. }
  107. // ResetBytes can reset an Iterator instance for another json byte slice
  108. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  109. iter.reader = nil
  110. iter.Error = nil
  111. iter.buf = input
  112. iter.head = 0
  113. iter.tail = len(input)
  114. return iter
  115. }
  116. // WhatIsNext gets ValueType of relatively next json object
  117. func (iter *Iterator) WhatIsNext() ValueType {
  118. valueType := valueTypes[iter.nextToken()]
  119. iter.unreadByte()
  120. return valueType
  121. }
  122. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  123. for i := iter.head; i < iter.tail; i++ {
  124. c := iter.buf[i]
  125. switch c {
  126. case ' ', '\n', '\t', '\r':
  127. continue
  128. }
  129. iter.head = i
  130. return false
  131. }
  132. return true
  133. }
  134. func (iter *Iterator) nextToken() byte {
  135. // a variation of skip whitespaces, returning the next non-whitespace token
  136. for {
  137. for i := iter.head; i < iter.tail; i++ {
  138. c := iter.buf[i]
  139. switch c {
  140. case ' ', '\n', '\t', '\r':
  141. continue
  142. }
  143. iter.head = i + 1
  144. return c
  145. }
  146. if !iter.loadMore() {
  147. return 0
  148. }
  149. }
  150. }
  151. func (iter *Iterator) reportError(operation string, msg string) {
  152. if iter.Error != nil {
  153. if iter.Error != io.EOF {
  154. return
  155. }
  156. }
  157. peekStart := iter.head - 10
  158. if peekStart < 0 {
  159. peekStart = 0
  160. }
  161. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  162. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  163. }
  164. // CurrentBuffer gets current buffer as string
  165. func (iter *Iterator) CurrentBuffer() string {
  166. peekStart := iter.head - 10
  167. if peekStart < 0 {
  168. peekStart = 0
  169. }
  170. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  171. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  172. }
  173. func (iter *Iterator) readByte() (ret byte) {
  174. if iter.head == iter.tail {
  175. if iter.loadMore() {
  176. ret = iter.buf[iter.head]
  177. iter.head++
  178. return ret
  179. }
  180. return 0
  181. }
  182. ret = iter.buf[iter.head]
  183. iter.head++
  184. return ret
  185. }
  186. func (iter *Iterator) loadMore() bool {
  187. if iter.reader == nil {
  188. if iter.Error == nil {
  189. iter.Error = io.EOF
  190. }
  191. return false
  192. }
  193. for {
  194. n, err := iter.reader.Read(iter.buf)
  195. if n == 0 {
  196. if err != nil {
  197. if iter.Error == nil {
  198. iter.Error = err
  199. }
  200. return false
  201. }
  202. } else {
  203. iter.head = 0
  204. iter.tail = n
  205. return true
  206. }
  207. }
  208. }
  209. func (iter *Iterator) unreadByte() {
  210. if iter.head == 0 {
  211. iter.reportError("unreadByte", "unread too many bytes")
  212. return
  213. }
  214. iter.head--
  215. return
  216. }
  217. func (iter *Iterator) Read() interface{} {
  218. valueType := iter.WhatIsNext()
  219. switch valueType {
  220. case String:
  221. return iter.ReadString()
  222. case Number:
  223. return iter.ReadFloat64()
  224. case Nil:
  225. iter.skipFixedBytes(4) // null
  226. return nil
  227. case Bool:
  228. return iter.ReadBool()
  229. case Array:
  230. arr := []interface{}{}
  231. iter.ReadArrayCB(func(iter *Iterator) bool {
  232. arr = append(arr, iter.Read())
  233. return true
  234. })
  235. return arr
  236. case Object:
  237. obj := map[string]interface{}{}
  238. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  239. obj[field] = iter.Read()
  240. return true
  241. })
  242. return obj
  243. default:
  244. iter.reportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  245. return nil
  246. }
  247. }
  248. // ReadBase64 reads a json object as Base64 in byte slice
  249. func (iter *Iterator) ReadBase64() (ret []byte) {
  250. src := iter.ReadStringAsSlice()
  251. if iter.Error != nil {
  252. return
  253. }
  254. b64 := base64.StdEncoding
  255. ret = make([]byte, b64.DecodedLen(len(src)))
  256. n, err := b64.Decode(ret, src)
  257. if err != nil {
  258. iter.Error = err
  259. return
  260. }
  261. return ret[:n]
  262. }