feature_iter.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. package jsoniter
  2. import (
  3. "encoding/base64"
  4. "fmt"
  5. "io"
  6. )
  // ValueType classifies a JSON value by the kind of token that starts it.
  type ValueType int

  // The ValueType values, numbered consecutively starting from zero.
  const (
  	// Invalid is the zero value, used for bytes that cannot start a JSON value.
  	Invalid ValueType = iota
  	// String is a value starting with a double quote.
  	String
  	// Number is a value starting with '-' or a decimal digit.
  	Number
  	// Nil is a value starting with 'n'.
  	Nil
  	// Bool is a value starting with 't' or 'f'.
  	Bool
  	// Array is a value starting with '['.
  	Array
  	// Object is a value starting with '{'.
  	Object
  )
  17. var hexDigits []byte
  18. var valueTypes []ValueType
  19. func init() {
  20. hexDigits = make([]byte, 256)
  21. for i := 0; i < len(hexDigits); i++ {
  22. hexDigits[i] = 255
  23. }
  24. for i := '0'; i <= '9'; i++ {
  25. hexDigits[i] = byte(i - '0')
  26. }
  27. for i := 'a'; i <= 'f'; i++ {
  28. hexDigits[i] = byte((i - 'a') + 10)
  29. }
  30. for i := 'A'; i <= 'F'; i++ {
  31. hexDigits[i] = byte((i - 'A') + 10)
  32. }
  33. valueTypes = make([]ValueType, 256)
  34. for i := 0; i < len(valueTypes); i++ {
  35. valueTypes[i] = Invalid
  36. }
  37. valueTypes['"'] = String
  38. valueTypes['-'] = Number
  39. valueTypes['0'] = Number
  40. valueTypes['1'] = Number
  41. valueTypes['2'] = Number
  42. valueTypes['3'] = Number
  43. valueTypes['4'] = Number
  44. valueTypes['5'] = Number
  45. valueTypes['6'] = Number
  46. valueTypes['7'] = Number
  47. valueTypes['8'] = Number
  48. valueTypes['9'] = Number
  49. valueTypes['t'] = Bool
  50. valueTypes['f'] = Bool
  51. valueTypes['n'] = Nil
  52. valueTypes['['] = Array
  53. valueTypes['{'] = Object
  54. }
  // Iterator is a fast and flexible JSON parser.
  //
  // It scans buf[head:tail]; when a reader is attached, the buffer is refilled
  // from it once the loaded bytes are used up.
  type Iterator struct {
  	// reader supplies more input; nil when parsing an in-memory byte slice.
  	reader io.Reader
  	// buf holds the bytes currently available for parsing.
  	buf []byte
  	// head is the index in buf of the next unread byte; tail is the end of
  	// the loaded data.
  	head, tail int
  	// Error records a failure encountered while reading or parsing.
  	Error error
  }
  63. // Create creates an empty Iterator instance
  64. func NewIterator() *Iterator {
  65. return &Iterator{
  66. reader: nil,
  67. buf: nil,
  68. head: 0,
  69. tail: 0,
  70. }
  71. }
  72. // Parse parses a json buffer in io.Reader into an Iterator instance
  73. func Parse(reader io.Reader, bufSize int) *Iterator {
  74. return &Iterator{
  75. reader: reader,
  76. buf: make([]byte, bufSize),
  77. head: 0,
  78. tail: 0,
  79. }
  80. }
  81. // ParseBytes parses a json byte slice into an Iterator instance
  82. func ParseBytes(input []byte) *Iterator {
  83. return &Iterator{
  84. reader: nil,
  85. buf: input,
  86. head: 0,
  87. tail: len(input),
  88. }
  89. }
  90. // ParseString parses a json string into an Iterator instance
  91. func ParseString(input string) *Iterator {
  92. return ParseBytes([]byte(input))
  93. }
  94. // Reset can reset an Iterator instance for another json buffer in io.Reader
  95. func (iter *Iterator) Reset(reader io.Reader) *Iterator {
  96. iter.reader = reader
  97. iter.head = 0
  98. iter.tail = 0
  99. return iter
  100. }
  101. // ResetBytes can reset an Iterator instance for another json byte slice
  102. func (iter *Iterator) ResetBytes(input []byte) *Iterator {
  103. iter.reader = nil
  104. iter.Error = nil
  105. iter.buf = input
  106. iter.head = 0
  107. iter.tail = len(input)
  108. return iter
  109. }
  110. // WhatIsNext gets ValueType of relatively next json object
  111. func (iter *Iterator) WhatIsNext() ValueType {
  112. valueType := valueTypes[iter.nextToken()]
  113. iter.unreadByte()
  114. return valueType
  115. }
  116. func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
  117. for i := iter.head; i < iter.tail; i++ {
  118. c := iter.buf[i]
  119. switch c {
  120. case ' ', '\n', '\t', '\r':
  121. continue
  122. }
  123. iter.head = i
  124. return false
  125. }
  126. return true
  127. }
  128. func (iter *Iterator) nextToken() byte {
  129. // a variation of skip whitespaces, returning the next non-whitespace token
  130. for {
  131. for i := iter.head; i < iter.tail; i++ {
  132. c := iter.buf[i]
  133. switch c {
  134. case ' ', '\n', '\t', '\r':
  135. continue
  136. }
  137. iter.head = i + 1
  138. return c
  139. }
  140. if !iter.loadMore() {
  141. return 0
  142. }
  143. }
  144. }
  145. func (iter *Iterator) reportError(operation string, msg string) {
  146. if iter.Error != nil {
  147. if iter.Error != io.EOF {
  148. return
  149. }
  150. }
  151. peekStart := iter.head - 10
  152. if peekStart < 0 {
  153. peekStart = 0
  154. }
  155. iter.Error = fmt.Errorf("%s: %s, parsing %v ...%s... at %s", operation, msg, iter.head,
  156. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  157. }
  158. // CurrentBuffer gets current buffer as string
  159. func (iter *Iterator) CurrentBuffer() string {
  160. peekStart := iter.head - 10
  161. if peekStart < 0 {
  162. peekStart = 0
  163. }
  164. return fmt.Sprintf("parsing %v ...|%s|... at %s", iter.head,
  165. string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
  166. }
  167. func (iter *Iterator) readByte() (ret byte) {
  168. if iter.head == iter.tail {
  169. if iter.loadMore() {
  170. ret = iter.buf[iter.head]
  171. iter.head++
  172. return ret
  173. }
  174. return 0
  175. }
  176. ret = iter.buf[iter.head]
  177. iter.head++
  178. return ret
  179. }
  180. func (iter *Iterator) loadMore() bool {
  181. if iter.reader == nil {
  182. if iter.Error == nil {
  183. iter.Error = io.EOF
  184. }
  185. return false
  186. }
  187. for {
  188. n, err := iter.reader.Read(iter.buf)
  189. if n == 0 {
  190. if err != nil {
  191. if iter.Error == nil {
  192. iter.Error = err
  193. }
  194. return false
  195. }
  196. } else {
  197. iter.head = 0
  198. iter.tail = n
  199. return true
  200. }
  201. }
  202. }
  203. func (iter *Iterator) unreadByte() {
  204. if iter.head == 0 {
  205. iter.reportError("unreadByte", "unread too many bytes")
  206. return
  207. }
  208. iter.head--
  209. return
  210. }
  211. func (iter *Iterator) Read() interface{} {
  212. valueType := iter.WhatIsNext()
  213. switch valueType {
  214. case String:
  215. return iter.ReadString()
  216. case Number:
  217. return iter.ReadFloat64()
  218. case Nil:
  219. iter.skipFixedBytes(4) // null
  220. return nil
  221. case Bool:
  222. return iter.ReadBool()
  223. case Array:
  224. arr := []interface{}{}
  225. iter.ReadArrayCB(func(iter *Iterator) bool {
  226. arr = append(arr, iter.Read())
  227. return true
  228. })
  229. return arr
  230. case Object:
  231. obj := map[string]interface{}{}
  232. iter.ReadObjectCB(func(Iter *Iterator, field string) bool {
  233. obj[field] = iter.Read()
  234. return true
  235. })
  236. return obj
  237. default:
  238. iter.reportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
  239. return nil
  240. }
  241. }
  242. // ReadBase64 reads a json object as Base64 in byte slice
  243. func (iter *Iterator) ReadBase64() (ret []byte) {
  244. src := iter.ReadStringAsSlice()
  245. if iter.Error != nil {
  246. return
  247. }
  248. b64 := base64.StdEncoding
  249. ret = make([]byte, b64.DecodedLen(len(src)))
  250. n, err := b64.Decode(ret, src)
  251. if err != nil {
  252. iter.Error = err
  253. return
  254. }
  255. return ret[:n]
  256. }