jsoniter.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. )
  7. type Iterator struct {
  8. reader io.Reader
  9. buf []byte
  10. head int
  11. tail int
  12. Error error
  13. }
  14. func Parse(reader io.Reader, bufSize int) *Iterator {
  15. iter := &Iterator{
  16. reader: reader,
  17. buf: make([]byte, bufSize),
  18. head: 0,
  19. tail: 0,
  20. }
  21. iter.skipWhitespaces()
  22. return iter
  23. }
  24. func ParseBytes(input []byte) *Iterator {
  25. iter := &Iterator{
  26. reader: nil,
  27. buf: input,
  28. head: 0,
  29. tail: len(input),
  30. }
  31. iter.skipWhitespaces()
  32. return iter
  33. }
  34. func ParseString(input string) *Iterator {
  35. return ParseBytes([]byte(input))
  36. }
  37. func (iter *Iterator) skipWhitespaces() {
  38. c := iter.readByte()
  39. for c == ' ' {
  40. c = iter.readByte()
  41. }
  42. iter.unreadByte()
  43. }
  44. func (iter *Iterator) ReportError(operation string, msg string) {
  45. iter.Error = fmt.Errorf("%s: %s, parsing %v at %s", operation, msg, iter.head, string(iter.buf[0:iter.tail]))
  46. }
  47. func (iter *Iterator) readByte() (ret byte) {
  48. if iter.head == iter.tail {
  49. if iter.reader == nil {
  50. iter.Error = io.EOF
  51. return
  52. }
  53. n, err := iter.reader.Read(iter.buf)
  54. if err != nil {
  55. iter.Error = err
  56. return
  57. }
  58. if n == 0 {
  59. iter.Error = io.EOF
  60. return
  61. }
  62. iter.head = 0
  63. iter.tail = n
  64. }
  65. ret = iter.buf[iter.head]
  66. iter.head += 1
  67. return ret
  68. }
  69. func (iter *Iterator) unreadByte() {
  70. if iter.head == 0 {
  71. iter.ReportError("unreadByte", "unread too many bytes")
  72. return
  73. }
  74. iter.head -= 1
  75. return
  76. }
  77. const maxUint64 = (1 << 64 - 1)
  78. const cutoffUint64 = maxUint64 / 10 + 1
  79. const maxUint32 = (1 << 32 - 1)
  80. const cutoffUint32 = maxUint32 / 10 + 1
  81. func (iter *Iterator) ReadUint64() (ret uint64) {
  82. c := iter.readByte()
  83. if iter.Error != nil {
  84. return
  85. }
  86. /* a single zero, or a series of integers */
  87. if c == '0' {
  88. return 0
  89. } else if c >= '1' && c <= '9' {
  90. for c >= '0' && c <= '9' {
  91. var v byte
  92. v = c - '0'
  93. if ret >= cutoffUint64 {
  94. iter.ReportError("ReadUint64", "overflow")
  95. return
  96. }
  97. ret = ret * uint64(10) + uint64(v)
  98. c = iter.readByte()
  99. if iter.Error != nil {
  100. if iter.Error == io.EOF {
  101. break
  102. } else {
  103. return 0
  104. }
  105. }
  106. }
  107. if iter.Error != io.EOF {
  108. iter.unreadByte()
  109. }
  110. } else {
  111. iter.ReportError("ReadUint64", "expects 0~9")
  112. return
  113. }
  114. return ret
  115. }
  116. func (iter *Iterator) ReadInt64() (ret int64) {
  117. c := iter.readByte()
  118. if iter.Error != nil {
  119. return
  120. }
  121. /* optional leading minus */
  122. if c == '-' {
  123. n := iter.ReadUint64()
  124. return -int64(n)
  125. } else {
  126. iter.unreadByte()
  127. n := iter.ReadUint64()
  128. return int64(n)
  129. }
  130. }
  131. func (iter *Iterator) ReadString() (ret string) {
  132. str := make([]byte, 0, 10)
  133. c := iter.readByte()
  134. if iter.Error != nil {
  135. return
  136. }
  137. if c != '"' {
  138. iter.ReportError("ReadString", "expects quote")
  139. return
  140. }
  141. for {
  142. c = iter.readByte()
  143. if iter.Error != nil {
  144. return
  145. }
  146. switch c {
  147. case '\\':
  148. c = iter.readByte()
  149. if iter.Error != nil {
  150. return
  151. }
  152. switch c {
  153. case 'u':
  154. r := iter.readU4()
  155. if iter.Error != nil {
  156. return
  157. }
  158. if utf16.IsSurrogate(r) {
  159. c = iter.readByte()
  160. if iter.Error != nil {
  161. return
  162. }
  163. if c != '\\' {
  164. iter.ReportError("ReadString",
  165. `expects \u after utf16 surrogate, but \ not found`)
  166. return
  167. }
  168. c = iter.readByte()
  169. if iter.Error != nil {
  170. return
  171. }
  172. if c != 'u' {
  173. iter.ReportError("ReadString",
  174. `expects \u after utf16 surrogate, but \u not found`)
  175. return
  176. }
  177. r2 := iter.readU4()
  178. if iter.Error != nil {
  179. return
  180. }
  181. combined := utf16.DecodeRune(r, r2)
  182. str = appendRune(str, combined)
  183. } else {
  184. str = appendRune(str, r)
  185. }
  186. case '"':
  187. str = append(str, '"')
  188. case '\\':
  189. str = append(str, '\\')
  190. case '/':
  191. str = append(str, '/')
  192. case 'b':
  193. str = append(str, '\b')
  194. case 'f':
  195. str = append(str, '\f')
  196. case 'n':
  197. str = append(str, '\n')
  198. case 'r':
  199. str = append(str, '\r')
  200. case 't':
  201. str = append(str, '\t')
  202. default:
  203. iter.ReportError("ReadString",
  204. `invalid escape char after \`)
  205. return
  206. }
  207. case '"':
  208. return string(str)
  209. default:
  210. str = append(str, c)
  211. }
  212. }
  213. }
  214. func (iter *Iterator) readU4() (ret rune) {
  215. for i := 0; i < 4; i++ {
  216. c := iter.readByte()
  217. if iter.Error != nil {
  218. return
  219. }
  220. if (c >= '0' && c <= '9') {
  221. if ret >= cutoffUint32 {
  222. iter.ReportError("readU4", "overflow")
  223. return
  224. }
  225. ret = ret * 16 + rune(c - '0')
  226. } else if ((c >= 'a' && c <= 'f') ) {
  227. if ret >= cutoffUint32 {
  228. iter.ReportError("readU4", "overflow")
  229. return
  230. }
  231. ret = ret * 16 + rune(c - 'a' + 10)
  232. } else {
  233. iter.ReportError("readU4", "expects 0~9 or a~f")
  234. return
  235. }
  236. }
  237. return ret
  238. }
  239. const (
  240. t1 = 0x00 // 0000 0000
  241. tx = 0x80 // 1000 0000
  242. t2 = 0xC0 // 1100 0000
  243. t3 = 0xE0 // 1110 0000
  244. t4 = 0xF0 // 1111 0000
  245. t5 = 0xF8 // 1111 1000
  246. maskx = 0x3F // 0011 1111
  247. mask2 = 0x1F // 0001 1111
  248. mask3 = 0x0F // 0000 1111
  249. mask4 = 0x07 // 0000 0111
  250. rune1Max = 1 << 7 - 1
  251. rune2Max = 1 << 11 - 1
  252. rune3Max = 1 << 16 - 1
  253. surrogateMin = 0xD800
  254. surrogateMax = 0xDFFF
  255. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  256. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  257. )
  258. func appendRune(p []byte, r rune) []byte {
  259. // Negative values are erroneous. Making it unsigned addresses the problem.
  260. switch i := uint32(r); {
  261. case i <= rune1Max:
  262. p = append(p, byte(r))
  263. return p
  264. case i <= rune2Max:
  265. p = append(p, t2 | byte(r >> 6))
  266. p = append(p, tx | byte(r) & maskx)
  267. return p
  268. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  269. r = RuneError
  270. fallthrough
  271. case i <= rune3Max:
  272. p = append(p, t3 | byte(r >> 12))
  273. p = append(p, tx | byte(r >> 6) & maskx)
  274. p = append(p, tx | byte(r) & maskx)
  275. return p
  276. default:
  277. p = append(p, t4 | byte(r >> 18))
  278. p = append(p, tx | byte(r >> 12) & maskx)
  279. p = append(p, tx | byte(r >> 6) & maskx)
  280. p = append(p, tx | byte(r) & maskx)
  281. return p
  282. }
  283. }
  284. func (iter *Iterator) ReadArray() (ret bool) {
  285. iter.skipWhitespaces()
  286. c := iter.readByte()
  287. if iter.Error != nil {
  288. return
  289. }
  290. switch c {
  291. case '[': {
  292. iter.skipWhitespaces()
  293. c = iter.readByte()
  294. if iter.Error != nil {
  295. return
  296. }
  297. if c == ']' {
  298. return false
  299. } else {
  300. iter.unreadByte()
  301. return true
  302. }
  303. }
  304. case ']': return false
  305. case ',': return true
  306. default:
  307. iter.ReportError("ReadArray", "expect [ or , or ]")
  308. return
  309. }
  310. }
  311. func (iter *Iterator) ReadObject() (ret string) {
  312. iter.skipWhitespaces()
  313. c := iter.readByte()
  314. if iter.Error != nil {
  315. return
  316. }
  317. switch c {
  318. case '{': {
  319. iter.skipWhitespaces()
  320. c = iter.readByte()
  321. if iter.Error != nil {
  322. return
  323. }
  324. switch c {
  325. case '}':
  326. return "" // end of object
  327. case '"':
  328. iter.unreadByte()
  329. field := iter.readObjectField()
  330. if iter.Error != nil {
  331. return
  332. }
  333. return field
  334. default:
  335. iter.ReportError("ReadObject", `expect " after {`)
  336. return
  337. }
  338. }
  339. case ',':
  340. iter.skipWhitespaces()
  341. field := iter.readObjectField()
  342. if iter.Error != nil {
  343. return
  344. }
  345. return field
  346. case '}':
  347. return "" // end of object
  348. default:
  349. iter.ReportError("ReadObject", `expect { or , or }`)
  350. return
  351. }
  352. }
  353. func (iter *Iterator) readObjectField() (ret string) {
  354. field := iter.ReadString()
  355. if iter.Error != nil {
  356. return
  357. }
  358. iter.skipWhitespaces()
  359. c := iter.readByte()
  360. if iter.Error != nil {
  361. return
  362. }
  363. if c != ':' {
  364. iter.ReportError("ReadObject", "expect : after object field")
  365. return
  366. }
  367. iter.skipWhitespaces()
  368. return field
  369. }