jsoniter.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. package jsoniter
  2. import (
  3. "io"
  4. "fmt"
  5. "unicode/utf16"
  6. )
  7. type Iterator struct {
  8. reader io.Reader
  9. buf []byte
  10. head int
  11. tail int
  12. Error error
  13. }
  14. func Parse(reader io.Reader, bufSize int) *Iterator {
  15. return &Iterator{
  16. reader: reader,
  17. buf: make([]byte, bufSize),
  18. head: 0,
  19. tail: 0,
  20. }
  21. }
  22. func ParseBytes(input []byte) *Iterator {
  23. return &Iterator{
  24. reader: nil,
  25. buf: input,
  26. head: 0,
  27. tail: len(input),
  28. }
  29. }
  30. func ParseString(input string) *Iterator {
  31. return ParseBytes([]byte(input))
  32. }
  33. func (iter *Iterator) ReportError(operation string, msg string) {
  34. iter.Error = fmt.Errorf("%s: %s, parsing %v at %s", operation, msg, iter.head, string(iter.buf[0:iter.tail]))
  35. }
  36. func (iter *Iterator) readByte() (ret byte) {
  37. if iter.head == iter.tail {
  38. if iter.reader == nil {
  39. iter.Error = io.EOF
  40. return
  41. }
  42. n, err := iter.reader.Read(iter.buf)
  43. if err != nil {
  44. iter.Error = err
  45. return
  46. }
  47. if n == 0 {
  48. iter.Error = io.EOF
  49. return
  50. }
  51. iter.head = 0
  52. iter.tail = n
  53. }
  54. ret = iter.buf[iter.head]
  55. iter.head += 1
  56. return ret
  57. }
  58. func (iter *Iterator) unreadByte() {
  59. if iter.head == 0 {
  60. iter.ReportError("unreadByte", "unread too many bytes")
  61. return
  62. }
  63. iter.head -= 1
  64. return
  65. }
  66. const maxUint64 = (1 << 64 - 1)
  67. const cutoffUint64 = maxUint64 / 10 + 1
  68. const maxUint32 = (1 << 32 - 1)
  69. const cutoffUint32 = maxUint32 / 10 + 1
  70. func (iter *Iterator) ReadUint64() (ret uint64) {
  71. c := iter.readByte()
  72. if iter.Error != nil {
  73. return
  74. }
  75. /* a single zero, or a series of integers */
  76. if c == '0' {
  77. return 0
  78. } else if c >= '1' && c <= '9' {
  79. for c >= '0' && c <= '9' {
  80. var v byte
  81. v = c - '0'
  82. if ret >= cutoffUint64 {
  83. iter.ReportError("ReadUint64", "overflow")
  84. return
  85. }
  86. ret = ret * uint64(10) + uint64(v)
  87. c = iter.readByte()
  88. if iter.Error != nil {
  89. if iter.Error == io.EOF {
  90. break
  91. } else {
  92. return 0
  93. }
  94. }
  95. }
  96. if iter.Error != io.EOF {
  97. iter.unreadByte()
  98. }
  99. } else {
  100. iter.ReportError("ReadUint64", "expects 0~9")
  101. return
  102. }
  103. return ret
  104. }
  105. func (iter *Iterator) ReadInt64() (ret int64) {
  106. c := iter.readByte()
  107. if iter.Error != nil {
  108. return
  109. }
  110. /* optional leading minus */
  111. if c == '-' {
  112. n := iter.ReadUint64()
  113. return -int64(n)
  114. } else {
  115. iter.unreadByte()
  116. n := iter.ReadUint64()
  117. return int64(n)
  118. }
  119. }
  120. func (iter *Iterator) ReadString() (ret string) {
  121. str := make([]byte, 0, 10)
  122. c := iter.readByte()
  123. if iter.Error != nil {
  124. return
  125. }
  126. if c != '"' {
  127. iter.ReportError("ReadString", "expects quote")
  128. return
  129. }
  130. for {
  131. c = iter.readByte()
  132. if iter.Error != nil {
  133. return
  134. }
  135. switch c {
  136. case '\\':
  137. c = iter.readByte()
  138. if iter.Error != nil {
  139. return
  140. }
  141. switch c {
  142. case 'u':
  143. r := iter.readU4()
  144. if iter.Error != nil {
  145. return
  146. }
  147. if utf16.IsSurrogate(r) {
  148. c = iter.readByte()
  149. if iter.Error != nil {
  150. return
  151. }
  152. if c != '\\' {
  153. iter.ReportError("ReadString",
  154. `expects \u after utf16 surrogate, but \ not found`)
  155. return
  156. }
  157. c = iter.readByte()
  158. if iter.Error != nil {
  159. return
  160. }
  161. if c != 'u' {
  162. iter.ReportError("ReadString",
  163. `expects \u after utf16 surrogate, but \u not found`)
  164. return
  165. }
  166. r2 := iter.readU4()
  167. if iter.Error != nil {
  168. return
  169. }
  170. combined := utf16.DecodeRune(r, r2)
  171. str = appendRune(str, combined)
  172. } else {
  173. str = appendRune(str, r)
  174. }
  175. case '"':
  176. str = append(str, '"')
  177. case '\\':
  178. str = append(str, '\\')
  179. case '/':
  180. str = append(str, '/')
  181. case 'b':
  182. str = append(str, '\b')
  183. case 'f':
  184. str = append(str, '\f')
  185. case 'n':
  186. str = append(str, '\n')
  187. case 'r':
  188. str = append(str, '\r')
  189. case 't':
  190. str = append(str, '\t')
  191. default:
  192. iter.ReportError("ReadString",
  193. `invalid escape char after \`)
  194. return
  195. }
  196. case '"':
  197. return string(str)
  198. default:
  199. str = append(str, c)
  200. }
  201. }
  202. }
  203. func (iter *Iterator) readU4() (ret rune) {
  204. for i := 0; i < 4; i++ {
  205. c := iter.readByte()
  206. if iter.Error != nil {
  207. return
  208. }
  209. if (c >= '0' && c <= '9') {
  210. if ret >= cutoffUint32 {
  211. iter.ReportError("readU4", "overflow")
  212. return
  213. }
  214. ret = ret * 16 + rune(c - '0')
  215. } else if ((c >= 'a' && c <= 'f') ) {
  216. if ret >= cutoffUint32 {
  217. iter.ReportError("readU4", "overflow")
  218. return
  219. }
  220. ret = ret * 16 + rune(c - 'a' + 10)
  221. } else {
  222. iter.ReportError("readU4", "expects 0~9 or a~f")
  223. return
  224. }
  225. }
  226. return ret
  227. }
  228. const (
  229. t1 = 0x00 // 0000 0000
  230. tx = 0x80 // 1000 0000
  231. t2 = 0xC0 // 1100 0000
  232. t3 = 0xE0 // 1110 0000
  233. t4 = 0xF0 // 1111 0000
  234. t5 = 0xF8 // 1111 1000
  235. maskx = 0x3F // 0011 1111
  236. mask2 = 0x1F // 0001 1111
  237. mask3 = 0x0F // 0000 1111
  238. mask4 = 0x07 // 0000 0111
  239. rune1Max = 1 << 7 - 1
  240. rune2Max = 1 << 11 - 1
  241. rune3Max = 1 << 16 - 1
  242. surrogateMin = 0xD800
  243. surrogateMax = 0xDFFF
  244. MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
  245. RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
  246. )
  247. func appendRune(p []byte, r rune) []byte {
  248. // Negative values are erroneous. Making it unsigned addresses the problem.
  249. switch i := uint32(r); {
  250. case i <= rune1Max:
  251. p = append(p, byte(r))
  252. return p
  253. case i <= rune2Max:
  254. p = append(p, t2 | byte(r >> 6))
  255. p = append(p, tx | byte(r) & maskx)
  256. return p
  257. case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
  258. r = RuneError
  259. fallthrough
  260. case i <= rune3Max:
  261. p = append(p, t3 | byte(r >> 12))
  262. p = append(p, tx | byte(r >> 6) & maskx)
  263. p = append(p, tx | byte(r) & maskx)
  264. return p
  265. default:
  266. p = append(p, t4 | byte(r >> 18))
  267. p = append(p, tx | byte(r >> 12) & maskx)
  268. p = append(p, tx | byte(r >> 6) & maskx)
  269. p = append(p, tx | byte(r) & maskx)
  270. return p
  271. }
  272. }