reader.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /**
  2. * Copyright 2014 Paul Querna
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. */
  17. package v1
  18. import (
  19. "fmt"
  20. "io"
  21. "unicode"
  22. "unicode/utf16"
  23. )
// sliceStringMask is the bitwise OR of the cIJC and cNFP byte-class flags.
// NOTE(review): nothing in the visible part of this file references it;
// presumably the string scanner consumes it — confirm before removing.
const sliceStringMask = cIJC | cNFP
  25. type ffReader struct {
  26. s []byte
  27. i int
  28. l int
  29. }
  30. func newffReader(d []byte) *ffReader {
  31. return &ffReader{
  32. s: d,
  33. i: 0,
  34. l: len(d),
  35. }
  36. }
// Slice returns the bytes of the underlying input in the half-open range
// [start, stop). The result aliases the reader's buffer rather than copying
// it. The bounds are not validated here, so an invalid range panics like any
// other slice expression.
func (r *ffReader) Slice(start, stop int) []byte {
	return r.s[start:stop]
}
// Pos returns the current read offset into the input, i.e. the index of the
// next byte that has not been consumed yet.
func (r *ffReader) Pos() int {
	return r.i
}
  43. // Reset the reader, and add new input.
  44. func (r *ffReader) Reset(d []byte) {
  45. r.s = d
  46. r.i = 0
  47. r.l = len(d)
  48. }
  49. // Calcuates the Position with line and line offset,
  50. // because this isn't counted for performance reasons,
  51. // it will iterate the buffer from the beginning, and should
  52. // only be used in error-paths.
  53. func (r *ffReader) PosWithLine() (int, int) {
  54. currentLine := 1
  55. currentChar := 0
  56. for i := 0; i < r.i; i++ {
  57. c := r.s[i]
  58. currentChar++
  59. if c == '\n' {
  60. currentLine++
  61. currentChar = 0
  62. }
  63. }
  64. return currentLine, currentChar
  65. }
  66. func (r *ffReader) ReadByteNoWS() (byte, error) {
  67. if r.i >= r.l {
  68. return 0, io.EOF
  69. }
  70. j := r.i
  71. for {
  72. c := r.s[j]
  73. j++
  74. // inline whitespace parsing gives another ~8% performance boost
  75. // for many kinds of nicely indented JSON.
  76. // ... and using a [255]bool instead of multiple ifs, gives another 2%
  77. /*
  78. if c != '\t' &&
  79. c != '\n' &&
  80. c != '\v' &&
  81. c != '\f' &&
  82. c != '\r' &&
  83. c != ' ' {
  84. r.i = j
  85. return c, nil
  86. }
  87. */
  88. if whitespaceLookupTable[c] == false {
  89. r.i = j
  90. return c, nil
  91. }
  92. if j >= r.l {
  93. return 0, io.EOF
  94. }
  95. }
  96. }
  97. func (r *ffReader) ReadByte() (byte, error) {
  98. if r.i >= r.l {
  99. return 0, io.EOF
  100. }
  101. r.i++
  102. return r.s[r.i-1], nil
  103. }
  104. func (r *ffReader) UnreadByte() {
  105. if r.i <= 0 {
  106. panic("ffReader.UnreadByte: at beginning of slice")
  107. }
  108. r.i--
  109. }
  110. func (r *ffReader) readU4(j int) (rune, error) {
  111. var u4 [4]byte
  112. for i := 0; i < 4; i++ {
  113. if j >= r.l {
  114. return -1, io.EOF
  115. }
  116. c := r.s[j]
  117. if byteLookupTable[c]&cVHC != 0 {
  118. u4[i] = c
  119. j++
  120. continue
  121. } else {
  122. // TODO(pquerna): handle errors better. layering violation.
  123. return -1, fmt.Errorf("lex_string_invalid_hex_char: %v %v", c, string(u4[:]))
  124. }
  125. }
  126. // TODO(pquerna): utf16.IsSurrogate
  127. rr, err := ParseUint(u4[:], 16, 64)
  128. if err != nil {
  129. return -1, err
  130. }
  131. return rune(rr), nil
  132. }
  133. func (r *ffReader) handleEscaped(c byte, j int, out DecodingBuffer) (int, error) {
  134. if j >= r.l {
  135. return 0, io.EOF
  136. }
  137. c = r.s[j]
  138. j++
  139. if c == 'u' {
  140. ru, err := r.readU4(j)
  141. if err != nil {
  142. return 0, err
  143. }
  144. if utf16.IsSurrogate(ru) {
  145. ru2, err := r.readU4(j + 6)
  146. if err != nil {
  147. return 0, err
  148. }
  149. out.Write(r.s[r.i : j-2])
  150. r.i = j + 10
  151. j = r.i
  152. rval := utf16.DecodeRune(ru, ru2)
  153. if rval != unicode.ReplacementChar {
  154. out.WriteRune(rval)
  155. } else {
  156. return 0, fmt.Errorf("lex_string_invalid_unicode_surrogate: %v %v", ru, ru2)
  157. }
  158. } else {
  159. out.Write(r.s[r.i : j-2])
  160. r.i = j + 4
  161. j = r.i
  162. out.WriteRune(ru)
  163. }
  164. return j, nil
  165. } else if byteLookupTable[c]&cVEC == 0 {
  166. return 0, fmt.Errorf("lex_string_invalid_escaped_char: %v", c)
  167. } else {
  168. out.Write(r.s[r.i : j-2])
  169. r.i = j
  170. j = r.i
  171. switch c {
  172. case '"':
  173. out.WriteByte('"')
  174. case '\\':
  175. out.WriteByte('\\')
  176. case '/':
  177. out.WriteByte('/')
  178. case 'b':
  179. out.WriteByte('\b')
  180. case 'f':
  181. out.WriteByte('\f')
  182. case 'n':
  183. out.WriteByte('\n')
  184. case 'r':
  185. out.WriteByte('\r')
  186. case 't':
  187. out.WriteByte('\t')
  188. }
  189. }
  190. return j, nil
  191. }
  192. func (r *ffReader) SliceString(out DecodingBuffer) error {
  193. var c byte
  194. // TODO(pquerna): string_with_escapes? de-escape here?
  195. j := r.i
  196. for {
  197. if j >= r.l {
  198. return io.EOF
  199. }
  200. j, c = scanString(r.s, j)
  201. if c == '"' {
  202. if j != r.i {
  203. out.Write(r.s[r.i : j-1])
  204. r.i = j
  205. }
  206. return nil
  207. } else if c == '\\' {
  208. var err error
  209. j, err = r.handleEscaped(c, j, out)
  210. if err != nil {
  211. return err
  212. }
  213. } else if byteLookupTable[c]&cIJC != 0 {
  214. return fmt.Errorf("lex_string_invalid_json_char: %v", c)
  215. }
  216. continue
  217. }
  218. panic("ffjson: SliceString unreached exit")
  219. }
  220. // TODO(pquerna): consider combining wibth the normal byte mask.
  221. var whitespaceLookupTable [256]bool = [256]bool{
  222. false, /* 0 */
  223. false, /* 1 */
  224. false, /* 2 */
  225. false, /* 3 */
  226. false, /* 4 */
  227. false, /* 5 */
  228. false, /* 6 */
  229. false, /* 7 */
  230. false, /* 8 */
  231. true, /* 9 */
  232. true, /* 10 */
  233. true, /* 11 */
  234. true, /* 12 */
  235. true, /* 13 */
  236. false, /* 14 */
  237. false, /* 15 */
  238. false, /* 16 */
  239. false, /* 17 */
  240. false, /* 18 */
  241. false, /* 19 */
  242. false, /* 20 */
  243. false, /* 21 */
  244. false, /* 22 */
  245. false, /* 23 */
  246. false, /* 24 */
  247. false, /* 25 */
  248. false, /* 26 */
  249. false, /* 27 */
  250. false, /* 28 */
  251. false, /* 29 */
  252. false, /* 30 */
  253. false, /* 31 */
  254. true, /* 32 */
  255. false, /* 33 */
  256. false, /* 34 */
  257. false, /* 35 */
  258. false, /* 36 */
  259. false, /* 37 */
  260. false, /* 38 */
  261. false, /* 39 */
  262. false, /* 40 */
  263. false, /* 41 */
  264. false, /* 42 */
  265. false, /* 43 */
  266. false, /* 44 */
  267. false, /* 45 */
  268. false, /* 46 */
  269. false, /* 47 */
  270. false, /* 48 */
  271. false, /* 49 */
  272. false, /* 50 */
  273. false, /* 51 */
  274. false, /* 52 */
  275. false, /* 53 */
  276. false, /* 54 */
  277. false, /* 55 */
  278. false, /* 56 */
  279. false, /* 57 */
  280. false, /* 58 */
  281. false, /* 59 */
  282. false, /* 60 */
  283. false, /* 61 */
  284. false, /* 62 */
  285. false, /* 63 */
  286. false, /* 64 */
  287. false, /* 65 */
  288. false, /* 66 */
  289. false, /* 67 */
  290. false, /* 68 */
  291. false, /* 69 */
  292. false, /* 70 */
  293. false, /* 71 */
  294. false, /* 72 */
  295. false, /* 73 */
  296. false, /* 74 */
  297. false, /* 75 */
  298. false, /* 76 */
  299. false, /* 77 */
  300. false, /* 78 */
  301. false, /* 79 */
  302. false, /* 80 */
  303. false, /* 81 */
  304. false, /* 82 */
  305. false, /* 83 */
  306. false, /* 84 */
  307. false, /* 85 */
  308. false, /* 86 */
  309. false, /* 87 */
  310. false, /* 88 */
  311. false, /* 89 */
  312. false, /* 90 */
  313. false, /* 91 */
  314. false, /* 92 */
  315. false, /* 93 */
  316. false, /* 94 */
  317. false, /* 95 */
  318. false, /* 96 */
  319. false, /* 97 */
  320. false, /* 98 */
  321. false, /* 99 */
  322. false, /* 100 */
  323. false, /* 101 */
  324. false, /* 102 */
  325. false, /* 103 */
  326. false, /* 104 */
  327. false, /* 105 */
  328. false, /* 106 */
  329. false, /* 107 */
  330. false, /* 108 */
  331. false, /* 109 */
  332. false, /* 110 */
  333. false, /* 111 */
  334. false, /* 112 */
  335. false, /* 113 */
  336. false, /* 114 */
  337. false, /* 115 */
  338. false, /* 116 */
  339. false, /* 117 */
  340. false, /* 118 */
  341. false, /* 119 */
  342. false, /* 120 */
  343. false, /* 121 */
  344. false, /* 122 */
  345. false, /* 123 */
  346. false, /* 124 */
  347. false, /* 125 */
  348. false, /* 126 */
  349. false, /* 127 */
  350. false, /* 128 */
  351. false, /* 129 */
  352. false, /* 130 */
  353. false, /* 131 */
  354. false, /* 132 */
  355. false, /* 133 */
  356. false, /* 134 */
  357. false, /* 135 */
  358. false, /* 136 */
  359. false, /* 137 */
  360. false, /* 138 */
  361. false, /* 139 */
  362. false, /* 140 */
  363. false, /* 141 */
  364. false, /* 142 */
  365. false, /* 143 */
  366. false, /* 144 */
  367. false, /* 145 */
  368. false, /* 146 */
  369. false, /* 147 */
  370. false, /* 148 */
  371. false, /* 149 */
  372. false, /* 150 */
  373. false, /* 151 */
  374. false, /* 152 */
  375. false, /* 153 */
  376. false, /* 154 */
  377. false, /* 155 */
  378. false, /* 156 */
  379. false, /* 157 */
  380. false, /* 158 */
  381. false, /* 159 */
  382. false, /* 160 */
  383. false, /* 161 */
  384. false, /* 162 */
  385. false, /* 163 */
  386. false, /* 164 */
  387. false, /* 165 */
  388. false, /* 166 */
  389. false, /* 167 */
  390. false, /* 168 */
  391. false, /* 169 */
  392. false, /* 170 */
  393. false, /* 171 */
  394. false, /* 172 */
  395. false, /* 173 */
  396. false, /* 174 */
  397. false, /* 175 */
  398. false, /* 176 */
  399. false, /* 177 */
  400. false, /* 178 */
  401. false, /* 179 */
  402. false, /* 180 */
  403. false, /* 181 */
  404. false, /* 182 */
  405. false, /* 183 */
  406. false, /* 184 */
  407. false, /* 185 */
  408. false, /* 186 */
  409. false, /* 187 */
  410. false, /* 188 */
  411. false, /* 189 */
  412. false, /* 190 */
  413. false, /* 191 */
  414. false, /* 192 */
  415. false, /* 193 */
  416. false, /* 194 */
  417. false, /* 195 */
  418. false, /* 196 */
  419. false, /* 197 */
  420. false, /* 198 */
  421. false, /* 199 */
  422. false, /* 200 */
  423. false, /* 201 */
  424. false, /* 202 */
  425. false, /* 203 */
  426. false, /* 204 */
  427. false, /* 205 */
  428. false, /* 206 */
  429. false, /* 207 */
  430. false, /* 208 */
  431. false, /* 209 */
  432. false, /* 210 */
  433. false, /* 211 */
  434. false, /* 212 */
  435. false, /* 213 */
  436. false, /* 214 */
  437. false, /* 215 */
  438. false, /* 216 */
  439. false, /* 217 */
  440. false, /* 218 */
  441. false, /* 219 */
  442. false, /* 220 */
  443. false, /* 221 */
  444. false, /* 222 */
  445. false, /* 223 */
  446. false, /* 224 */
  447. false, /* 225 */
  448. false, /* 226 */
  449. false, /* 227 */
  450. false, /* 228 */
  451. false, /* 229 */
  452. false, /* 230 */
  453. false, /* 231 */
  454. false, /* 232 */
  455. false, /* 233 */
  456. false, /* 234 */
  457. false, /* 235 */
  458. false, /* 236 */
  459. false, /* 237 */
  460. false, /* 238 */
  461. false, /* 239 */
  462. false, /* 240 */
  463. false, /* 241 */
  464. false, /* 242 */
  465. false, /* 243 */
  466. false, /* 244 */
  467. false, /* 245 */
  468. false, /* 246 */
  469. false, /* 247 */
  470. false, /* 248 */
  471. false, /* 249 */
  472. false, /* 250 */
  473. false, /* 251 */
  474. false, /* 252 */
  475. false, /* 253 */
  476. false, /* 254 */
  477. false, /* 255 */
  478. }