| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513 |
- /**
- * Copyright 2014 Paul Querna
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
- package v1
- import (
- "fmt"
- "io"
- "unicode"
- "unicode/utf16"
- )
- const sliceStringMask = cIJC | cNFP
- type ffReader struct {
- s []byte
- i int
- l int
- }
- func newffReader(d []byte) *ffReader {
- return &ffReader{
- s: d,
- i: 0,
- l: len(d),
- }
- }
- func (r *ffReader) Slice(start, stop int) []byte {
- return r.s[start:stop]
- }
- func (r *ffReader) Pos() int {
- return r.i
- }
- // Reset the reader, and add new input.
- func (r *ffReader) Reset(d []byte) {
- r.s = d
- r.i = 0
- r.l = len(d)
- }
- // Calcuates the Position with line and line offset,
- // because this isn't counted for performance reasons,
- // it will iterate the buffer from the beginning, and should
- // only be used in error-paths.
- func (r *ffReader) PosWithLine() (int, int) {
- currentLine := 1
- currentChar := 0
- for i := 0; i < r.i; i++ {
- c := r.s[i]
- currentChar++
- if c == '\n' {
- currentLine++
- currentChar = 0
- }
- }
- return currentLine, currentChar
- }
- func (r *ffReader) ReadByteNoWS() (byte, error) {
- if r.i >= r.l {
- return 0, io.EOF
- }
- j := r.i
- for {
- c := r.s[j]
- j++
- // inline whitespace parsing gives another ~8% performance boost
- // for many kinds of nicely indented JSON.
- // ... and using a [255]bool instead of multiple ifs, gives another 2%
- /*
- if c != '\t' &&
- c != '\n' &&
- c != '\v' &&
- c != '\f' &&
- c != '\r' &&
- c != ' ' {
- r.i = j
- return c, nil
- }
- */
- if whitespaceLookupTable[c] == false {
- r.i = j
- return c, nil
- }
- if j >= r.l {
- return 0, io.EOF
- }
- }
- }
- func (r *ffReader) ReadByte() (byte, error) {
- if r.i >= r.l {
- return 0, io.EOF
- }
- r.i++
- return r.s[r.i-1], nil
- }
- func (r *ffReader) UnreadByte() {
- if r.i <= 0 {
- panic("ffReader.UnreadByte: at beginning of slice")
- }
- r.i--
- }
- func (r *ffReader) readU4(j int) (rune, error) {
- var u4 [4]byte
- for i := 0; i < 4; i++ {
- if j >= r.l {
- return -1, io.EOF
- }
- c := r.s[j]
- if byteLookupTable[c]&cVHC != 0 {
- u4[i] = c
- j++
- continue
- } else {
- // TODO(pquerna): handle errors better. layering violation.
- return -1, fmt.Errorf("lex_string_invalid_hex_char: %v %v", c, string(u4[:]))
- }
- }
- // TODO(pquerna): utf16.IsSurrogate
- rr, err := ParseUint(u4[:], 16, 64)
- if err != nil {
- return -1, err
- }
- return rune(rr), nil
- }
- func (r *ffReader) handleEscaped(c byte, j int, out DecodingBuffer) (int, error) {
- if j >= r.l {
- return 0, io.EOF
- }
- c = r.s[j]
- j++
- if c == 'u' {
- ru, err := r.readU4(j)
- if err != nil {
- return 0, err
- }
- if utf16.IsSurrogate(ru) {
- ru2, err := r.readU4(j + 6)
- if err != nil {
- return 0, err
- }
- out.Write(r.s[r.i : j-2])
- r.i = j + 10
- j = r.i
- rval := utf16.DecodeRune(ru, ru2)
- if rval != unicode.ReplacementChar {
- out.WriteRune(rval)
- } else {
- return 0, fmt.Errorf("lex_string_invalid_unicode_surrogate: %v %v", ru, ru2)
- }
- } else {
- out.Write(r.s[r.i : j-2])
- r.i = j + 4
- j = r.i
- out.WriteRune(ru)
- }
- return j, nil
- } else if byteLookupTable[c]&cVEC == 0 {
- return 0, fmt.Errorf("lex_string_invalid_escaped_char: %v", c)
- } else {
- out.Write(r.s[r.i : j-2])
- r.i = j
- j = r.i
- switch c {
- case '"':
- out.WriteByte('"')
- case '\\':
- out.WriteByte('\\')
- case '/':
- out.WriteByte('/')
- case 'b':
- out.WriteByte('\b')
- case 'f':
- out.WriteByte('\f')
- case 'n':
- out.WriteByte('\n')
- case 'r':
- out.WriteByte('\r')
- case 't':
- out.WriteByte('\t')
- }
- }
- return j, nil
- }
- func (r *ffReader) SliceString(out DecodingBuffer) error {
- var c byte
- // TODO(pquerna): string_with_escapes? de-escape here?
- j := r.i
- for {
- if j >= r.l {
- return io.EOF
- }
- j, c = scanString(r.s, j)
- if c == '"' {
- if j != r.i {
- out.Write(r.s[r.i : j-1])
- r.i = j
- }
- return nil
- } else if c == '\\' {
- var err error
- j, err = r.handleEscaped(c, j, out)
- if err != nil {
- return err
- }
- } else if byteLookupTable[c]&cIJC != 0 {
- return fmt.Errorf("lex_string_invalid_json_char: %v", c)
- }
- continue
- }
- panic("ffjson: SliceString unreached exit")
- }
- // TODO(pquerna): consider combining wibth the normal byte mask.
- var whitespaceLookupTable [256]bool = [256]bool{
- false, /* 0 */
- false, /* 1 */
- false, /* 2 */
- false, /* 3 */
- false, /* 4 */
- false, /* 5 */
- false, /* 6 */
- false, /* 7 */
- false, /* 8 */
- true, /* 9 */
- true, /* 10 */
- true, /* 11 */
- true, /* 12 */
- true, /* 13 */
- false, /* 14 */
- false, /* 15 */
- false, /* 16 */
- false, /* 17 */
- false, /* 18 */
- false, /* 19 */
- false, /* 20 */
- false, /* 21 */
- false, /* 22 */
- false, /* 23 */
- false, /* 24 */
- false, /* 25 */
- false, /* 26 */
- false, /* 27 */
- false, /* 28 */
- false, /* 29 */
- false, /* 30 */
- false, /* 31 */
- true, /* 32 */
- false, /* 33 */
- false, /* 34 */
- false, /* 35 */
- false, /* 36 */
- false, /* 37 */
- false, /* 38 */
- false, /* 39 */
- false, /* 40 */
- false, /* 41 */
- false, /* 42 */
- false, /* 43 */
- false, /* 44 */
- false, /* 45 */
- false, /* 46 */
- false, /* 47 */
- false, /* 48 */
- false, /* 49 */
- false, /* 50 */
- false, /* 51 */
- false, /* 52 */
- false, /* 53 */
- false, /* 54 */
- false, /* 55 */
- false, /* 56 */
- false, /* 57 */
- false, /* 58 */
- false, /* 59 */
- false, /* 60 */
- false, /* 61 */
- false, /* 62 */
- false, /* 63 */
- false, /* 64 */
- false, /* 65 */
- false, /* 66 */
- false, /* 67 */
- false, /* 68 */
- false, /* 69 */
- false, /* 70 */
- false, /* 71 */
- false, /* 72 */
- false, /* 73 */
- false, /* 74 */
- false, /* 75 */
- false, /* 76 */
- false, /* 77 */
- false, /* 78 */
- false, /* 79 */
- false, /* 80 */
- false, /* 81 */
- false, /* 82 */
- false, /* 83 */
- false, /* 84 */
- false, /* 85 */
- false, /* 86 */
- false, /* 87 */
- false, /* 88 */
- false, /* 89 */
- false, /* 90 */
- false, /* 91 */
- false, /* 92 */
- false, /* 93 */
- false, /* 94 */
- false, /* 95 */
- false, /* 96 */
- false, /* 97 */
- false, /* 98 */
- false, /* 99 */
- false, /* 100 */
- false, /* 101 */
- false, /* 102 */
- false, /* 103 */
- false, /* 104 */
- false, /* 105 */
- false, /* 106 */
- false, /* 107 */
- false, /* 108 */
- false, /* 109 */
- false, /* 110 */
- false, /* 111 */
- false, /* 112 */
- false, /* 113 */
- false, /* 114 */
- false, /* 115 */
- false, /* 116 */
- false, /* 117 */
- false, /* 118 */
- false, /* 119 */
- false, /* 120 */
- false, /* 121 */
- false, /* 122 */
- false, /* 123 */
- false, /* 124 */
- false, /* 125 */
- false, /* 126 */
- false, /* 127 */
- false, /* 128 */
- false, /* 129 */
- false, /* 130 */
- false, /* 131 */
- false, /* 132 */
- false, /* 133 */
- false, /* 134 */
- false, /* 135 */
- false, /* 136 */
- false, /* 137 */
- false, /* 138 */
- false, /* 139 */
- false, /* 140 */
- false, /* 141 */
- false, /* 142 */
- false, /* 143 */
- false, /* 144 */
- false, /* 145 */
- false, /* 146 */
- false, /* 147 */
- false, /* 148 */
- false, /* 149 */
- false, /* 150 */
- false, /* 151 */
- false, /* 152 */
- false, /* 153 */
- false, /* 154 */
- false, /* 155 */
- false, /* 156 */
- false, /* 157 */
- false, /* 158 */
- false, /* 159 */
- false, /* 160 */
- false, /* 161 */
- false, /* 162 */
- false, /* 163 */
- false, /* 164 */
- false, /* 165 */
- false, /* 166 */
- false, /* 167 */
- false, /* 168 */
- false, /* 169 */
- false, /* 170 */
- false, /* 171 */
- false, /* 172 */
- false, /* 173 */
- false, /* 174 */
- false, /* 175 */
- false, /* 176 */
- false, /* 177 */
- false, /* 178 */
- false, /* 179 */
- false, /* 180 */
- false, /* 181 */
- false, /* 182 */
- false, /* 183 */
- false, /* 184 */
- false, /* 185 */
- false, /* 186 */
- false, /* 187 */
- false, /* 188 */
- false, /* 189 */
- false, /* 190 */
- false, /* 191 */
- false, /* 192 */
- false, /* 193 */
- false, /* 194 */
- false, /* 195 */
- false, /* 196 */
- false, /* 197 */
- false, /* 198 */
- false, /* 199 */
- false, /* 200 */
- false, /* 201 */
- false, /* 202 */
- false, /* 203 */
- false, /* 204 */
- false, /* 205 */
- false, /* 206 */
- false, /* 207 */
- false, /* 208 */
- false, /* 209 */
- false, /* 210 */
- false, /* 211 */
- false, /* 212 */
- false, /* 213 */
- false, /* 214 */
- false, /* 215 */
- false, /* 216 */
- false, /* 217 */
- false, /* 218 */
- false, /* 219 */
- false, /* 220 */
- false, /* 221 */
- false, /* 222 */
- false, /* 223 */
- false, /* 224 */
- false, /* 225 */
- false, /* 226 */
- false, /* 227 */
- false, /* 228 */
- false, /* 229 */
- false, /* 230 */
- false, /* 231 */
- false, /* 232 */
- false, /* 233 */
- false, /* 234 */
- false, /* 235 */
- false, /* 236 */
- false, /* 237 */
- false, /* 238 */
- false, /* 239 */
- false, /* 240 */
- false, /* 241 */
- false, /* 242 */
- false, /* 243 */
- false, /* 244 */
- false, /* 245 */
- false, /* 246 */
- false, /* 247 */
- false, /* 248 */
- false, /* 249 */
- false, /* 250 */
- false, /* 251 */
- false, /* 252 */
- false, /* 253 */
- false, /* 254 */
- false, /* 255 */
- }
|