decode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "regexp"
  10. "strconv"
  11. "unicode/utf8"
  12. "google.golang.org/protobuf/internal/errors"
  13. )
// call specifies which Decoder method was invoked.
type call uint8

const (
	// readCall records that Read was the most recent call. It is the zero
	// value of call, so a freshly constructed Decoder starts in this state.
	readCall call = iota
	// peekCall records that Peek was the most recent call, meaning the
	// Decoder is holding a value that Read has not yet handed out.
	peekCall
)
// Decoder is a token-based JSON decoder.
//
// It walks a byte slice one token at a time via Read and Peek, tracking
// enough state to validate that tokens appear in a legal order.
type Decoder struct {
	// lastCall is last method called, either readCall or peekCall.
	// Initial value is readCall.
	lastCall call

	// value contains the last read value.
	value Value

	// err contains the last read error. It is stored by Peek so that the
	// following Read can return the same result.
	err error

	// startStack is a stack containing StartObject and StartArray types. The
	// top of stack represents the object or the array the current value is
	// directly located in.
	startStack []Type

	// orig is used in reporting line and column.
	orig []byte
	// in contains the unconsumed input.
	in []byte
}
  38. // NewDecoder returns a Decoder to read the given []byte.
  39. func NewDecoder(b []byte) *Decoder {
  40. return &Decoder{orig: b, in: b}
  41. }
  42. // Peek looks ahead and returns the next JSON type without advancing a read.
  43. func (d *Decoder) Peek() Type {
  44. defer func() { d.lastCall = peekCall }()
  45. if d.lastCall == readCall {
  46. d.value, d.err = d.Read()
  47. }
  48. return d.value.typ
  49. }
  50. // Read returns the next JSON value. It will return an error if there is no
  51. // valid value. For String types containing invalid UTF8 characters, a non-fatal
  52. // error is returned and caller can call Read for the next value.
  53. func (d *Decoder) Read() (Value, error) {
  54. defer func() { d.lastCall = readCall }()
  55. if d.lastCall == peekCall {
  56. return d.value, d.err
  57. }
  58. var nerr errors.NonFatal
  59. value, err := d.parseNext()
  60. if !nerr.Merge(err) {
  61. return Value{}, err
  62. }
  63. n := value.size
  64. switch value.typ {
  65. case EOF:
  66. if len(d.startStack) != 0 ||
  67. d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
  68. return Value{}, io.ErrUnexpectedEOF
  69. }
  70. case Null:
  71. if !d.isValueNext() {
  72. return Value{}, d.newSyntaxError("unexpected value null")
  73. }
  74. case Bool, Number:
  75. if !d.isValueNext() {
  76. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  77. }
  78. case String:
  79. if d.isValueNext() {
  80. break
  81. }
  82. // Check if this is for an object name.
  83. if d.value.typ&(StartObject|comma) == 0 {
  84. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  85. }
  86. d.in = d.in[n:]
  87. d.consume(0)
  88. if c := d.in[0]; c != ':' {
  89. return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
  90. }
  91. n = 1
  92. value.typ = Name
  93. case StartObject, StartArray:
  94. if !d.isValueNext() {
  95. return Value{}, d.newSyntaxError("unexpected character %v", value.Raw())
  96. }
  97. d.startStack = append(d.startStack, value.typ)
  98. case EndObject:
  99. if len(d.startStack) == 0 ||
  100. d.value.typ == comma ||
  101. d.startStack[len(d.startStack)-1] != StartObject {
  102. return Value{}, d.newSyntaxError("unexpected character }")
  103. }
  104. d.startStack = d.startStack[:len(d.startStack)-1]
  105. case EndArray:
  106. if len(d.startStack) == 0 ||
  107. d.value.typ == comma ||
  108. d.startStack[len(d.startStack)-1] != StartArray {
  109. return Value{}, d.newSyntaxError("unexpected character ]")
  110. }
  111. d.startStack = d.startStack[:len(d.startStack)-1]
  112. case comma:
  113. if len(d.startStack) == 0 ||
  114. d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
  115. return Value{}, d.newSyntaxError("unexpected character ,")
  116. }
  117. }
  118. // Update d.value only after validating value to be in the right sequence.
  119. d.value = value
  120. d.in = d.in[n:]
  121. if d.value.typ == comma {
  122. return d.Read()
  123. }
  124. return value, nerr.E
  125. }
// errRegexp extracts a short snippet from the start of the input for use in
// error messages: either a run of 1 to 32 characters that look like a
// non-delimiter (letters, digits, '-', '+', '.', '_'), or otherwise a single
// character.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
  128. // parseNext parses for the next JSON value. It returns a Value object for
  129. // different types, except for Name. It does not handle whether the next value
  130. // is in a valid sequence or not.
  131. func (d *Decoder) parseNext() (value Value, err error) {
  132. // Trim leading spaces.
  133. d.consume(0)
  134. in := d.in
  135. if len(in) == 0 {
  136. return d.newValue(EOF, nil, 0), nil
  137. }
  138. switch in[0] {
  139. case 'n':
  140. n := matchWithDelim("null", in)
  141. if n == 0 {
  142. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  143. }
  144. return d.newValue(Null, in, n), nil
  145. case 't':
  146. n := matchWithDelim("true", in)
  147. if n == 0 {
  148. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  149. }
  150. return d.newBoolValue(in, n, true), nil
  151. case 'f':
  152. n := matchWithDelim("false", in)
  153. if n == 0 {
  154. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  155. }
  156. return d.newBoolValue(in, n, false), nil
  157. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  158. n, ok := consumeNumber(in)
  159. if !ok {
  160. return Value{}, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
  161. }
  162. return d.newValue(Number, in, n), nil
  163. case '"':
  164. var nerr errors.NonFatal
  165. s, n, err := d.parseString(in)
  166. if !nerr.Merge(err) {
  167. return Value{}, err
  168. }
  169. return d.newStringValue(in, n, s), nerr.E
  170. case '{':
  171. return d.newValue(StartObject, in, 1), nil
  172. case '}':
  173. return d.newValue(EndObject, in, 1), nil
  174. case '[':
  175. return d.newValue(StartArray, in, 1), nil
  176. case ']':
  177. return d.newValue(EndArray, in, 1), nil
  178. case ',':
  179. return d.newValue(comma, in, 1), nil
  180. }
  181. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  182. }
  183. // position returns line and column number of index in given orig slice.
  184. func position(orig []byte, idx int) (int, int) {
  185. b := orig[:idx]
  186. line := bytes.Count(b, []byte("\n")) + 1
  187. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  188. b = b[i+1:]
  189. }
  190. column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
  191. return line, column
  192. }
  193. // newSyntaxError returns an error with line and column information useful for
  194. // syntax errors.
  195. func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
  196. e := errors.New(f, x...)
  197. line, column := position(d.orig, len(d.orig)-len(d.in))
  198. return errors.New("syntax error (line %d:%d): %v", line, column, e)
  199. }
  200. // matchWithDelim matches s with the input b and verifies that the match
  201. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  202. // As a special case, EOF is considered a delimiter. It returns the length of s
  203. // if there is a match, else 0.
  204. func matchWithDelim(s string, b []byte) int {
  205. if !bytes.HasPrefix(b, []byte(s)) {
  206. return 0
  207. }
  208. n := len(s)
  209. if n < len(b) && isNotDelim(b[n]) {
  210. return 0
  211. }
  212. return n
  213. }
  214. // isNotDelim returns true if given byte is a not delimiter character.
  215. func isNotDelim(c byte) bool {
  216. return (c == '-' || c == '+' || c == '.' || c == '_' ||
  217. ('a' <= c && c <= 'z') ||
  218. ('A' <= c && c <= 'Z') ||
  219. ('0' <= c && c <= '9'))
  220. }
  221. // consume consumes n bytes of input and any subsequent whitespace.
  222. func (d *Decoder) consume(n int) {
  223. d.in = d.in[n:]
  224. for len(d.in) > 0 {
  225. switch d.in[0] {
  226. case ' ', '\n', '\r', '\t':
  227. d.in = d.in[1:]
  228. default:
  229. return
  230. }
  231. }
  232. }
  233. // isValueNext returns true if next type should be a JSON value: Null,
  234. // Number, String or Bool.
  235. func (d *Decoder) isValueNext() bool {
  236. if len(d.startStack) == 0 {
  237. return d.value.typ == 0
  238. }
  239. start := d.startStack[len(d.startStack)-1]
  240. switch start {
  241. case StartObject:
  242. return d.value.typ&Name != 0
  243. case StartArray:
  244. return d.value.typ&(StartArray|comma) != 0
  245. }
  246. panic(fmt.Sprintf(
  247. "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
  248. d.value.typ, start))
  249. }
  250. // newValue constructs a Value for given Type.
  251. func (d *Decoder) newValue(typ Type, input []byte, size int) Value {
  252. return Value{
  253. typ: typ,
  254. input: d.orig,
  255. start: len(d.orig) - len(input),
  256. size: size,
  257. }
  258. }
  259. // newBoolValue constructs a Value for a JSON boolean.
  260. func (d *Decoder) newBoolValue(input []byte, size int, b bool) Value {
  261. return Value{
  262. typ: Bool,
  263. input: d.orig,
  264. start: len(d.orig) - len(input),
  265. size: size,
  266. boo: b,
  267. }
  268. }
  269. // newStringValue constructs a Value for a JSON string.
  270. func (d *Decoder) newStringValue(input []byte, size int, s string) Value {
  271. return Value{
  272. typ: String,
  273. input: d.orig,
  274. start: len(d.orig) - len(input),
  275. size: size,
  276. str: s,
  277. }
  278. }
  279. // Clone returns a copy of the Decoder for use in reading ahead the next JSON
  280. // object, array or other values without affecting current Decoder.
  281. func (d *Decoder) Clone() *Decoder {
  282. ret := *d
  283. ret.startStack = append([]Type(nil), ret.startStack...)
  284. return &ret
  285. }
// Value provides a parsed JSON type and value.
//
// The original input slice is stored in this struct in order to compute for
// position as needed. The raw JSON value is derived from the original input
// slice given start and size.
//
// For JSON boolean and string, it holds the converted value in boo and str
// fields respectively. For JSON number, the raw JSON value holds a valid number
// which is converted only in Int or Float. Other JSON types do not require any
// additional data.
type Value struct {
	// typ is the JSON type of this value.
	typ Type
	// input is the Decoder's original input, not just this value's bytes.
	input []byte
	// start is the byte offset of the raw value within input.
	start int
	// size is the length in bytes of the raw value.
	size int
	// boo is the decoded value for a Bool.
	boo bool
	// str is the decoded value for a String, or the name for a Name.
	str string
}
  304. func (v Value) newError(f string, x ...interface{}) error {
  305. e := errors.New(f, x...)
  306. line, col := v.Position()
  307. return errors.New("error (line %d:%d): %v", line, col, e)
  308. }
// Type returns the JSON type of the value.
func (v Value) Type() Type {
	return v.typ
}
// Position returns the line and column of the value, both 1-based, computed
// from the value's start offset within the original input.
func (v Value) Position() (int, int) {
	return position(v.input, v.start)
}
  317. // Bool returns the bool value if token is Bool, else it will return an error.
  318. func (v Value) Bool() (bool, error) {
  319. if v.typ != Bool {
  320. return false, v.newError("%s is not a bool", v.Raw())
  321. }
  322. return v.boo, nil
  323. }
  324. // String returns the string value for a JSON string token or the read value in
  325. // string if token is not a string.
  326. func (v Value) String() string {
  327. if v.typ != String {
  328. return v.Raw()
  329. }
  330. return v.str
  331. }
  332. // Name returns the object name if token is Name, else it will return an error.
  333. func (v Value) Name() (string, error) {
  334. if v.typ != Name {
  335. return "", v.newError("%s is not an object name", v.Raw())
  336. }
  337. return v.str, nil
  338. }
  339. // Raw returns the read value in string.
  340. func (v Value) Raw() string {
  341. return string(v.input[v.start : v.start+v.size])
  342. }
  343. // Float returns the floating-point number if token is Number, else it will
  344. // return an error.
  345. //
  346. // The floating-point precision is specified by the bitSize parameter: 32 for
  347. // float32 or 64 for float64. If bitSize=32, the result still has type float64,
  348. // but it will be convertible to float32 without changing its value. It will
  349. // return an error if the number exceeds the floating point limits for given
  350. // bitSize.
  351. func (v Value) Float(bitSize int) (float64, error) {
  352. if v.typ != Number {
  353. return 0, v.newError("%s is not a number", v.Raw())
  354. }
  355. f, err := strconv.ParseFloat(v.Raw(), bitSize)
  356. if err != nil {
  357. return 0, v.newError("%v", err)
  358. }
  359. return f, nil
  360. }
  361. // Int returns the signed integer number if token is Number, else it will
  362. // return an error.
  363. //
  364. // The given bitSize specifies the integer type that the result must fit into.
  365. // It returns an error if the number is not an integer value or if the result
  366. // exceeds the limits for given bitSize.
  367. func (v Value) Int(bitSize int) (int64, error) {
  368. s, err := v.getIntStr()
  369. if err != nil {
  370. return 0, err
  371. }
  372. n, err := strconv.ParseInt(s, 10, bitSize)
  373. if err != nil {
  374. return 0, v.newError("%v", err)
  375. }
  376. return n, nil
  377. }
  378. // Uint returns the signed integer number if token is Number, else it will
  379. // return an error.
  380. //
  381. // The given bitSize specifies the unsigned integer type that the result must
  382. // fit into. It returns an error if the number is not an unsigned integer value
  383. // or if the result exceeds the limits for given bitSize.
  384. func (v Value) Uint(bitSize int) (uint64, error) {
  385. s, err := v.getIntStr()
  386. if err != nil {
  387. return 0, err
  388. }
  389. n, err := strconv.ParseUint(s, 10, bitSize)
  390. if err != nil {
  391. return 0, v.newError("%v", err)
  392. }
  393. return n, nil
  394. }
  395. func (v Value) getIntStr() (string, error) {
  396. if v.typ != Number {
  397. return "", v.newError("%s is not a number", v.input)
  398. }
  399. parts, ok := parseNumber(v.input[v.start : v.start+v.size])
  400. if !ok {
  401. return "", v.newError("%s is not a number", v.input)
  402. }
  403. num, ok := normalizeToIntString(parts)
  404. if !ok {
  405. return "", v.newError("cannot convert %s to integer", v.input)
  406. }
  407. return num, nil
  408. }