decode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "regexp"
  10. "strconv"
  11. "unicode/utf8"
  12. "github.com/golang/protobuf/v2/internal/errors"
  13. )
  14. // call specifies which Decoder method was invoked.
  15. type call uint8
  16. const (
  17. readCall call = iota
  18. peekCall
  19. )
  20. // Decoder is a token-based JSON decoder.
  21. type Decoder struct {
  22. // lastCall is last method called, either readCall or peekCall.
  23. // Initial value is readCall.
  24. lastCall call
  25. // value contains the last read value.
  26. value Value
  27. // err contains the last read error.
  28. err error
  29. // startStack is a stack containing StartObject and StartArray types. The
  30. // top of stack represents the object or the array the current value is
  31. // directly located in.
  32. startStack []Type
  33. // orig is used in reporting line and column.
  34. orig []byte
  35. // in contains the unconsumed input.
  36. in []byte
  37. }
  38. // NewDecoder returns a Decoder to read the given []byte.
  39. func NewDecoder(b []byte) *Decoder {
  40. return &Decoder{orig: b, in: b}
  41. }
  42. // Peek looks ahead and returns the next JSON type without advancing a read.
  43. func (d *Decoder) Peek() Type {
  44. defer func() { d.lastCall = peekCall }()
  45. if d.lastCall == readCall {
  46. d.value, d.err = d.Read()
  47. }
  48. return d.value.typ
  49. }
  50. // Read returns the next JSON value. It will return an error if there is no
  51. // valid value. For String types containing invalid UTF8 characters, a
  52. // non-fatal error is returned and caller can call Read for the next value.
  53. func (d *Decoder) Read() (Value, error) {
  54. defer func() { d.lastCall = readCall }()
  55. if d.lastCall == peekCall {
  56. return d.value, d.err
  57. }
  58. var nerr errors.NonFatal
  59. value, n, err := d.parseNext()
  60. if !nerr.Merge(err) {
  61. return Value{}, err
  62. }
  63. switch value.typ {
  64. case EOF:
  65. if len(d.startStack) != 0 ||
  66. d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
  67. return Value{}, io.ErrUnexpectedEOF
  68. }
  69. case Null:
  70. if !d.isValueNext() {
  71. return Value{}, d.newSyntaxError("unexpected value null")
  72. }
  73. case Bool, Number:
  74. if !d.isValueNext() {
  75. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  76. }
  77. case String:
  78. if d.isValueNext() {
  79. break
  80. }
  81. // Check if this is for an object name.
  82. if d.value.typ&(StartObject|comma) == 0 {
  83. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  84. }
  85. d.in = d.in[n:]
  86. d.consume(0)
  87. if c := d.in[0]; c != ':' {
  88. return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
  89. }
  90. n = 1
  91. value.typ = Name
  92. case StartObject, StartArray:
  93. if !d.isValueNext() {
  94. return Value{}, d.newSyntaxError("unexpected character %v", value.Raw())
  95. }
  96. d.startStack = append(d.startStack, value.typ)
  97. case EndObject:
  98. if len(d.startStack) == 0 ||
  99. d.value.typ == comma ||
  100. d.startStack[len(d.startStack)-1] != StartObject {
  101. return Value{}, d.newSyntaxError("unexpected character }")
  102. }
  103. d.startStack = d.startStack[:len(d.startStack)-1]
  104. case EndArray:
  105. if len(d.startStack) == 0 ||
  106. d.value.typ == comma ||
  107. d.startStack[len(d.startStack)-1] != StartArray {
  108. return Value{}, d.newSyntaxError("unexpected character ]")
  109. }
  110. d.startStack = d.startStack[:len(d.startStack)-1]
  111. case comma:
  112. if len(d.startStack) == 0 ||
  113. d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
  114. return Value{}, d.newSyntaxError("unexpected character ,")
  115. }
  116. }
  117. // Update lastType only after validating value to be in the right
  118. // sequence.
  119. d.value.typ = value.typ
  120. d.in = d.in[n:]
  121. if d.value.typ == comma {
  122. return d.Read()
  123. }
  124. return value, nerr.E
  125. }
  126. var (
  127. literalRegexp = regexp.MustCompile(`^(null|true|false)`)
  128. // Any sequence that looks like a non-delimiter (for error reporting).
  129. errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
  130. )
  131. // parseNext parses for the next JSON value. It returns a Value object for
  132. // different types, except for Name. It also returns the size that was parsed.
  133. // It does not handle whether the next value is in a valid sequence or not, it
  134. // only ensures that the value is a valid one.
  135. func (d *Decoder) parseNext() (value Value, n int, err error) {
  136. // Trim leading spaces.
  137. d.consume(0)
  138. in := d.in
  139. if len(in) == 0 {
  140. return d.newValue(nil, EOF), 0, nil
  141. }
  142. switch in[0] {
  143. case 'n', 't', 'f':
  144. n := matchWithDelim(literalRegexp, in)
  145. if n == 0 {
  146. return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  147. }
  148. switch in[0] {
  149. case 'n':
  150. return d.newValue(in[:n], Null), n, nil
  151. case 't':
  152. return d.newBoolValue(in[:n], true), n, nil
  153. case 'f':
  154. return d.newBoolValue(in[:n], false), n, nil
  155. }
  156. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  157. n, ok := consumeNumber(in)
  158. if !ok {
  159. return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
  160. }
  161. return d.newValue(in[:n], Number), n, nil
  162. case '"':
  163. var nerr errors.NonFatal
  164. s, n, err := d.parseString(in)
  165. if !nerr.Merge(err) {
  166. return Value{}, 0, err
  167. }
  168. return d.newStringValue(in[:n], s), n, nerr.E
  169. case '{':
  170. return d.newValue(in[:1], StartObject), 1, nil
  171. case '}':
  172. return d.newValue(in[:1], EndObject), 1, nil
  173. case '[':
  174. return d.newValue(in[:1], StartArray), 1, nil
  175. case ']':
  176. return d.newValue(in[:1], EndArray), 1, nil
  177. case ',':
  178. return d.newValue(in[:1], comma), 1, nil
  179. }
  180. return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  181. }
  182. // position returns line and column number of parsed bytes.
  183. func (d *Decoder) position() (int, int) {
  184. // Calculate line and column of consumed input.
  185. b := d.orig[:len(d.orig)-len(d.in)]
  186. line := bytes.Count(b, []byte("\n")) + 1
  187. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  188. b = b[i+1:]
  189. }
  190. column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
  191. return line, column
  192. }
  193. // newSyntaxError returns an error with line and column information useful for
  194. // syntax errors.
  195. func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
  196. e := errors.New(f, x...)
  197. line, column := d.position()
  198. return errors.New("syntax error (line %d:%d): %v", line, column, e)
  199. }
  200. // matchWithDelim matches r with the input b and verifies that the match
  201. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  202. // As a special case, EOF is considered a delimiter.
  203. func matchWithDelim(r *regexp.Regexp, b []byte) int {
  204. n := len(r.Find(b))
  205. if n < len(b) {
  206. // Check that the next character is a delimiter.
  207. if isNotDelim(b[n]) {
  208. return 0
  209. }
  210. }
  211. return n
  212. }
  213. // isNotDelim returns true if given byte is a not delimiter character.
  214. func isNotDelim(c byte) bool {
  215. return (c == '-' || c == '+' || c == '.' || c == '_' ||
  216. ('a' <= c && c <= 'z') ||
  217. ('A' <= c && c <= 'Z') ||
  218. ('0' <= c && c <= '9'))
  219. }
  220. // consume consumes n bytes of input and any subsequent whitespace.
  221. func (d *Decoder) consume(n int) {
  222. d.in = d.in[n:]
  223. for len(d.in) > 0 {
  224. switch d.in[0] {
  225. case ' ', '\n', '\r', '\t':
  226. d.in = d.in[1:]
  227. default:
  228. return
  229. }
  230. }
  231. }
  232. // isValueNext returns true if next type should be a JSON value: Null,
  233. // Number, String or Bool.
  234. func (d *Decoder) isValueNext() bool {
  235. if len(d.startStack) == 0 {
  236. return d.value.typ == 0
  237. }
  238. start := d.startStack[len(d.startStack)-1]
  239. switch start {
  240. case StartObject:
  241. return d.value.typ&Name != 0
  242. case StartArray:
  243. return d.value.typ&(StartArray|comma) != 0
  244. }
  245. panic(fmt.Sprintf(
  246. "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
  247. d.value.typ, start))
  248. }
  249. // newValue constructs a Value for given Type.
  250. func (d *Decoder) newValue(input []byte, typ Type) Value {
  251. line, column := d.position()
  252. return Value{
  253. input: input,
  254. line: line,
  255. column: column,
  256. typ: typ,
  257. }
  258. }
  259. // newBoolValue constructs a Value for a JSON boolean.
  260. func (d *Decoder) newBoolValue(input []byte, b bool) Value {
  261. line, column := d.position()
  262. return Value{
  263. input: input,
  264. line: line,
  265. column: column,
  266. typ: Bool,
  267. boo: b,
  268. }
  269. }
  270. // newStringValue constructs a Value for a JSON string.
  271. func (d *Decoder) newStringValue(input []byte, s string) Value {
  272. line, column := d.position()
  273. return Value{
  274. input: input,
  275. line: line,
  276. column: column,
  277. typ: String,
  278. str: s,
  279. }
  280. }
  281. // Clone returns a copy of the Decoder for use in reading ahead the next JSON
  282. // object, array or other values without affecting current Decoder.
  283. func (d *Decoder) Clone() *Decoder {
  284. ret := *d
  285. ret.startStack = append([]Type(nil), ret.startStack...)
  286. return &ret
  287. }
  288. // Value contains a JSON type and value parsed from calling Decoder.Read.
  289. // For JSON boolean and string, it holds the converted value in boo and str
  290. // fields respectively. For JSON number, input field holds a valid number which
  291. // is converted only in Int or Float. Other JSON types do not require any
  292. // additional data.
  293. type Value struct {
  294. input []byte
  295. line int
  296. column int
  297. typ Type
  298. boo bool
  299. str string
  300. }
  301. func (v Value) newError(f string, x ...interface{}) error {
  302. e := errors.New(f, x...)
  303. return errors.New("error (line %d:%d): %v", v.line, v.column, e)
  304. }
  305. // Type returns the JSON type.
  306. func (v Value) Type() Type {
  307. return v.typ
  308. }
  309. // Position returns the line and column of the value.
  310. func (v Value) Position() (int, int) {
  311. return v.line, v.column
  312. }
  313. // Bool returns the bool value if token is Bool, else it will return an error.
  314. func (v Value) Bool() (bool, error) {
  315. if v.typ != Bool {
  316. return false, v.newError("%s is not a bool", v.input)
  317. }
  318. return v.boo, nil
  319. }
  320. // String returns the string value for a JSON string token or the read value in
  321. // string if token is not a string.
  322. func (v Value) String() string {
  323. if v.typ != String {
  324. return string(v.input)
  325. }
  326. return v.str
  327. }
  328. // Name returns the object name if token is Name, else it will return an error.
  329. func (v Value) Name() (string, error) {
  330. if v.typ != Name {
  331. return "", v.newError("%s is not an object name", v.input)
  332. }
  333. return v.str, nil
  334. }
  335. // Raw returns the read value in string.
  336. func (v Value) Raw() string {
  337. return string(v.input)
  338. }
  339. // Float returns the floating-point number if token is Number, else it will
  340. // return an error.
  341. //
  342. // The floating-point precision is specified by the bitSize parameter: 32 for
  343. // float32 or 64 for float64. If bitSize=32, the result still has type float64,
  344. // but it will be convertible to float32 without changing its value. It will
  345. // return an error if the number exceeds the floating point limits for given
  346. // bitSize.
  347. func (v Value) Float(bitSize int) (float64, error) {
  348. if v.typ != Number {
  349. return 0, v.newError("%s is not a number", v.input)
  350. }
  351. f, err := strconv.ParseFloat(string(v.input), bitSize)
  352. if err != nil {
  353. return 0, v.newError("%v", err)
  354. }
  355. return f, nil
  356. }
  357. // Int returns the signed integer number if token is Number, else it will
  358. // return an error.
  359. //
  360. // The given bitSize specifies the integer type that the result must fit into.
  361. // It returns an error if the number is not an integer value or if the result
  362. // exceeds the limits for given bitSize.
  363. func (v Value) Int(bitSize int) (int64, error) {
  364. s, err := v.getIntStr()
  365. if err != nil {
  366. return 0, err
  367. }
  368. n, err := strconv.ParseInt(s, 10, bitSize)
  369. if err != nil {
  370. return 0, v.newError("%v", err)
  371. }
  372. return n, nil
  373. }
  374. // Uint returns the signed integer number if token is Number, else it will
  375. // return an error.
  376. //
  377. // The given bitSize specifies the unsigned integer type that the result must
  378. // fit into. It returns an error if the number is not an unsigned integer value
  379. // or if the result exceeds the limits for given bitSize.
  380. func (v Value) Uint(bitSize int) (uint64, error) {
  381. s, err := v.getIntStr()
  382. if err != nil {
  383. return 0, err
  384. }
  385. n, err := strconv.ParseUint(s, 10, bitSize)
  386. if err != nil {
  387. return 0, v.newError("%v", err)
  388. }
  389. return n, nil
  390. }
  391. func (v Value) getIntStr() (string, error) {
  392. if v.typ != Number {
  393. return "", v.newError("%s is not a number", v.input)
  394. }
  395. parts, ok := parseNumber(v.input)
  396. if !ok {
  397. return "", v.newError("%s is not a number", v.input)
  398. }
  399. num, ok := normalizeToIntString(parts)
  400. if !ok {
  401. return "", v.newError("cannot convert %s to integer", v.input)
  402. }
  403. return num, nil
  404. }