decode.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "regexp"
  10. "strconv"
  11. "unicode/utf8"
  12. "github.com/golang/protobuf/v2/internal/errors"
  13. )
  // call identifies which Decoder method, Read or Peek, ran most recently.
  type call uint8

  const (
  	// readCall records that Read was the last method invoked. It is the zero
  	// value of call, so a Decoder whose lastCall was never set reports
  	// readCall.
  	readCall call = iota
  	// peekCall records that Peek was the last method invoked.
  	peekCall
  )
  20. // Decoder is a token-based JSON decoder.
  21. type Decoder struct {
  22. // lastCall is last method called, eiterh readCall or peekCall.
  23. lastCall call
  24. // value contains the last read value.
  25. value Value
  26. // err contains the last read error.
  27. err error
  28. // startStack is a stack containing StartObject and StartArray types. The
  29. // top of stack represents the object or the array the current value is
  30. // directly located in.
  31. startStack []Type
  32. // orig is used in reporting line and column.
  33. orig []byte
  34. // in contains the unconsumed input.
  35. in []byte
  36. }
  37. // NewDecoder returns a Decoder to read the given []byte.
  38. func NewDecoder(b []byte) *Decoder {
  39. return &Decoder{orig: b, in: b}
  40. }
  41. // Peek looks ahead and returns the next JSON type without advancing a read.
  42. func (d *Decoder) Peek() Type {
  43. defer func() { d.lastCall = peekCall }()
  44. if d.lastCall == readCall {
  45. d.value, d.err = d.Read()
  46. }
  47. return d.value.typ
  48. }
  49. // Read returns the next JSON value. It will return an error if there is no
  50. // valid value. For String types containing invalid UTF8 characters, a
  51. // non-fatal error is returned and caller can call Read for the next value.
  52. func (d *Decoder) Read() (Value, error) {
  53. defer func() { d.lastCall = readCall }()
  54. if d.lastCall == peekCall {
  55. return d.value, d.err
  56. }
  57. var nerr errors.NonFatal
  58. value, n, err := d.parseNext()
  59. if !nerr.Merge(err) {
  60. return Value{}, err
  61. }
  62. switch value.typ {
  63. case EOF:
  64. if len(d.startStack) != 0 ||
  65. d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
  66. return Value{}, io.ErrUnexpectedEOF
  67. }
  68. case Null:
  69. if !d.isValueNext() {
  70. return Value{}, d.newSyntaxError("unexpected value null")
  71. }
  72. case Bool, Number:
  73. if !d.isValueNext() {
  74. return Value{}, d.newSyntaxError("unexpected value %v", value)
  75. }
  76. case String:
  77. if d.isValueNext() {
  78. break
  79. }
  80. // Check if this is for an object name.
  81. if d.value.typ&(StartObject|comma) == 0 {
  82. return Value{}, d.newSyntaxError("unexpected value %q", value)
  83. }
  84. d.in = d.in[n:]
  85. d.consume(0)
  86. if c := d.in[0]; c != ':' {
  87. return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
  88. }
  89. n = 1
  90. value.typ = Name
  91. case StartObject, StartArray:
  92. if !d.isValueNext() {
  93. return Value{}, d.newSyntaxError("unexpected character %v", value)
  94. }
  95. d.startStack = append(d.startStack, value.typ)
  96. case EndObject:
  97. if len(d.startStack) == 0 ||
  98. d.value.typ == comma ||
  99. d.startStack[len(d.startStack)-1] != StartObject {
  100. return Value{}, d.newSyntaxError("unexpected character }")
  101. }
  102. d.startStack = d.startStack[:len(d.startStack)-1]
  103. case EndArray:
  104. if len(d.startStack) == 0 ||
  105. d.value.typ == comma ||
  106. d.startStack[len(d.startStack)-1] != StartArray {
  107. return Value{}, d.newSyntaxError("unexpected character ]")
  108. }
  109. d.startStack = d.startStack[:len(d.startStack)-1]
  110. case comma:
  111. if len(d.startStack) == 0 ||
  112. d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
  113. return Value{}, d.newSyntaxError("unexpected character ,")
  114. }
  115. }
  116. // Update lastType only after validating value to be in the right
  117. // sequence.
  118. d.value.typ = value.typ
  119. d.in = d.in[n:]
  120. if d.value.typ == comma {
  121. return d.Read()
  122. }
  123. return value, nerr.E
  124. }
  // literalRegexp matches one of the JSON literals null, true or false at the
  // very start of the input.
  var literalRegexp = regexp.MustCompile(`^(null|true|false)`)

  // errRegexp extracts a short snippet from the start of the input for use in
  // error messages: either up to 32 characters that look like a non-delimiter
  // sequence, or otherwise a single character.
  var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
  130. // parseNext parses for the next JSON value. It returns a Value object for
  131. // different types, except for Name. It also returns the size that was parsed.
  132. // It does not handle whether the next value is in a valid sequence or not, it
  133. // only ensures that the value is a valid one.
  134. func (d *Decoder) parseNext() (value Value, n int, err error) {
  135. // Trim leading spaces.
  136. d.consume(0)
  137. in := d.in
  138. if len(in) == 0 {
  139. return d.newValue(EOF, nil, nil), 0, nil
  140. }
  141. switch in[0] {
  142. case 'n', 't', 'f':
  143. n := matchWithDelim(literalRegexp, in)
  144. if n == 0 {
  145. return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  146. }
  147. switch in[0] {
  148. case 'n':
  149. return d.newValue(Null, in[:n], nil), n, nil
  150. case 't':
  151. return d.newValue(Bool, in[:n], true), n, nil
  152. case 'f':
  153. return d.newValue(Bool, in[:n], false), n, nil
  154. }
  155. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  156. num, n := parseNumber(in)
  157. if num == nil {
  158. return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
  159. }
  160. return d.newValue(Number, in[:n], num), n, nil
  161. case '"':
  162. var nerr errors.NonFatal
  163. s, n, err := d.parseString(in)
  164. if !nerr.Merge(err) {
  165. return Value{}, 0, err
  166. }
  167. return d.newValue(String, in[:n], s), n, nerr.E
  168. case '{':
  169. return d.newValue(StartObject, in[:1], nil), 1, nil
  170. case '}':
  171. return d.newValue(EndObject, in[:1], nil), 1, nil
  172. case '[':
  173. return d.newValue(StartArray, in[:1], nil), 1, nil
  174. case ']':
  175. return d.newValue(EndArray, in[:1], nil), 1, nil
  176. case ',':
  177. return d.newValue(comma, in[:1], nil), 1, nil
  178. }
  179. return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  180. }
  181. // position returns line and column number of parsed bytes.
  182. func (d *Decoder) position() (int, int) {
  183. // Calculate line and column of consumed input.
  184. b := d.orig[:len(d.orig)-len(d.in)]
  185. line := bytes.Count(b, []byte("\n")) + 1
  186. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  187. b = b[i+1:]
  188. }
  189. column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
  190. return line, column
  191. }
  192. // newSyntaxError returns an error with line and column information useful for
  193. // syntax errors.
  194. func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
  195. e := errors.New(f, x...)
  196. line, column := d.position()
  197. return errors.New("syntax error (line %d:%d): %v", line, column, e)
  198. }
  199. // matchWithDelim matches r with the input b and verifies that the match
  200. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  201. // As a special case, EOF is considered a delimiter.
  202. func matchWithDelim(r *regexp.Regexp, b []byte) int {
  203. n := len(r.Find(b))
  204. if n < len(b) {
  205. // Check that the next character is a delimiter.
  206. if isNotDelim(b[n]) {
  207. return 0
  208. }
  209. }
  210. return n
  211. }
  // isNotDelim reports whether c is not a delimiter character. The
  // non-delimiters are ASCII letters, ASCII digits and the bytes '-', '+', '.'
  // and '_'.
  func isNotDelim(c byte) bool {
  	switch {
  	case 'a' <= c && c <= 'z',
  		'A' <= c && c <= 'Z',
  		'0' <= c && c <= '9':
  		return true
  	}
  	switch c {
  	case '-', '+', '.', '_':
  		return true
  	}
  	return false
  }
  219. // consume consumes n bytes of input and any subsequent whitespace.
  220. func (d *Decoder) consume(n int) {
  221. d.in = d.in[n:]
  222. for len(d.in) > 0 {
  223. switch d.in[0] {
  224. case ' ', '\n', '\r', '\t':
  225. d.in = d.in[1:]
  226. default:
  227. return
  228. }
  229. }
  230. }
  231. // isValueNext returns true if next type should be a JSON value: Null,
  232. // Number, String or Bool.
  233. func (d *Decoder) isValueNext() bool {
  234. if len(d.startStack) == 0 {
  235. return d.value.typ == 0
  236. }
  237. start := d.startStack[len(d.startStack)-1]
  238. switch start {
  239. case StartObject:
  240. return d.value.typ&Name != 0
  241. case StartArray:
  242. return d.value.typ&(StartArray|comma) != 0
  243. }
  244. panic(fmt.Sprintf(
  245. "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
  246. d.value.typ, start))
  247. }
  248. // newValue constructs a Value.
  249. func (d *Decoder) newValue(typ Type, input []byte, value interface{}) Value {
  250. line, column := d.position()
  251. return Value{
  252. input: input,
  253. line: line,
  254. column: column,
  255. typ: typ,
  256. value: value,
  257. }
  258. }
  259. // Value contains a JSON type and value parsed from calling Decoder.Read.
  260. type Value struct {
  261. input []byte
  262. line int
  263. column int
  264. typ Type
  265. // value will be set to the following Go type based on the type field:
  266. // Bool => bool
  267. // Number => *numberParts
  268. // String => string
  269. // Name => string
  270. // It will be nil if none of the above.
  271. value interface{}
  272. }
  273. func (v Value) newError(f string, x ...interface{}) error {
  274. e := errors.New(f, x...)
  275. return errors.New("error (line %d:%d): %v", v.line, v.column, e)
  276. }
  277. // Type returns the JSON type.
  278. func (v Value) Type() Type {
  279. return v.typ
  280. }
  281. // Position returns the line and column of the value.
  282. func (v Value) Position() (int, int) {
  283. return v.line, v.column
  284. }
  285. // Bool returns the bool value if token is Bool, else it will return an error.
  286. func (v Value) Bool() (bool, error) {
  287. if v.typ != Bool {
  288. return false, v.newError("%s is not a bool", v.input)
  289. }
  290. return v.value.(bool), nil
  291. }
  292. // String returns the string value for a JSON string token or the read value in
  293. // string if token is not a string.
  294. func (v Value) String() string {
  295. if v.typ != String {
  296. return string(v.input)
  297. }
  298. return v.value.(string)
  299. }
  300. // Name returns the object name if token is Name, else it will return an error.
  301. func (v Value) Name() (string, error) {
  302. if v.typ != Name {
  303. return "", v.newError("%s is not an object name", v.input)
  304. }
  305. return v.value.(string), nil
  306. }
  307. // Float returns the floating-point number if token is Number, else it will
  308. // return an error.
  309. //
  310. // The floating-point precision is specified by the bitSize parameter: 32 for
  311. // float32 or 64 for float64. If bitSize=32, the result still has type float64,
  312. // but it will be convertible to float32 without changing its value. It will
  313. // return an error if the number exceeds the floating point limits for given
  314. // bitSize.
  315. func (v Value) Float(bitSize int) (float64, error) {
  316. if v.typ != Number {
  317. return 0, v.newError("%s is not a number", v.input)
  318. }
  319. f, err := strconv.ParseFloat(string(v.input), bitSize)
  320. if err != nil {
  321. return 0, v.newError("%v", err)
  322. }
  323. return f, nil
  324. }
  325. // Int returns the signed integer number if token is Number, else it will
  326. // return an error.
  327. //
  328. // The given bitSize specifies the integer type that the result must fit into.
  329. // It returns an error if the number is not an integer value or if the result
  330. // exceeds the limits for given bitSize.
  331. func (v Value) Int(bitSize int) (int64, error) {
  332. s, err := v.getIntStr()
  333. if err != nil {
  334. return 0, err
  335. }
  336. n, err := strconv.ParseInt(s, 10, bitSize)
  337. if err != nil {
  338. return 0, v.newError("%v", err)
  339. }
  340. return n, nil
  341. }
  342. // Uint returns the signed integer number if token is Number, else it will
  343. // return an error.
  344. //
  345. // The given bitSize specifies the unsigned integer type that the result must
  346. // fit into. It returns an error if the number is not an unsigned integer value
  347. // or if the result exceeds the limits for given bitSize.
  348. func (v Value) Uint(bitSize int) (uint64, error) {
  349. s, err := v.getIntStr()
  350. if err != nil {
  351. return 0, err
  352. }
  353. n, err := strconv.ParseUint(s, 10, bitSize)
  354. if err != nil {
  355. return 0, v.newError("%v", err)
  356. }
  357. return n, nil
  358. }
  359. func (v Value) getIntStr() (string, error) {
  360. if v.typ != Number {
  361. return "", v.newError("%s is not a number", v.input)
  362. }
  363. pnum := v.value.(*numberParts)
  364. num, ok := normalizeToIntString(pnum)
  365. if !ok {
  366. return "", v.newError("cannot convert %s to integer", v.input)
  367. }
  368. return num, nil
  369. }