decode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "fmt"
  8. "io"
  9. "regexp"
  10. "strconv"
  11. "unicode/utf8"
  12. "google.golang.org/protobuf/internal/errors"
  13. )
  14. // call specifies which Decoder method was invoked.
  15. type call uint8
  16. const (
  17. readCall call = iota
  18. peekCall
  19. )
// Decoder is a token-based JSON decoder. It walks a byte slice supplied to
// NewDecoder and hands out one Value per call to Read, optionally looking one
// value ahead via Peek.
type Decoder struct {
	// lastCall is last method called, either readCall or peekCall.
	// Initial value is readCall (the zero value of call). Read and Peek
	// update it in a deferred function on every invocation.
	lastCall call

	// value contains the last read value. Read stores a value here only after
	// it has been validated against the previous one; Peek also stores the
	// result of its look-ahead Read here.
	value Value

	// err contains the last read error. It is written by Peek and returned by
	// the Read call that follows a Peek.
	err error

	// startStack is a stack containing StartObject and StartArray types. The
	// top of stack represents the object or the array the current value is
	// directly located in. Read pushes on StartObject/StartArray and pops on
	// EndObject/EndArray.
	startStack []Type

	// orig is used in reporting line and column. It is the full slice passed
	// to NewDecoder; offsets are computed as len(orig) - len(in).
	orig []byte
	// in contains the unconsumed input. It is always a suffix of orig.
	in []byte
}
  38. // NewDecoder returns a Decoder to read the given []byte.
  39. func NewDecoder(b []byte) *Decoder {
  40. return &Decoder{orig: b, in: b}
  41. }
  42. // Peek looks ahead and returns the next JSON type without advancing a read.
  43. func (d *Decoder) Peek() Type {
  44. defer func() { d.lastCall = peekCall }()
  45. if d.lastCall == readCall {
  46. d.value, d.err = d.Read()
  47. }
  48. return d.value.typ
  49. }
  50. // Read returns the next JSON value. It will return an error if there is no
  51. // valid value. For String types containing invalid UTF8 characters, a non-fatal
  52. // error is returned and caller can call Read for the next value.
  53. func (d *Decoder) Read() (Value, error) {
  54. defer func() { d.lastCall = readCall }()
  55. if d.lastCall == peekCall {
  56. return d.value, d.err
  57. }
  58. value, err := d.parseNext()
  59. if err != nil {
  60. return Value{}, err
  61. }
  62. n := value.size
  63. switch value.typ {
  64. case EOF:
  65. if len(d.startStack) != 0 ||
  66. d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 {
  67. return Value{}, io.ErrUnexpectedEOF
  68. }
  69. case Null:
  70. if !d.isValueNext() {
  71. return Value{}, d.newSyntaxError("unexpected value null")
  72. }
  73. case Bool, Number:
  74. if !d.isValueNext() {
  75. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  76. }
  77. case String:
  78. if d.isValueNext() {
  79. break
  80. }
  81. // Check if this is for an object name.
  82. if d.value.typ&(StartObject|comma) == 0 {
  83. return Value{}, d.newSyntaxError("unexpected value %v", value.Raw())
  84. }
  85. d.in = d.in[n:]
  86. d.consume(0)
  87. if c := d.in[0]; c != ':' {
  88. return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
  89. }
  90. n = 1
  91. value.typ = Name
  92. case StartObject, StartArray:
  93. if !d.isValueNext() {
  94. return Value{}, d.newSyntaxError("unexpected character %v", value.Raw())
  95. }
  96. d.startStack = append(d.startStack, value.typ)
  97. case EndObject:
  98. if len(d.startStack) == 0 ||
  99. d.value.typ == comma ||
  100. d.startStack[len(d.startStack)-1] != StartObject {
  101. return Value{}, d.newSyntaxError("unexpected character }")
  102. }
  103. d.startStack = d.startStack[:len(d.startStack)-1]
  104. case EndArray:
  105. if len(d.startStack) == 0 ||
  106. d.value.typ == comma ||
  107. d.startStack[len(d.startStack)-1] != StartArray {
  108. return Value{}, d.newSyntaxError("unexpected character ]")
  109. }
  110. d.startStack = d.startStack[:len(d.startStack)-1]
  111. case comma:
  112. if len(d.startStack) == 0 ||
  113. d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
  114. return Value{}, d.newSyntaxError("unexpected character ,")
  115. }
  116. }
  117. // Update d.value only after validating value to be in the right sequence.
  118. d.value = value
  119. d.in = d.in[n:]
  120. if d.value.typ == comma {
  121. return d.Read()
  122. }
  123. return value, nil
  124. }
// errRegexp matches, anchored at the start of the input, either a run of 1 to
// 32 characters from the set [-+._a-zA-Z0-9] (anything that looks like a
// non-delimiter) or, failing that, a single character. parseNext uses the
// match as the offending text in its syntax error messages.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
  127. // parseNext parses for the next JSON value. It returns a Value object for
  128. // different types, except for Name. It does not handle whether the next value
  129. // is in a valid sequence or not.
  130. func (d *Decoder) parseNext() (value Value, err error) {
  131. // Trim leading spaces.
  132. d.consume(0)
  133. in := d.in
  134. if len(in) == 0 {
  135. return d.newValue(EOF, nil, 0), nil
  136. }
  137. switch in[0] {
  138. case 'n':
  139. n := matchWithDelim("null", in)
  140. if n == 0 {
  141. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  142. }
  143. return d.newValue(Null, in, n), nil
  144. case 't':
  145. n := matchWithDelim("true", in)
  146. if n == 0 {
  147. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  148. }
  149. return d.newBoolValue(in, n, true), nil
  150. case 'f':
  151. n := matchWithDelim("false", in)
  152. if n == 0 {
  153. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  154. }
  155. return d.newBoolValue(in, n, false), nil
  156. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  157. n, ok := consumeNumber(in)
  158. if !ok {
  159. return Value{}, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
  160. }
  161. return d.newValue(Number, in, n), nil
  162. case '"':
  163. s, n, err := d.parseString(in)
  164. if err != nil {
  165. return Value{}, err
  166. }
  167. return d.newStringValue(in, n, s), nil
  168. case '{':
  169. return d.newValue(StartObject, in, 1), nil
  170. case '}':
  171. return d.newValue(EndObject, in, 1), nil
  172. case '[':
  173. return d.newValue(StartArray, in, 1), nil
  174. case ']':
  175. return d.newValue(EndArray, in, 1), nil
  176. case ',':
  177. return d.newValue(comma, in, 1), nil
  178. }
  179. return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
  180. }
  181. // position returns line and column number of index in given orig slice.
  182. func position(orig []byte, idx int) (int, int) {
  183. b := orig[:idx]
  184. line := bytes.Count(b, []byte("\n")) + 1
  185. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  186. b = b[i+1:]
  187. }
  188. column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
  189. return line, column
  190. }
  191. // newSyntaxError returns an error with line and column information useful for
  192. // syntax errors.
  193. func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
  194. e := errors.New(f, x...)
  195. line, column := position(d.orig, len(d.orig)-len(d.in))
  196. return errors.New("syntax error (line %d:%d): %v", line, column, e)
  197. }
  198. // matchWithDelim matches s with the input b and verifies that the match
  199. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  200. // As a special case, EOF is considered a delimiter. It returns the length of s
  201. // if there is a match, else 0.
  202. func matchWithDelim(s string, b []byte) int {
  203. if !bytes.HasPrefix(b, []byte(s)) {
  204. return 0
  205. }
  206. n := len(s)
  207. if n < len(b) && isNotDelim(b[n]) {
  208. return 0
  209. }
  210. return n
  211. }
  212. // isNotDelim returns true if given byte is a not delimiter character.
  213. func isNotDelim(c byte) bool {
  214. return (c == '-' || c == '+' || c == '.' || c == '_' ||
  215. ('a' <= c && c <= 'z') ||
  216. ('A' <= c && c <= 'Z') ||
  217. ('0' <= c && c <= '9'))
  218. }
  219. // consume consumes n bytes of input and any subsequent whitespace.
  220. func (d *Decoder) consume(n int) {
  221. d.in = d.in[n:]
  222. for len(d.in) > 0 {
  223. switch d.in[0] {
  224. case ' ', '\n', '\r', '\t':
  225. d.in = d.in[1:]
  226. default:
  227. return
  228. }
  229. }
  230. }
  231. // isValueNext returns true if next type should be a JSON value: Null,
  232. // Number, String or Bool.
  233. func (d *Decoder) isValueNext() bool {
  234. if len(d.startStack) == 0 {
  235. return d.value.typ == 0
  236. }
  237. start := d.startStack[len(d.startStack)-1]
  238. switch start {
  239. case StartObject:
  240. return d.value.typ&Name != 0
  241. case StartArray:
  242. return d.value.typ&(StartArray|comma) != 0
  243. }
  244. panic(fmt.Sprintf(
  245. "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
  246. d.value.typ, start))
  247. }
  248. // newValue constructs a Value for given Type.
  249. func (d *Decoder) newValue(typ Type, input []byte, size int) Value {
  250. return Value{
  251. typ: typ,
  252. input: d.orig,
  253. start: len(d.orig) - len(input),
  254. size: size,
  255. }
  256. }
  257. // newBoolValue constructs a Value for a JSON boolean.
  258. func (d *Decoder) newBoolValue(input []byte, size int, b bool) Value {
  259. return Value{
  260. typ: Bool,
  261. input: d.orig,
  262. start: len(d.orig) - len(input),
  263. size: size,
  264. boo: b,
  265. }
  266. }
  267. // newStringValue constructs a Value for a JSON string.
  268. func (d *Decoder) newStringValue(input []byte, size int, s string) Value {
  269. return Value{
  270. typ: String,
  271. input: d.orig,
  272. start: len(d.orig) - len(input),
  273. size: size,
  274. str: s,
  275. }
  276. }
  277. // Clone returns a copy of the Decoder for use in reading ahead the next JSON
  278. // object, array or other values without affecting current Decoder.
  279. func (d *Decoder) Clone() *Decoder {
  280. ret := *d
  281. ret.startStack = append([]Type(nil), ret.startStack...)
  282. return &ret
  283. }
// Value provides a parsed JSON type and value.
//
// The original input slice is stored in this struct in order to compute for
// position as needed. The raw JSON value is derived from the original input
// slice given start and size.
//
// For JSON boolean and string, it holds the converted value in boo and str
// fields respectively. For JSON number, the raw JSON value holds a valid number
// which is converted only in Int or Float. Other JSON types do not require any
// additional data.
type Value struct {
	// typ is the JSON type of this value.
	typ Type
	// input is the Decoder's original input (the whole document, not just
	// this token).
	input []byte
	// start is the byte offset of the token within input.
	start int
	// size is the length of the token in bytes.
	size int
	// boo is the decoded value for Bool tokens.
	boo bool
	// str is the decoded value for String tokens (and Name tokens, which
	// Decoder.Read derives from String tokens).
	str string
}
  302. func (v Value) newError(f string, x ...interface{}) error {
  303. e := errors.New(f, x...)
  304. line, col := v.Position()
  305. return errors.New("error (line %d:%d): %v", line, col, e)
  306. }
  307. // Type returns the JSON type.
  308. func (v Value) Type() Type {
  309. return v.typ
  310. }
  311. // Position returns the line and column of the value.
  312. func (v Value) Position() (int, int) {
  313. return position(v.input, v.start)
  314. }
  315. // Bool returns the bool value if token is Bool, else it will return an error.
  316. func (v Value) Bool() (bool, error) {
  317. if v.typ != Bool {
  318. return false, v.newError("%s is not a bool", v.Raw())
  319. }
  320. return v.boo, nil
  321. }
  322. // String returns the string value for a JSON string token or the read value in
  323. // string if token is not a string.
  324. func (v Value) String() string {
  325. if v.typ != String {
  326. return v.Raw()
  327. }
  328. return v.str
  329. }
  330. // Name returns the object name if token is Name, else it will return an error.
  331. func (v Value) Name() (string, error) {
  332. if v.typ != Name {
  333. return "", v.newError("%s is not an object name", v.Raw())
  334. }
  335. return v.str, nil
  336. }
  337. // Raw returns the read value in string.
  338. func (v Value) Raw() string {
  339. return string(v.input[v.start : v.start+v.size])
  340. }
  341. // Float returns the floating-point number if token is Number, else it will
  342. // return an error.
  343. //
  344. // The floating-point precision is specified by the bitSize parameter: 32 for
  345. // float32 or 64 for float64. If bitSize=32, the result still has type float64,
  346. // but it will be convertible to float32 without changing its value. It will
  347. // return an error if the number exceeds the floating point limits for given
  348. // bitSize.
  349. func (v Value) Float(bitSize int) (float64, error) {
  350. if v.typ != Number {
  351. return 0, v.newError("%s is not a number", v.Raw())
  352. }
  353. f, err := strconv.ParseFloat(v.Raw(), bitSize)
  354. if err != nil {
  355. return 0, v.newError("%v", err)
  356. }
  357. return f, nil
  358. }
  359. // Int returns the signed integer number if token is Number, else it will
  360. // return an error.
  361. //
  362. // The given bitSize specifies the integer type that the result must fit into.
  363. // It returns an error if the number is not an integer value or if the result
  364. // exceeds the limits for given bitSize.
  365. func (v Value) Int(bitSize int) (int64, error) {
  366. s, err := v.getIntStr()
  367. if err != nil {
  368. return 0, err
  369. }
  370. n, err := strconv.ParseInt(s, 10, bitSize)
  371. if err != nil {
  372. return 0, v.newError("%v", err)
  373. }
  374. return n, nil
  375. }
  376. // Uint returns the signed integer number if token is Number, else it will
  377. // return an error.
  378. //
  379. // The given bitSize specifies the unsigned integer type that the result must
  380. // fit into. It returns an error if the number is not an unsigned integer value
  381. // or if the result exceeds the limits for given bitSize.
  382. func (v Value) Uint(bitSize int) (uint64, error) {
  383. s, err := v.getIntStr()
  384. if err != nil {
  385. return 0, err
  386. }
  387. n, err := strconv.ParseUint(s, 10, bitSize)
  388. if err != nil {
  389. return 0, v.newError("%v", err)
  390. }
  391. return n, nil
  392. }
  393. func (v Value) getIntStr() (string, error) {
  394. if v.typ != Number {
  395. return "", v.newError("%s is not a number", v.input)
  396. }
  397. parts, ok := parseNumber(v.input[v.start : v.start+v.size])
  398. if !ok {
  399. return "", v.newError("%s is not a number", v.input)
  400. }
  401. num, ok := normalizeToIntString(parts)
  402. if !ok {
  403. return "", v.newError("cannot convert %s to integer", v.input)
  404. }
  405. return num, nil
  406. }