// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "io"
  8. "regexp"
  9. "unicode/utf8"
  10. "github.com/golang/protobuf/v2/internal/errors"
  11. )
  12. type syntaxError struct{ error }
  13. func newSyntaxError(f string, x ...interface{}) error {
  14. return syntaxError{errors.New(f, x...)}
  15. }
  16. // Unmarshal parses b as the JSON format.
  17. // It returns a Value, which represents the input as an AST.
  18. func Unmarshal(b []byte) (Value, error) {
  19. p := decoder{in: b}
  20. p.consume(0) // trim leading spaces
  21. v, err := p.unmarshalValue()
  22. if !p.nerr.Merge(err) {
  23. if e, ok := err.(syntaxError); ok {
  24. b = b[:len(b)-len(p.in)] // consumed input
  25. line := bytes.Count(b, []byte("\n")) + 1
  26. if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
  27. b = b[i+1:]
  28. }
  29. column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
  30. err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
  31. }
  32. return Value{}, err
  33. }
  34. if len(p.in) > 0 {
  35. return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
  36. }
  37. return v, p.nerr.E
  38. }
  39. type decoder struct {
  40. nerr errors.NonFatal
  41. in []byte
  42. }
  43. var literalRegexp = regexp.MustCompile("^(null|true|false)")
  44. func (p *decoder) unmarshalValue() (Value, error) {
  45. if len(p.in) == 0 {
  46. return Value{}, io.ErrUnexpectedEOF
  47. }
  48. switch p.in[0] {
  49. case 'n', 't', 'f':
  50. if n := matchWithDelim(literalRegexp, p.in); n > 0 {
  51. var v Value
  52. switch p.in[0] {
  53. case 'n':
  54. v = rawValueOf(nil, p.in[:n:n])
  55. case 't':
  56. v = rawValueOf(true, p.in[:n:n])
  57. case 'f':
  58. v = rawValueOf(false, p.in[:n:n])
  59. }
  60. p.consume(n)
  61. return v, nil
  62. }
  63. return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
  64. case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  65. return p.unmarshalNumber()
  66. case '"':
  67. return p.unmarshalString()
  68. case '[':
  69. return p.unmarshalArray()
  70. case '{':
  71. return p.unmarshalObject()
  72. default:
  73. return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
  74. }
  75. }
  76. func (p *decoder) unmarshalArray() (Value, error) {
  77. b := p.in
  78. var elems []Value
  79. if err := p.consumeChar('[', "at start of array"); err != nil {
  80. return Value{}, err
  81. }
  82. if len(p.in) > 0 && p.in[0] != ']' {
  83. for len(p.in) > 0 {
  84. v, err := p.unmarshalValue()
  85. if !p.nerr.Merge(err) {
  86. return Value{}, err
  87. }
  88. elems = append(elems, v)
  89. if !p.tryConsumeChar(',') {
  90. break
  91. }
  92. }
  93. }
  94. if err := p.consumeChar(']', "at end of array"); err != nil {
  95. return Value{}, err
  96. }
  97. b = b[:len(b)-len(p.in)]
  98. return rawValueOf(elems, b[:len(b):len(b)]), nil
  99. }
  100. func (p *decoder) unmarshalObject() (Value, error) {
  101. b := p.in
  102. var items [][2]Value
  103. if err := p.consumeChar('{', "at start of object"); err != nil {
  104. return Value{}, err
  105. }
  106. if len(p.in) > 0 && p.in[0] != '}' {
  107. for len(p.in) > 0 {
  108. k, err := p.unmarshalString()
  109. if !p.nerr.Merge(err) {
  110. return Value{}, err
  111. }
  112. if err := p.consumeChar(':', "in object"); err != nil {
  113. return Value{}, err
  114. }
  115. v, err := p.unmarshalValue()
  116. if !p.nerr.Merge(err) {
  117. return Value{}, err
  118. }
  119. items = append(items, [2]Value{k, v})
  120. if !p.tryConsumeChar(',') {
  121. break
  122. }
  123. }
  124. }
  125. if err := p.consumeChar('}', "at end of object"); err != nil {
  126. return Value{}, err
  127. }
  128. b = b[:len(b)-len(p.in)]
  129. return rawValueOf(items, b[:len(b):len(b)]), nil
  130. }
  131. func (p *decoder) consumeChar(c byte, msg string) error {
  132. if p.tryConsumeChar(c) {
  133. return nil
  134. }
  135. if len(p.in) == 0 {
  136. return io.ErrUnexpectedEOF
  137. }
  138. return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
  139. }
  140. func (p *decoder) tryConsumeChar(c byte) bool {
  141. if len(p.in) > 0 && p.in[0] == c {
  142. p.consume(1)
  143. return true
  144. }
  145. return false
  146. }
  147. // consume consumes n bytes of input and any subsequent whitespace.
  148. func (p *decoder) consume(n int) {
  149. p.in = p.in[n:]
  150. for len(p.in) > 0 {
  151. switch p.in[0] {
  152. case ' ', '\n', '\r', '\t':
  153. p.in = p.in[1:]
  154. default:
  155. return
  156. }
  157. }
  158. }
  159. // Any sequence that looks like a non-delimiter (for error reporting).
  160. var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
  161. // matchWithDelim matches r with the input b and verifies that the match
  162. // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
  163. // As a special case, EOF is considered a delimiter.
  164. func matchWithDelim(r *regexp.Regexp, b []byte) int {
  165. n := len(r.Find(b))
  166. if n < len(b) {
  167. // Check that that the next character is a delimiter.
  168. c := b[n]
  169. notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
  170. ('a' <= c && c <= 'z') ||
  171. ('A' <= c && c <= 'Z') ||
  172. ('0' <= c && c <= '9'))
  173. if notDelim {
  174. return 0
  175. }
  176. }
  177. return n
  178. }