| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- // Copyright 2018 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package json
- import (
- "bytes"
- "io"
- "regexp"
- "unicode/utf8"
- "github.com/golang/protobuf/v2/internal/errors"
- )
- type syntaxError struct{ error }
- func newSyntaxError(f string, x ...interface{}) error {
- return syntaxError{errors.New(f, x...)}
- }
- // Unmarshal parses b as the JSON format.
- // It returns a Value, which represents the input as an AST.
- func Unmarshal(b []byte) (Value, error) {
- p := decoder{in: b}
- p.consume(0) // trim leading spaces
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- if e, ok := err.(syntaxError); ok {
- b = b[:len(b)-len(p.in)] // consumed input
- line := bytes.Count(b, []byte("\n")) + 1
- if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
- b = b[i+1:]
- }
- column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
- err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
- }
- return Value{}, err
- }
- if len(p.in) > 0 {
- return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
- }
- return v, p.nerr.E
- }
- type decoder struct {
- nerr errors.NonFatal
- in []byte
- }
// literalRegexp matches one of the JSON literals null, true, or false at the
// very start of the input.
var literalRegexp = regexp.MustCompile(`^(null|true|false)`)
- func (p *decoder) unmarshalValue() (Value, error) {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
- }
- switch p.in[0] {
- case 'n', 't', 'f':
- if n := matchWithDelim(literalRegexp, p.in); n > 0 {
- var v Value
- switch p.in[0] {
- case 'n':
- v = rawValueOf(nil, p.in[:n:n])
- case 't':
- v = rawValueOf(true, p.in[:n:n])
- case 'f':
- v = rawValueOf(false, p.in[:n:n])
- }
- p.consume(n)
- return v, nil
- }
- return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
- case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return p.unmarshalNumber()
- case '"':
- return p.unmarshalString()
- case '[':
- return p.unmarshalArray()
- case '{':
- return p.unmarshalObject()
- default:
- return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
- }
- }
- func (p *decoder) unmarshalArray() (Value, error) {
- b := p.in
- var elems []Value
- if err := p.consumeChar('[', "at start of array"); err != nil {
- return Value{}, err
- }
- if len(p.in) > 0 && p.in[0] != ']' {
- for len(p.in) > 0 {
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- elems = append(elems, v)
- if !p.tryConsumeChar(',') {
- break
- }
- }
- }
- if err := p.consumeChar(']', "at end of array"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(elems, b[:len(b):len(b)]), nil
- }
- func (p *decoder) unmarshalObject() (Value, error) {
- b := p.in
- var items [][2]Value
- if err := p.consumeChar('{', "at start of object"); err != nil {
- return Value{}, err
- }
- if len(p.in) > 0 && p.in[0] != '}' {
- for len(p.in) > 0 {
- k, err := p.unmarshalString()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- if err := p.consumeChar(':', "in object"); err != nil {
- return Value{}, err
- }
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- items = append(items, [2]Value{k, v})
- if !p.tryConsumeChar(',') {
- break
- }
- }
- }
- if err := p.consumeChar('}', "at end of object"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(items, b[:len(b):len(b)]), nil
- }
- func (p *decoder) consumeChar(c byte, msg string) error {
- if p.tryConsumeChar(c) {
- return nil
- }
- if len(p.in) == 0 {
- return io.ErrUnexpectedEOF
- }
- return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
- }
- func (p *decoder) tryConsumeChar(c byte) bool {
- if len(p.in) > 0 && p.in[0] == c {
- p.consume(1)
- return true
- }
- return false
- }
- // consume consumes n bytes of input and any subsequent whitespace.
- func (p *decoder) consume(n int) {
- p.in = p.in[n:]
- for len(p.in) > 0 {
- switch p.in[0] {
- case ' ', '\n', '\r', '\t':
- p.in = p.in[1:]
- default:
- return
- }
- }
- }
// errRegexp picks out the offending token for error messages: up to 32
// leading characters that look like a non-delimiter (letters, digits, and
// - + . _), or failing that a single character.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
// matchWithDelim matches r against the start of b and returns the length of
// the match, provided the match is followed by a delimiter, meaning any byte
// outside [-+_.a-zA-Z0-9]. Reaching the end of b also counts as a delimiter.
// It returns 0 when the byte after the match is not a delimiter, and also
// when r does not match at all.
func matchWithDelim(r *regexp.Regexp, b []byte) int {
	n := len(r.Find(b))
	if n >= len(b) {
		// The match runs to the end of the input.
		return n
	}

	// Check that the byte right after the match is a delimiter.
	switch c := b[n]; {
	case c == '-', c == '+', c == '.', c == '_',
		'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return 0
	}
	return n
}
|