123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- // Copyright 2018 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package text
- import (
- "bytes"
- "io"
- "regexp"
- "strconv"
- "unicode/utf8"
- "google.golang.org/protobuf/internal/errors"
- "google.golang.org/protobuf/reflect/protoreflect"
- )
// syntaxError wraps an error raised while parsing the input. Unmarshal
// type-asserts on it to decide whether to prefix the message with the line
// and column at which parsing stopped.
type syntaxError struct {
	error
}
- func newSyntaxError(f string, x ...interface{}) error {
- return syntaxError{errors.New(f, x...)}
- }
- // Unmarshal parses b as the proto text format.
- // It returns a Value, which is always of the Message type.
- func Unmarshal(b []byte) (Value, error) {
- p := decoder{in: b}
- p.consume(0) // trim leading spaces or comments
- v, err := p.unmarshalMessage(false)
- if err != nil {
- if e, ok := err.(syntaxError); ok {
- b = b[:len(b)-len(p.in)] // consumed input
- line := bytes.Count(b, []byte("\n")) + 1
- if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
- b = b[i+1:]
- }
- column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
- err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
- }
- return Value{}, err
- }
- if len(p.in) > 0 {
- return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
- }
- return v, nil
- }
// decoder holds the state of a text-format parse.
type decoder struct {
	// in is the input that has not been consumed yet; the parsing methods
	// re-slice it as they advance.
	in []byte
}
- func (p *decoder) unmarshalList() (Value, error) {
- b := p.in
- var elems []Value
- if err := p.consumeChar('[', "at start of list"); err != nil {
- return Value{}, err
- }
- if len(p.in) > 0 && p.in[0] != ']' {
- for len(p.in) > 0 {
- v, err := p.unmarshalValue()
- if err != nil {
- return Value{}, err
- }
- elems = append(elems, v)
- if !p.tryConsumeChar(',') {
- break
- }
- }
- }
- if err := p.consumeChar(']', "at end of list"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(elems, b[:len(b):len(b)]), nil
- }
- func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) {
- b := p.in
- var items [][2]Value
- delims := [2]byte{'{', '}'}
- if len(p.in) > 0 && p.in[0] == '<' {
- delims = [2]byte{'<', '>'}
- }
- if checkDelims {
- if err := p.consumeChar(delims[0], "at start of message"); err != nil {
- return Value{}, err
- }
- }
- for len(p.in) > 0 {
- if p.in[0] == '}' || p.in[0] == '>' {
- break
- }
- k, err := p.unmarshalKey()
- if err != nil {
- return Value{}, err
- }
- if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' {
- return Value{}, newSyntaxError("expected ':' after message key")
- }
- v, err := p.unmarshalValue()
- if err != nil {
- return Value{}, err
- }
- if p.tryConsumeChar(';') || p.tryConsumeChar(',') {
- // always optional
- }
- items = append(items, [2]Value{k, v})
- }
- if checkDelims {
- if err := p.consumeChar(delims[1], "at end of message"); err != nil {
- return Value{}, err
- }
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(items, b[:len(b):len(b)]), nil
- }
- // unmarshalKey parses the key, which may be a Name, String, or Uint.
- func (p *decoder) unmarshalKey() (v Value, err error) {
- if p.tryConsumeChar('[') {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
- }
- if p.in[0] == '\'' || p.in[0] == '"' {
- // Historically, Go's parser allowed a string for the Any type URL.
- // This is specific to Go and contrary to the C++ implementation,
- // which does not support strings for the Any type URL.
- v, err = p.unmarshalString()
- if err != nil {
- return Value{}, err
- }
- } else {
- v, err = p.unmarshalURL()
- if err != nil {
- return Value{}, err
- }
- }
- if err := p.consumeChar(']', "at end of extension name"); err != nil {
- return Value{}, err
- }
- return v, nil
- }
- v, err = p.unmarshalName()
- if err == nil {
- return v, nil
- }
- v, err = p.unmarshalNumberKey()
- if err == nil {
- return v, nil
- }
- return Value{}, err
- }
- // unmarshalURL parses an Any type URL string. The C++ parser does not handle
- // many legal URL strings. This implementation is more liberal and allows for
- // the pattern ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`).
- func (p *decoder) unmarshalURL() (Value, error) {
- s := p.in
- var size int
- for len(s) > 0 && (s[0] == '-' || s[0] == '_' ||
- ('0' <= s[0] && s[0] <= '9') ||
- ('a' <= s[0] && s[0] <= 'z') ||
- ('A' <= s[0] && s[0] <= 'Z')) {
- s = s[1:]
- size++
- if len(s) > 0 && (s[0] == '/' || s[0] == '.') {
- s = s[1:]
- size++
- }
- }
- // Last character cannot be '.' or '/'.
- // Next byte should either be a delimiter or it is at the end.
- if size == 0 || p.in[size-1] == '.' || p.in[size-1] == '/' ||
- (len(s) > 0 && !isDelim(s[0])) {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- v := rawValueOf(string(p.in[:size]), p.in[:size:size])
- p.consume(size)
- return v, nil
- }
- // unmarshalNumberKey parses field number as key. Field numbers are non-negative
- // integers.
- func (p *decoder) unmarshalNumberKey() (Value, error) {
- num, ok := parseNumber(p.in)
- if !ok || num.neg || num.typ == numFloat {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- v, err := strconv.ParseUint(string(num.value), 0, 64)
- if err != nil {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- p.consume(num.size)
- return rawValueOf(v, num.value), nil
- }
- func (p *decoder) unmarshalValue() (Value, error) {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
- }
- switch p.in[0] {
- case '"', '\'':
- return p.unmarshalStrings()
- case '[':
- return p.unmarshalList()
- case '{', '<':
- return p.unmarshalMessage(true)
- default:
- n, ok := consumeName(p.in)
- if ok && literals[string(p.in[:n])] == nil {
- v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
- p.consume(n)
- return v, nil
- }
- return p.unmarshalNumber()
- }
- }
- // unmarshalName unmarshals an unquoted proto identifier.
- // Regular expression that matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
- //
- // E.g., `field_name` => ValueOf(protoreflect.Name("field_name"))
- func (p *decoder) unmarshalName() (Value, error) {
- n, ok := consumeName(p.in)
- if !ok {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
- p.consume(n)
- return v, nil
- }
- func consumeName(input []byte) (int, bool) {
- var n int
- s := input
- if len(s) == 0 {
- return 0, false
- }
- switch {
- case s[0] == '_',
- 'a' <= s[0] && s[0] <= 'z',
- 'A' <= s[0] && s[0] <= 'Z':
- s = s[1:]
- n++
- default:
- return 0, false
- }
- for len(s) > 0 && (s[0] == '_' ||
- 'a' <= s[0] && s[0] <= 'z' ||
- 'A' <= s[0] && s[0] <= 'Z' ||
- '0' <= s[0] && s[0] <= '9') {
- s = s[1:]
- n++
- }
- if len(s) > 0 && !isDelim(s[0]) {
- return 0, false
- }
- return n, true
- }
- func (p *decoder) consumeChar(c byte, msg string) error {
- if p.tryConsumeChar(c) {
- return nil
- }
- if len(p.in) == 0 {
- return io.ErrUnexpectedEOF
- }
- return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
- }
- func (p *decoder) tryConsumeChar(c byte) bool {
- if len(p.in) > 0 && p.in[0] == c {
- p.consume(1)
- return true
- }
- return false
- }
- // consume consumes n bytes of input and any subsequent whitespace or comments.
- func (p *decoder) consume(n int) {
- p.in = p.in[n:]
- for len(p.in) > 0 {
- switch p.in[0] {
- case ' ', '\n', '\r', '\t':
- p.in = p.in[1:]
- case '#':
- if i := bytes.IndexByte(p.in, '\n'); i >= 0 {
- p.in = p.in[i+len("\n"):]
- } else {
- p.in = nil
- }
- default:
- return
- }
- }
- }
// errRegexp matches, at the start of the input, either a run of bytes drawn
// from [-+._a-zA-Z0-9/] or, failing that, a single character. It is used with
// Find to extract the offending token quoted in syntax error messages.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9\/]+|.)`)
// isDelim returns true if given byte is a delimiter character, that is, any
// byte other than '-', '+', '.', '_', an ASCII letter, or an ASCII digit.
func isDelim(c byte) bool {
	switch {
	case c == '-', c == '+', c == '.', c == '_':
		return false
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return false
	}
	return true
}
|