123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337 |
- // Copyright 2018 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package text
- import (
- "bytes"
- "io"
- "math"
- "strconv"
- "google.golang.org/protobuf/internal/errors"
- )
- // marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
- func (p *encoder) marshalNumber(v Value) error {
- var err error
- p.out, err = appendNumber(p.out, v)
- return err
- }
- func appendNumber(out []byte, v Value) ([]byte, error) {
- if len(v.raw) > 0 {
- switch v.Type() {
- case Bool, Int, Uint, Float32, Float64:
- return append(out, v.raw...), nil
- }
- }
- switch v.Type() {
- case Bool:
- if b, _ := v.Bool(); b {
- return append(out, "true"...), nil
- } else {
- return append(out, "false"...), nil
- }
- case Int:
- return strconv.AppendInt(out, int64(v.num), 10), nil
- case Uint:
- return strconv.AppendUint(out, uint64(v.num), 10), nil
- case Float32:
- return appendFloat(out, v, 32)
- case Float64:
- return appendFloat(out, v, 64)
- default:
- return nil, errors.New("invalid type %v, expected bool or number", v.Type())
- }
- }
- func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
- switch n := math.Float64frombits(v.num); {
- case math.IsNaN(n):
- return append(out, "nan"...), nil
- case math.IsInf(n, +1):
- return append(out, "inf"...), nil
- case math.IsInf(n, -1):
- return append(out, "-inf"...), nil
- default:
- return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil
- }
- }
// literals lists the keyword spellings recognized for booleans and for the
// special floating-point values, mapped to the Go value each one denotes.
// The set was derived by reverse engineering the C++ code in tokenizer.cc
// and text_format.cc.
var literals = map[string]interface{}{
	// Boolean keywords: exactly the spellings supported in C++. C++ also
	// lets a 1-bit unsigned integer stand for a boolean; that case is
	// handled in Value.Bool rather than here.
	"true":  true,
	"True":  true,
	"t":     true,
	"false": false,
	"False": false,
	"f":     false,

	// Special floats. C++ additionally permits "-nan" and case-insensitive
	// variants of these, but Go remains case-sensitive.
	"inf":  math.Inf(+1),
	"-inf": math.Inf(-1),
	"nan":  math.NaN(),
}
- // unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
- func (p *decoder) unmarshalNumber() (Value, error) {
- v, n, err := consumeNumber(p.in)
- p.consume(n)
- return v, err
- }
- func consumeNumber(in []byte) (Value, int, error) {
- if len(in) == 0 {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- if v, n := matchLiteral(in); n > 0 {
- return rawValueOf(v, in[:n]), n, nil
- }
- num, ok := parseNumber(in)
- if !ok {
- return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
- }
- if num.typ == numFloat {
- f, err := strconv.ParseFloat(string(num.value), 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(f, in[:num.size]), num.size, nil
- }
- if num.neg {
- v, err := strconv.ParseInt(string(num.value), 0, 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(v, num.value), num.size, nil
- }
- v, err := strconv.ParseUint(string(num.value), 0, 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(v, num.value), num.size, nil
- }
- func matchLiteral(in []byte) (interface{}, int) {
- switch in[0] {
- case 't', 'T':
- rest := in[1:]
- if len(rest) == 0 || isDelim(rest[0]) {
- return true, 1
- }
- if n := matchStringWithDelim("rue", rest); n > 0 {
- return true, 4
- }
- case 'f', 'F':
- rest := in[1:]
- if len(rest) == 0 || isDelim(rest[0]) {
- return false, 1
- }
- if n := matchStringWithDelim("alse", rest); n > 0 {
- return false, 5
- }
- case 'n':
- if n := matchStringWithDelim("nan", in); n > 0 {
- return math.NaN(), 3
- }
- case 'i':
- if n := matchStringWithDelim("inf", in); n > 0 {
- return math.Inf(1), 3
- }
- case '-':
- if n := matchStringWithDelim("-inf", in); n > 0 {
- return math.Inf(-1), 4
- }
- }
- return nil, 0
- }
- func matchStringWithDelim(s string, b []byte) int {
- if !bytes.HasPrefix(b, []byte(s)) {
- return 0
- }
- n := len(s)
- if n < len(b) && !isDelim(b[n]) {
- return 0
- }
- return n
- }
// numType classifies a number recognized by parseNumber. numDec is the zero
// value; the remaining kinds occupy distinct bits so they can be tested
// together with a mask.
type numType uint8

const (
	numDec   numType = 0      // decimal integer
	numHex   numType = 1 << 0 // hexadecimal integer (0x / 0X prefix)
	numOct   numType = 1 << 1 // octal integer (leading 0)
	numFloat numType = 1 << 2 // floating-point number
)
- // number is the result of parsing out a valid number from parseNumber. It
- // contains data for doing float or integer conversion via the strconv package.
- type number struct {
- typ numType
- neg bool
- // Size of input taken up by the number. This may not be the same as
- // len(number.value).
- size int
- // Bytes for doing strconv.Parse{Float,Int,Uint} conversion.
- value []byte
- }
- // parseNumber constructs a number object from given input. It allows for the
- // following patterns:
- // integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
- // float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
- func parseNumber(input []byte) (number, bool) {
- var size int
- var neg bool
- typ := numDec
- s := input
- if len(s) == 0 {
- return number{}, false
- }
- // Optional -
- if s[0] == '-' {
- neg = true
- s = s[1:]
- size++
- if len(s) == 0 {
- return number{}, false
- }
- }
- // C++ allows for whitespace and comments in between the negative sign and
- // the rest of the number. This logic currently does not but is consistent
- // with v1.
- switch {
- case s[0] == '0':
- if len(s) > 1 {
- switch {
- case s[1] == 'x' || s[1] == 'X':
- // Parse as hex number.
- typ = numHex
- n := 2
- s = s[2:]
- for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
- ('a' <= s[0] && s[0] <= 'f') ||
- ('A' <= s[0] && s[0] <= 'F')) {
- s = s[1:]
- n++
- }
- if n == 2 {
- return number{}, false
- }
- size += n
- case '0' <= s[1] && s[1] <= '7':
- // Parse as octal number.
- typ = numOct
- n := 2
- s = s[2:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
- s = s[1:]
- n++
- }
- size += n
- }
- if typ&(numHex|numOct) > 0 {
- if len(s) > 0 && !isDelim(s[0]) {
- return number{}, false
- }
- return number{
- typ: typ,
- size: size,
- neg: neg,
- value: input[:size],
- }, true
- }
- }
- s = s[1:]
- size++
- case '1' <= s[0] && s[0] <= '9':
- n := 1
- s = s[1:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
- case s[0] == '.':
- // Handled below.
- default:
- return number{}, false
- }
- // . followed by 0 or more digits.
- if len(s) > 0 && s[0] == '.' {
- typ = numFloat
- n := 1
- s = s[1:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
- }
- // e or E followed by an optional - or + and 1 or more digits.
- if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
- typ = numFloat
- s = s[1:]
- n := 1
- if s[0] == '+' || s[0] == '-' {
- s = s[1:]
- n++
- if len(s) == 0 {
- return number{}, false
- }
- }
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
- }
- // At this point, input[:size] contains a valid number that can be converted
- // via strconv.Parse{Float,Int,Uint}.
- value := input[:size]
- // Optional suffix f or F for floats.
- if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
- typ = numFloat
- s = s[1:]
- size++
- }
- // Check that next byte is a delimiter or it is at the end.
- if len(s) > 0 && !isDelim(s[0]) {
- return number{}, false
- }
- return number{
- typ: typ,
- size: size,
- neg: neg,
- value: value,
- }, true
- }
|