number.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "bytes"
  7. "io"
  8. "math"
  9. "strconv"
  10. "google.golang.org/protobuf/internal/errors"
  11. )
  12. // marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
  13. func (p *encoder) marshalNumber(v Value) error {
  14. var err error
  15. p.out, err = appendNumber(p.out, v)
  16. return err
  17. }
  18. func appendNumber(out []byte, v Value) ([]byte, error) {
  19. if len(v.raw) > 0 {
  20. switch v.Type() {
  21. case Bool, Int, Uint, Float32, Float64:
  22. return append(out, v.raw...), nil
  23. }
  24. }
  25. switch v.Type() {
  26. case Bool:
  27. if b, _ := v.Bool(); b {
  28. return append(out, "true"...), nil
  29. } else {
  30. return append(out, "false"...), nil
  31. }
  32. case Int:
  33. return strconv.AppendInt(out, int64(v.num), 10), nil
  34. case Uint:
  35. return strconv.AppendUint(out, uint64(v.num), 10), nil
  36. case Float32:
  37. return appendFloat(out, v, 32)
  38. case Float64:
  39. return appendFloat(out, v, 64)
  40. default:
  41. return nil, errors.New("invalid type %v, expected bool or number", v.Type())
  42. }
  43. }
  44. func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
  45. switch n := math.Float64frombits(v.num); {
  46. case math.IsNaN(n):
  47. return append(out, "nan"...), nil
  48. case math.IsInf(n, +1):
  49. return append(out, "inf"...), nil
  50. case math.IsInf(n, -1):
  51. return append(out, "-inf"...), nil
  52. default:
  53. return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil
  54. }
  55. }
  56. // These regular expressions were derived by reverse engineering the C++ code
  57. // in tokenizer.cc and text_format.cc.
  58. var (
  59. literals = map[string]interface{}{
  60. // These exact literals are the ones supported in C++.
  61. // In C++, a 1-bit unsigned integers is also allowed to represent
  62. // a boolean. This is handled in Value.Bool.
  63. "t": true,
  64. "true": true,
  65. "True": true,
  66. "f": false,
  67. "false": false,
  68. "False": false,
  69. // C++ permits "-nan" and the case-insensitive variants of these.
  70. // However, Go continues to be case-sensitive.
  71. "nan": math.NaN(),
  72. "inf": math.Inf(+1),
  73. "-inf": math.Inf(-1),
  74. }
  75. )
  76. // unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
  77. func (p *decoder) unmarshalNumber() (Value, error) {
  78. v, n, err := consumeNumber(p.in)
  79. p.consume(n)
  80. return v, err
  81. }
  82. func consumeNumber(in []byte) (Value, int, error) {
  83. if len(in) == 0 {
  84. return Value{}, 0, io.ErrUnexpectedEOF
  85. }
  86. if v, n := matchLiteral(in); n > 0 {
  87. return rawValueOf(v, in[:n]), n, nil
  88. }
  89. num, ok := parseNumber(in)
  90. if !ok {
  91. return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
  92. }
  93. if num.typ == numFloat {
  94. f, err := strconv.ParseFloat(string(num.value), 64)
  95. if err != nil {
  96. return Value{}, 0, err
  97. }
  98. return rawValueOf(f, in[:num.size]), num.size, nil
  99. }
  100. if num.neg {
  101. v, err := strconv.ParseInt(string(num.value), 0, 64)
  102. if err != nil {
  103. return Value{}, 0, err
  104. }
  105. return rawValueOf(v, num.value), num.size, nil
  106. }
  107. v, err := strconv.ParseUint(string(num.value), 0, 64)
  108. if err != nil {
  109. return Value{}, 0, err
  110. }
  111. return rawValueOf(v, num.value), num.size, nil
  112. }
  113. func matchLiteral(in []byte) (interface{}, int) {
  114. switch in[0] {
  115. case 't', 'T':
  116. rest := in[1:]
  117. if len(rest) == 0 || isDelim(rest[0]) {
  118. return true, 1
  119. }
  120. if n := matchStringWithDelim("rue", rest); n > 0 {
  121. return true, 4
  122. }
  123. case 'f', 'F':
  124. rest := in[1:]
  125. if len(rest) == 0 || isDelim(rest[0]) {
  126. return false, 1
  127. }
  128. if n := matchStringWithDelim("alse", rest); n > 0 {
  129. return false, 5
  130. }
  131. case 'n':
  132. if n := matchStringWithDelim("nan", in); n > 0 {
  133. return math.NaN(), 3
  134. }
  135. case 'i':
  136. if n := matchStringWithDelim("inf", in); n > 0 {
  137. return math.Inf(1), 3
  138. }
  139. case '-':
  140. if n := matchStringWithDelim("-inf", in); n > 0 {
  141. return math.Inf(-1), 4
  142. }
  143. }
  144. return nil, 0
  145. }
  146. func matchStringWithDelim(s string, b []byte) int {
  147. if !bytes.HasPrefix(b, []byte(s)) {
  148. return 0
  149. }
  150. n := len(s)
  151. if n < len(b) && !isDelim(b[n]) {
  152. return 0
  153. }
  154. return n
  155. }
  156. type numType uint8
  157. const (
  158. numDec numType = (1 << iota) / 2
  159. numHex
  160. numOct
  161. numFloat
  162. )
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package.
type number struct {
	// Kind of number that was recognized (decimal, hex, octal, or float).
	typ numType
	// Whether the input started with a '-' sign.
	neg bool
	// Size of input taken up by the number. This may not be the same as
	// len(number.value).
	size int
	// Bytes for doing strconv.Parse{Float,Int,Uint} conversion.
	value []byte
}
  174. // parseNumber constructs a number object from given input. It allows for the
  175. // following patterns:
  176. // integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
  177. // float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
  178. func parseNumber(input []byte) (number, bool) {
  179. var size int
  180. var neg bool
  181. typ := numDec
  182. s := input
  183. if len(s) == 0 {
  184. return number{}, false
  185. }
  186. // Optional -
  187. if s[0] == '-' {
  188. neg = true
  189. s = s[1:]
  190. size++
  191. if len(s) == 0 {
  192. return number{}, false
  193. }
  194. }
  195. // C++ allows for whitespace and comments in between the negative sign and
  196. // the rest of the number. This logic currently does not but is consistent
  197. // with v1.
  198. switch {
  199. case s[0] == '0':
  200. if len(s) > 1 {
  201. switch {
  202. case s[1] == 'x' || s[1] == 'X':
  203. // Parse as hex number.
  204. typ = numHex
  205. n := 2
  206. s = s[2:]
  207. for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
  208. ('a' <= s[0] && s[0] <= 'f') ||
  209. ('A' <= s[0] && s[0] <= 'F')) {
  210. s = s[1:]
  211. n++
  212. }
  213. if n == 2 {
  214. return number{}, false
  215. }
  216. size += n
  217. case '0' <= s[1] && s[1] <= '7':
  218. // Parse as octal number.
  219. typ = numOct
  220. n := 2
  221. s = s[2:]
  222. for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
  223. s = s[1:]
  224. n++
  225. }
  226. size += n
  227. }
  228. if typ&(numHex|numOct) > 0 {
  229. if len(s) > 0 && !isDelim(s[0]) {
  230. return number{}, false
  231. }
  232. return number{
  233. typ: typ,
  234. size: size,
  235. neg: neg,
  236. value: input[:size],
  237. }, true
  238. }
  239. }
  240. s = s[1:]
  241. size++
  242. case '1' <= s[0] && s[0] <= '9':
  243. n := 1
  244. s = s[1:]
  245. for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
  246. s = s[1:]
  247. n++
  248. }
  249. size += n
  250. case s[0] == '.':
  251. // Handled below.
  252. default:
  253. return number{}, false
  254. }
  255. // . followed by 0 or more digits.
  256. if len(s) > 0 && s[0] == '.' {
  257. typ = numFloat
  258. n := 1
  259. s = s[1:]
  260. for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
  261. s = s[1:]
  262. n++
  263. }
  264. size += n
  265. }
  266. // e or E followed by an optional - or + and 1 or more digits.
  267. if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
  268. typ = numFloat
  269. s = s[1:]
  270. n := 1
  271. if s[0] == '+' || s[0] == '-' {
  272. s = s[1:]
  273. n++
  274. if len(s) == 0 {
  275. return number{}, false
  276. }
  277. }
  278. for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
  279. s = s[1:]
  280. n++
  281. }
  282. size += n
  283. }
  284. // At this point, input[:size] contains a valid number that can be converted
  285. // via strconv.Parse{Float,Int,Uint}.
  286. value := input[:size]
  287. // Optional suffix f or F for floats.
  288. if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
  289. typ = numFloat
  290. s = s[1:]
  291. size++
  292. }
  293. // Check that next byte is a delimiter or it is at the end.
  294. if len(s) > 0 && !isDelim(s[0]) {
  295. return number{}, false
  296. }
  297. return number{
  298. typ: typ,
  299. size: size,
  300. neg: neg,
  301. value: value,
  302. }, true
  303. }