text_parser.go 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. // Go support for Protocol Buffers - Google's data interchange format
  2. //
  3. // Copyright 2010 The Go Authors. All rights reserved.
  4. // https://github.com/golang/protobuf
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. package proto
  32. // Functions for parsing the Text protocol buffer format.
  33. // TODO: message sets.
  34. import (
  35. "encoding"
  36. "errors"
  37. "fmt"
  38. "reflect"
  39. "strconv"
  40. "strings"
  41. "unicode/utf8"
  42. )
  43. type ParseError struct {
  44. Message string
  45. Line int // 1-based line number
  46. Offset int // 0-based byte offset from start of input
  47. }
  48. func (p *ParseError) Error() string {
  49. if p.Line == 1 {
  50. // show offset only for first line
  51. return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  52. }
  53. return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  54. }
  55. type token struct {
  56. value string
  57. err *ParseError
  58. line int // line number
  59. offset int // byte number from start of input, not start of line
  60. unquoted string // the unquoted version of value, if it was a quoted string
  61. }
  62. func (t *token) String() string {
  63. if t.err == nil {
  64. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  65. }
  66. return fmt.Sprintf("parse error: %v", t.err)
  67. }
  68. type textParser struct {
  69. s string // remaining input
  70. done bool // whether the parsing is finished (success or error)
  71. backed bool // whether back() was called
  72. offset, line int
  73. cur token
  74. }
  75. func newTextParser(s string) *textParser {
  76. p := new(textParser)
  77. p.s = s
  78. p.line = 1
  79. p.cur.line = 1
  80. return p
  81. }
  82. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  83. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  84. p.cur.err = pe
  85. p.done = true
  86. return pe
  87. }
  88. // Numbers and identifiers are matched by [-+._A-Za-z0-9]
  89. func isIdentOrNumberChar(c byte) bool {
  90. switch {
  91. case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
  92. return true
  93. case '0' <= c && c <= '9':
  94. return true
  95. }
  96. switch c {
  97. case '-', '+', '.', '_':
  98. return true
  99. }
  100. return false
  101. }
  102. func isWhitespace(c byte) bool {
  103. switch c {
  104. case ' ', '\t', '\n', '\r':
  105. return true
  106. }
  107. return false
  108. }
  109. func isQuote(c byte) bool {
  110. switch c {
  111. case '"', '\'':
  112. return true
  113. }
  114. return false
  115. }
  116. func (p *textParser) skipWhitespace() {
  117. i := 0
  118. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  119. if p.s[i] == '#' {
  120. // comment; skip to end of line or input
  121. for i < len(p.s) && p.s[i] != '\n' {
  122. i++
  123. }
  124. if i == len(p.s) {
  125. break
  126. }
  127. }
  128. if p.s[i] == '\n' {
  129. p.line++
  130. }
  131. i++
  132. }
  133. p.offset += i
  134. p.s = p.s[i:len(p.s)]
  135. if len(p.s) == 0 {
  136. p.done = true
  137. }
  138. }
  139. func (p *textParser) advance() {
  140. // Skip whitespace
  141. p.skipWhitespace()
  142. if p.done {
  143. return
  144. }
  145. // Start of non-whitespace
  146. p.cur.err = nil
  147. p.cur.offset, p.cur.line = p.offset, p.line
  148. p.cur.unquoted = ""
  149. switch p.s[0] {
  150. case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
  151. // Single symbol
  152. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  153. case '"', '\'':
  154. // Quoted string
  155. i := 1
  156. for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
  157. if p.s[i] == '\\' && i+1 < len(p.s) {
  158. // skip escaped char
  159. i++
  160. }
  161. i++
  162. }
  163. if i >= len(p.s) || p.s[i] != p.s[0] {
  164. p.errorf("unmatched quote")
  165. return
  166. }
  167. unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
  168. if err != nil {
  169. p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
  170. return
  171. }
  172. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  173. p.cur.unquoted = unq
  174. default:
  175. i := 0
  176. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  177. i++
  178. }
  179. if i == 0 {
  180. p.errorf("unexpected byte %#x", p.s[0])
  181. return
  182. }
  183. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  184. }
  185. p.offset += len(p.cur.value)
  186. }
  187. var (
  188. errBadUTF8 = errors.New("proto: bad UTF-8")
  189. errBadHex = errors.New("proto: bad hexadecimal")
  190. )
  191. func unquoteC(s string, quote rune) (string, error) {
  192. // This is based on C++'s tokenizer.cc.
  193. // Despite its name, this is *not* parsing C syntax.
  194. // For instance, "\0" is an invalid quoted string.
  195. // Avoid allocation in trivial cases.
  196. simple := true
  197. for _, r := range s {
  198. if r == '\\' || r == quote {
  199. simple = false
  200. break
  201. }
  202. }
  203. if simple {
  204. return s, nil
  205. }
  206. buf := make([]byte, 0, 3*len(s)/2)
  207. for len(s) > 0 {
  208. r, n := utf8.DecodeRuneInString(s)
  209. if r == utf8.RuneError && n == 1 {
  210. return "", errBadUTF8
  211. }
  212. s = s[n:]
  213. if r != '\\' {
  214. if r < utf8.RuneSelf {
  215. buf = append(buf, byte(r))
  216. } else {
  217. buf = append(buf, string(r)...)
  218. }
  219. continue
  220. }
  221. ch, tail, err := unescape(s)
  222. if err != nil {
  223. return "", err
  224. }
  225. buf = append(buf, ch...)
  226. s = tail
  227. }
  228. return string(buf), nil
  229. }
  230. func unescape(s string) (ch string, tail string, err error) {
  231. r, n := utf8.DecodeRuneInString(s)
  232. if r == utf8.RuneError && n == 1 {
  233. return "", "", errBadUTF8
  234. }
  235. s = s[n:]
  236. switch r {
  237. case 'a':
  238. return "\a", s, nil
  239. case 'b':
  240. return "\b", s, nil
  241. case 'f':
  242. return "\f", s, nil
  243. case 'n':
  244. return "\n", s, nil
  245. case 'r':
  246. return "\r", s, nil
  247. case 't':
  248. return "\t", s, nil
  249. case 'v':
  250. return "\v", s, nil
  251. case '?':
  252. return "?", s, nil // trigraph workaround
  253. case '\'', '"', '\\':
  254. return string(r), s, nil
  255. case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
  256. if len(s) < 2 {
  257. return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
  258. }
  259. base := 8
  260. ss := s[:2]
  261. s = s[2:]
  262. if r == 'x' || r == 'X' {
  263. base = 16
  264. } else {
  265. ss = string(r) + ss
  266. }
  267. i, err := strconv.ParseUint(ss, base, 8)
  268. if err != nil {
  269. return "", "", err
  270. }
  271. return string([]byte{byte(i)}), s, nil
  272. case 'u', 'U':
  273. n := 4
  274. if r == 'U' {
  275. n = 8
  276. }
  277. if len(s) < n {
  278. return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
  279. }
  280. bs := make([]byte, n/2)
  281. for i := 0; i < n; i += 2 {
  282. a, ok1 := unhex(s[i])
  283. b, ok2 := unhex(s[i+1])
  284. if !ok1 || !ok2 {
  285. return "", "", errBadHex
  286. }
  287. bs[i/2] = a<<4 | b
  288. }
  289. s = s[n:]
  290. return string(bs), s, nil
  291. }
  292. return "", "", fmt.Errorf(`unknown escape \%c`, r)
  293. }
  294. // Adapted from src/pkg/strconv/quote.go.
  295. func unhex(b byte) (v byte, ok bool) {
  296. switch {
  297. case '0' <= b && b <= '9':
  298. return b - '0', true
  299. case 'a' <= b && b <= 'f':
  300. return b - 'a' + 10, true
  301. case 'A' <= b && b <= 'F':
  302. return b - 'A' + 10, true
  303. }
  304. return 0, false
  305. }
  306. // Back off the parser by one token. Can only be done between calls to next().
  307. // It makes the next advance() a no-op.
  308. func (p *textParser) back() { p.backed = true }
  309. // Advances the parser and returns the new current token.
  310. func (p *textParser) next() *token {
  311. if p.backed || p.done {
  312. p.backed = false
  313. return &p.cur
  314. }
  315. p.advance()
  316. if p.done {
  317. p.cur.value = ""
  318. } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
  319. // Look for multiple quoted strings separated by whitespace,
  320. // and concatenate them.
  321. cat := p.cur
  322. for {
  323. p.skipWhitespace()
  324. if p.done || !isQuote(p.s[0]) {
  325. break
  326. }
  327. p.advance()
  328. if p.cur.err != nil {
  329. return &p.cur
  330. }
  331. cat.value += " " + p.cur.value
  332. cat.unquoted += p.cur.unquoted
  333. }
  334. p.done = false // parser may have seen EOF, but we want to return cat
  335. p.cur = cat
  336. }
  337. return &p.cur
  338. }
  339. func (p *textParser) consumeToken(s string) error {
  340. tok := p.next()
  341. if tok.err != nil {
  342. return tok.err
  343. }
  344. if tok.value != s {
  345. p.back()
  346. return p.errorf("expected %q, found %q", s, tok.value)
  347. }
  348. return nil
  349. }
  350. // Return a RequiredNotSetError indicating which required field was not set.
  351. func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
  352. st := sv.Type()
  353. sprops := GetProperties(st)
  354. for i := 0; i < st.NumField(); i++ {
  355. if !isNil(sv.Field(i)) {
  356. continue
  357. }
  358. props := sprops.Prop[i]
  359. if props.Required {
  360. return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
  361. }
  362. }
  363. return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
  364. }
  365. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  366. func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
  367. i, ok := sprops.decoderOrigNames[name]
  368. if ok {
  369. return i, sprops.Prop[i], true
  370. }
  371. return -1, nil, false
  372. }
  373. // Consume a ':' from the input stream (if the next token is a colon),
  374. // returning an error if a colon is needed but not present.
  375. func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
  376. tok := p.next()
  377. if tok.err != nil {
  378. return tok.err
  379. }
  380. if tok.value != ":" {
  381. // Colon is optional when the field is a group or message.
  382. needColon := true
  383. switch props.Wire {
  384. case "group":
  385. needColon = false
  386. case "bytes":
  387. // A "bytes" field is either a message, a string, or a repeated field;
  388. // those three become *T, *string and []T respectively, so we can check for
  389. // this field being a pointer to a non-string.
  390. if typ.Kind() == reflect.Ptr {
  391. // *T or *string
  392. if typ.Elem().Kind() == reflect.String {
  393. break
  394. }
  395. } else if typ.Kind() == reflect.Slice {
  396. // []T or []*T
  397. if typ.Elem().Kind() != reflect.Ptr {
  398. break
  399. }
  400. } else if typ.Kind() == reflect.String {
  401. // The proto3 exception is for a string field,
  402. // which requires a colon.
  403. break
  404. }
  405. needColon = false
  406. }
  407. if needColon {
  408. return p.errorf("expected ':', found %q", tok.value)
  409. }
  410. p.back()
  411. }
  412. return nil
  413. }
  414. func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
  415. st := sv.Type()
  416. sprops := GetProperties(st)
  417. reqCount := sprops.reqCount
  418. var reqFieldErr error
  419. fieldSet := make(map[string]bool)
  420. // A struct is a sequence of "name: value", terminated by one of
  421. // '>' or '}', or the end of the input. A name may also be
  422. // "[extension]" or "[type/url]".
  423. //
  424. // The whole struct can also be an expanded Any message, like:
  425. // [type/url] < ... struct contents ... >
  426. for {
  427. tok := p.next()
  428. if tok.err != nil {
  429. return tok.err
  430. }
  431. if tok.value == terminator {
  432. break
  433. }
  434. if tok.value == "[" {
  435. // Looks like an extension or an Any.
  436. //
  437. // TODO: Check whether we need to handle
  438. // namespace rooted names (e.g. ".something.Foo").
  439. extName, err := p.consumeExtName()
  440. if err != nil {
  441. return err
  442. }
  443. if s := strings.LastIndex(extName, "/"); s >= 0 {
  444. // If it contains a slash, it's an Any type URL.
  445. messageName := extName[s+1:]
  446. mt := MessageType(messageName)
  447. if mt == nil {
  448. return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
  449. }
  450. tok = p.next()
  451. if tok.err != nil {
  452. return tok.err
  453. }
  454. // consume an optional colon
  455. if tok.value == ":" {
  456. tok = p.next()
  457. if tok.err != nil {
  458. return tok.err
  459. }
  460. }
  461. var terminator string
  462. switch tok.value {
  463. case "<":
  464. terminator = ">"
  465. case "{":
  466. terminator = "}"
  467. default:
  468. return p.errorf("expected '{' or '<', found %q", tok.value)
  469. }
  470. v := reflect.New(mt.Elem())
  471. if pe := p.readStruct(v.Elem(), terminator); pe != nil {
  472. return pe
  473. }
  474. b, err := Marshal(v.Interface().(Message))
  475. if err != nil {
  476. return p.errorf("failed to marshal message of type %q: %v", messageName, err)
  477. }
  478. sv.FieldByName("TypeUrl").SetString(extName)
  479. sv.FieldByName("Value").SetBytes(b)
  480. continue
  481. }
  482. var desc *ExtensionDesc
  483. // This could be faster, but it's functional.
  484. // TODO: Do something smarter than a linear scan.
  485. for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
  486. if d.Name == extName {
  487. desc = d
  488. break
  489. }
  490. }
  491. if desc == nil {
  492. return p.errorf("unrecognized extension %q", extName)
  493. }
  494. props := &Properties{}
  495. props.Parse(desc.Tag)
  496. typ := reflect.TypeOf(desc.ExtensionType)
  497. if err := p.checkForColon(props, typ); err != nil {
  498. return err
  499. }
  500. rep := desc.repeated()
  501. // Read the extension structure, and set it in
  502. // the value we're constructing.
  503. var ext reflect.Value
  504. if !rep {
  505. ext = reflect.New(typ).Elem()
  506. } else {
  507. ext = reflect.New(typ.Elem()).Elem()
  508. }
  509. if err := p.readAny(ext, props); err != nil {
  510. if _, ok := err.(*RequiredNotSetError); !ok {
  511. return err
  512. }
  513. reqFieldErr = err
  514. }
  515. ep := sv.Addr().Interface().(Message)
  516. if !rep {
  517. SetExtension(ep, desc, ext.Interface())
  518. } else {
  519. old, err := GetExtension(ep, desc)
  520. var sl reflect.Value
  521. if err == nil {
  522. sl = reflect.ValueOf(old) // existing slice
  523. } else {
  524. sl = reflect.MakeSlice(typ, 0, 1)
  525. }
  526. sl = reflect.Append(sl, ext)
  527. SetExtension(ep, desc, sl.Interface())
  528. }
  529. if err := p.consumeOptionalSeparator(); err != nil {
  530. return err
  531. }
  532. continue
  533. }
  534. // This is a normal, non-extension field.
  535. name := tok.value
  536. var dst reflect.Value
  537. fi, props, ok := structFieldByName(sprops, name)
  538. if ok {
  539. dst = sv.Field(fi)
  540. } else if oop, ok := sprops.OneofTypes[name]; ok {
  541. // It is a oneof.
  542. props = oop.Prop
  543. nv := reflect.New(oop.Type.Elem())
  544. dst = nv.Elem().Field(0)
  545. sv.Field(oop.Field).Set(nv)
  546. }
  547. if !dst.IsValid() {
  548. return p.errorf("unknown field name %q in %v", name, st)
  549. }
  550. if dst.Kind() == reflect.Map {
  551. // Consume any colon.
  552. if err := p.checkForColon(props, dst.Type()); err != nil {
  553. return err
  554. }
  555. // Construct the map if it doesn't already exist.
  556. if dst.IsNil() {
  557. dst.Set(reflect.MakeMap(dst.Type()))
  558. }
  559. key := reflect.New(dst.Type().Key()).Elem()
  560. val := reflect.New(dst.Type().Elem()).Elem()
  561. // The map entry should be this sequence of tokens:
  562. // < key : KEY value : VALUE >
  563. // However, implementations may omit key or value, and technically
  564. // we should support them in any order. See b/28924776 for a time
  565. // this went wrong.
  566. tok := p.next()
  567. var terminator string
  568. switch tok.value {
  569. case "<":
  570. terminator = ">"
  571. case "{":
  572. terminator = "}"
  573. default:
  574. return p.errorf("expected '{' or '<', found %q", tok.value)
  575. }
  576. for {
  577. tok := p.next()
  578. if tok.err != nil {
  579. return tok.err
  580. }
  581. if tok.value == terminator {
  582. break
  583. }
  584. switch tok.value {
  585. case "key":
  586. if err := p.consumeToken(":"); err != nil {
  587. return err
  588. }
  589. if err := p.readAny(key, props.mkeyprop); err != nil {
  590. return err
  591. }
  592. if err := p.consumeOptionalSeparator(); err != nil {
  593. return err
  594. }
  595. case "value":
  596. if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil {
  597. return err
  598. }
  599. if err := p.readAny(val, props.mvalprop); err != nil {
  600. return err
  601. }
  602. if err := p.consumeOptionalSeparator(); err != nil {
  603. return err
  604. }
  605. default:
  606. p.back()
  607. return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
  608. }
  609. }
  610. dst.SetMapIndex(key, val)
  611. continue
  612. }
  613. // Check that it's not already set if it's not a repeated field.
  614. if !props.Repeated && fieldSet[name] {
  615. return p.errorf("non-repeated field %q was repeated", name)
  616. }
  617. if err := p.checkForColon(props, dst.Type()); err != nil {
  618. return err
  619. }
  620. // Parse into the field.
  621. fieldSet[name] = true
  622. if err := p.readAny(dst, props); err != nil {
  623. if _, ok := err.(*RequiredNotSetError); !ok {
  624. return err
  625. }
  626. reqFieldErr = err
  627. }
  628. if props.Required {
  629. reqCount--
  630. }
  631. if err := p.consumeOptionalSeparator(); err != nil {
  632. return err
  633. }
  634. }
  635. if reqCount > 0 {
  636. return p.missingRequiredFieldError(sv)
  637. }
  638. return reqFieldErr
  639. }
  640. // consumeExtName consumes extension name or expanded Any type URL and the
  641. // following ']'. It returns the name or URL consumed.
  642. func (p *textParser) consumeExtName() (string, error) {
  643. tok := p.next()
  644. if tok.err != nil {
  645. return "", tok.err
  646. }
  647. // If extension name or type url is quoted, it's a single token.
  648. if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
  649. name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
  650. if err != nil {
  651. return "", err
  652. }
  653. return name, p.consumeToken("]")
  654. }
  655. // Consume everything up to "]"
  656. var parts []string
  657. for tok.value != "]" {
  658. parts = append(parts, tok.value)
  659. tok = p.next()
  660. if tok.err != nil {
  661. return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
  662. }
  663. }
  664. return strings.Join(parts, ""), nil
  665. }
  666. // consumeOptionalSeparator consumes an optional semicolon or comma.
  667. // It is used in readStruct to provide backward compatibility.
  668. func (p *textParser) consumeOptionalSeparator() error {
  669. tok := p.next()
  670. if tok.err != nil {
  671. return tok.err
  672. }
  673. if tok.value != ";" && tok.value != "," {
  674. p.back()
  675. }
  676. return nil
  677. }
// readAny parses one value from the input into v, choosing the syntax from
// v's reflect.Kind. props supplies field metadata; in this function it is
// consulted only for the enum name of int32 fields and passed on to
// recursive calls.
//
// Slices (other than []byte) and pointers are handled by allocating the
// element and recursing. Any case that cannot interpret the token breaks out
// of the switch and ends at the generic "invalid <type>" error at the bottom.
func (p *textParser) readAny(v reflect.Value, props *Properties) error {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	// next() reports end of input as a token with an empty value.
	if tok.value == "" {
		return p.errorf("unexpected EOF")
	}

	switch fv := v; fv.Kind() {
	case reflect.Slice:
		at := v.Type()
		if at.Elem().Kind() == reflect.Uint8 {
			// Special case for []byte
			if tok.value[0] != '"' && tok.value[0] != '\'' {
				// Deliberately written out here, as the error after
				// this switch statement would write "invalid []byte: ...",
				// which is not as user-friendly.
				return p.errorf("invalid string: %v", tok.value)
			}
			bytes := []byte(tok.unquoted)
			fv.Set(reflect.ValueOf(bytes))
			return nil
		}
		// Repeated field.
		if tok.value == "[" {
			// Repeated field with list notation, like [1,2,3].
			// Each iteration appends a zero element and parses into it, so
			// at least one element is expected after the '['.
			for {
				fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
				err := p.readAny(fv.Index(fv.Len()-1), props)
				if err != nil {
					return err
				}
				tok := p.next()
				if tok.err != nil {
					return tok.err
				}
				if tok.value == "]" {
					break
				}
				if tok.value != "," {
					return p.errorf("Expected ']' or ',' found %q", tok.value)
				}
			}
			return nil
		}
		// One value of the repeated field.
		// Push the token back so the recursive call reads it as the element.
		p.back()
		fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
		return p.readAny(fv.Index(fv.Len()-1), props)
	case reflect.Bool:
		// Either "true", "false", 1 or 0.
		switch tok.value {
		case "true", "1":
			fv.SetBool(true)
			return nil
		case "false", "0":
			fv.SetBool(false)
			return nil
		}
	case reflect.Float32, reflect.Float64:
		v := tok.value
		// Ignore 'f' for compatibility with output generated by C++, but don't
		// remove 'f' when the value is "-inf" or "inf".
		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
			v = v[:len(v)-1]
		}
		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
			fv.SetFloat(f)
			return nil
		}
	case reflect.Int32:
		// Try a numeric literal first (base 0: decimal, hex or octal).
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			fv.SetInt(x)
			return nil
		}

		// Not a number: for enum fields, look the token up by name.
		if len(props.Enum) == 0 {
			break
		}
		m, ok := enumValueMaps[props.Enum]
		if !ok {
			break
		}
		x, ok := m[tok.value]
		if !ok {
			break
		}
		fv.SetInt(int64(x))
		return nil
	case reflect.Int64:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			fv.SetInt(x)
			return nil
		}

	case reflect.Ptr:
		// A basic field (indirected through pointer), or a repeated message/group
		// Push the token back, allocate the pointee and parse into it.
		p.back()
		fv.Set(reflect.New(fv.Type().Elem()))
		return p.readAny(fv.Elem(), props)
	case reflect.String:
		// Only quoted tokens are accepted as strings.
		if tok.value[0] == '"' || tok.value[0] == '\'' {
			fv.SetString(tok.unquoted)
			return nil
		}
	case reflect.Struct:
		// A nested message is delimited by {...} or <...>.
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return p.errorf("expected '{' or '<', found %q", tok.value)
		}
		// TODO: Handle nested messages which implement encoding.TextUnmarshaler.
		return p.readStruct(fv, terminator)
	case reflect.Uint32:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			fv.SetUint(uint64(x))
			return nil
		}
	case reflect.Uint64:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			fv.SetUint(x)
			return nil
		}
	}
	// Reached when the kind is unsupported or the token did not parse.
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
}
  806. // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
  807. // before starting to unmarshal, so any existing data in pb is always removed.
  808. // If a required field is not set and no other error occurs,
  809. // UnmarshalText returns *RequiredNotSetError.
  810. func UnmarshalText(s string, pb Message) error {
  811. if um, ok := pb.(encoding.TextUnmarshaler); ok {
  812. err := um.UnmarshalText([]byte(s))
  813. return err
  814. }
  815. pb.Reset()
  816. v := reflect.ValueOf(pb)
  817. if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
  818. return pe
  819. }
  820. return nil
  821. }