text_parser.go 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package proto
  5. // Functions for parsing the Text protocol buffer format.
  6. // TODO: message sets.
  7. import (
  8. "encoding"
  9. "errors"
  10. "fmt"
  11. "reflect"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "unicode/utf8"
  16. )
  17. // Error string emitted when deserializing Any and fields are already set
  18. const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set"
  19. type ParseError struct {
  20. Message string
  21. Line int // 1-based line number
  22. Offset int // 0-based byte offset from start of input
  23. }
  24. func (p *ParseError) Error() string {
  25. if p.Line == 1 {
  26. // show offset only for first line
  27. return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  28. }
  29. return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  30. }
  31. type token struct {
  32. value string
  33. err *ParseError
  34. line int // line number
  35. offset int // byte number from start of input, not start of line
  36. unquoted string // the unquoted version of value, if it was a quoted string
  37. }
  38. func (t *token) String() string {
  39. if t.err == nil {
  40. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  41. }
  42. return fmt.Sprintf("parse error: %v", t.err)
  43. }
  44. type textParser struct {
  45. s string // remaining input
  46. done bool // whether the parsing is finished (success or error)
  47. backed bool // whether back() was called
  48. offset, line int
  49. cur token
  50. }
  51. func newTextParser(s string) *textParser {
  52. p := new(textParser)
  53. p.s = s
  54. p.line = 1
  55. p.cur.line = 1
  56. return p
  57. }
  58. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  59. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  60. p.cur.err = pe
  61. p.done = true
  62. return pe
  63. }
  64. // Numbers and identifiers are matched by [-+._A-Za-z0-9]
  65. func isIdentOrNumberChar(c byte) bool {
  66. switch {
  67. case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
  68. return true
  69. case '0' <= c && c <= '9':
  70. return true
  71. }
  72. switch c {
  73. case '-', '+', '.', '_':
  74. return true
  75. }
  76. return false
  77. }
  78. func isWhitespace(c byte) bool {
  79. switch c {
  80. case ' ', '\t', '\n', '\r':
  81. return true
  82. }
  83. return false
  84. }
  85. func isQuote(c byte) bool {
  86. switch c {
  87. case '"', '\'':
  88. return true
  89. }
  90. return false
  91. }
  92. func (p *textParser) skipWhitespace() {
  93. i := 0
  94. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  95. if p.s[i] == '#' {
  96. // comment; skip to end of line or input
  97. for i < len(p.s) && p.s[i] != '\n' {
  98. i++
  99. }
  100. if i == len(p.s) {
  101. break
  102. }
  103. }
  104. if p.s[i] == '\n' {
  105. p.line++
  106. }
  107. i++
  108. }
  109. p.offset += i
  110. p.s = p.s[i:len(p.s)]
  111. if len(p.s) == 0 {
  112. p.done = true
  113. }
  114. }
  115. func (p *textParser) advance() {
  116. // Skip whitespace
  117. p.skipWhitespace()
  118. if p.done {
  119. return
  120. }
  121. // Start of non-whitespace
  122. p.cur.err = nil
  123. p.cur.offset, p.cur.line = p.offset, p.line
  124. p.cur.unquoted = ""
  125. switch p.s[0] {
  126. case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
  127. // Single symbol
  128. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  129. case '"', '\'':
  130. // Quoted string
  131. i := 1
  132. for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
  133. if p.s[i] == '\\' && i+1 < len(p.s) {
  134. // skip escaped char
  135. i++
  136. }
  137. i++
  138. }
  139. if i >= len(p.s) || p.s[i] != p.s[0] {
  140. p.errorf("unmatched quote")
  141. return
  142. }
  143. unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
  144. if err != nil {
  145. p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
  146. return
  147. }
  148. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  149. p.cur.unquoted = unq
  150. default:
  151. i := 0
  152. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  153. i++
  154. }
  155. if i == 0 {
  156. p.errorf("unexpected byte %#x", p.s[0])
  157. return
  158. }
  159. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  160. }
  161. p.offset += len(p.cur.value)
  162. }
  163. var (
  164. errBadUTF8 = errors.New("proto: bad UTF-8")
  165. )
  166. func unquoteC(s string, quote rune) (string, error) {
  167. // This is based on C++'s tokenizer.cc.
  168. // Despite its name, this is *not* parsing C syntax.
  169. // For instance, "\0" is an invalid quoted string.
  170. // Avoid allocation in trivial cases.
  171. simple := true
  172. for _, r := range s {
  173. if r == '\\' || r == quote {
  174. simple = false
  175. break
  176. }
  177. }
  178. if simple {
  179. return s, nil
  180. }
  181. buf := make([]byte, 0, 3*len(s)/2)
  182. for len(s) > 0 {
  183. r, n := utf8.DecodeRuneInString(s)
  184. if r == utf8.RuneError && n == 1 {
  185. return "", errBadUTF8
  186. }
  187. s = s[n:]
  188. if r != '\\' {
  189. if r < utf8.RuneSelf {
  190. buf = append(buf, byte(r))
  191. } else {
  192. buf = append(buf, string(r)...)
  193. }
  194. continue
  195. }
  196. ch, tail, err := unescape(s)
  197. if err != nil {
  198. return "", err
  199. }
  200. buf = append(buf, ch...)
  201. s = tail
  202. }
  203. return string(buf), nil
  204. }
  205. func unescape(s string) (ch string, tail string, err error) {
  206. r, n := utf8.DecodeRuneInString(s)
  207. if r == utf8.RuneError && n == 1 {
  208. return "", "", errBadUTF8
  209. }
  210. s = s[n:]
  211. switch r {
  212. case 'a':
  213. return "\a", s, nil
  214. case 'b':
  215. return "\b", s, nil
  216. case 'f':
  217. return "\f", s, nil
  218. case 'n':
  219. return "\n", s, nil
  220. case 'r':
  221. return "\r", s, nil
  222. case 't':
  223. return "\t", s, nil
  224. case 'v':
  225. return "\v", s, nil
  226. case '?':
  227. return "?", s, nil // trigraph workaround
  228. case '\'', '"', '\\':
  229. return string(r), s, nil
  230. case '0', '1', '2', '3', '4', '5', '6', '7':
  231. if len(s) < 2 {
  232. return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
  233. }
  234. ss := string(r) + s[:2]
  235. s = s[2:]
  236. i, err := strconv.ParseUint(ss, 8, 8)
  237. if err != nil {
  238. return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
  239. }
  240. return string([]byte{byte(i)}), s, nil
  241. case 'x', 'X', 'u', 'U':
  242. var n int
  243. switch r {
  244. case 'x', 'X':
  245. n = 2
  246. case 'u':
  247. n = 4
  248. case 'U':
  249. n = 8
  250. }
  251. if len(s) < n {
  252. return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
  253. }
  254. ss := s[:n]
  255. s = s[n:]
  256. i, err := strconv.ParseUint(ss, 16, 64)
  257. if err != nil {
  258. return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
  259. }
  260. if r == 'x' || r == 'X' {
  261. return string([]byte{byte(i)}), s, nil
  262. }
  263. if i > utf8.MaxRune {
  264. return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
  265. }
  266. return string(i), s, nil
  267. }
  268. return "", "", fmt.Errorf(`unknown escape \%c`, r)
  269. }
  270. // Back off the parser by one token. Can only be done between calls to next().
  271. // It makes the next advance() a no-op.
  272. func (p *textParser) back() { p.backed = true }
  273. // Advances the parser and returns the new current token.
  274. func (p *textParser) next() *token {
  275. if p.backed || p.done {
  276. p.backed = false
  277. return &p.cur
  278. }
  279. p.advance()
  280. if p.done {
  281. p.cur.value = ""
  282. } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
  283. // Look for multiple quoted strings separated by whitespace,
  284. // and concatenate them.
  285. cat := p.cur
  286. for {
  287. p.skipWhitespace()
  288. if p.done || !isQuote(p.s[0]) {
  289. break
  290. }
  291. p.advance()
  292. if p.cur.err != nil {
  293. return &p.cur
  294. }
  295. cat.value += " " + p.cur.value
  296. cat.unquoted += p.cur.unquoted
  297. }
  298. p.done = false // parser may have seen EOF, but we want to return cat
  299. p.cur = cat
  300. }
  301. return &p.cur
  302. }
  303. func (p *textParser) consumeToken(s string) error {
  304. tok := p.next()
  305. if tok.err != nil {
  306. return tok.err
  307. }
  308. if tok.value != s {
  309. p.back()
  310. return p.errorf("expected %q, found %q", s, tok.value)
  311. }
  312. return nil
  313. }
  314. // Return a requiredNotSetError indicating which required field was not set.
  315. func (p *textParser) missingRequiredFieldError(sv reflect.Value) *requiredNotSetError {
  316. st := sv.Type()
  317. sprops := GetProperties(st)
  318. for i := 0; i < st.NumField(); i++ {
  319. if !isNil(sv.Field(i)) {
  320. continue
  321. }
  322. props := sprops.Prop[i]
  323. if props.Required {
  324. return &requiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
  325. }
  326. }
  327. return &requiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
  328. }
  329. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  330. func structFieldByName(sprops *textStructProperties, name string) (int, *Properties, bool) {
  331. i, ok := sprops.decoderOrigNames[name]
  332. if ok {
  333. return i, sprops.Prop[i], true
  334. }
  335. return -1, nil, false
  336. }
  337. // Consume a ':' from the input stream (if the next token is a colon),
  338. // returning an error if a colon is needed but not present.
  339. func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
  340. tok := p.next()
  341. if tok.err != nil {
  342. return tok.err
  343. }
  344. if tok.value != ":" {
  345. // Colon is optional when the field is a group or message.
  346. needColon := true
  347. switch props.Wire {
  348. case "group":
  349. needColon = false
  350. case "bytes":
  351. // A "bytes" field is either a message, a string, or a repeated field;
  352. // those three become *T, *string and []T respectively, so we can check for
  353. // this field being a pointer to a non-string.
  354. if typ.Kind() == reflect.Ptr {
  355. // *T or *string
  356. if typ.Elem().Kind() == reflect.String {
  357. break
  358. }
  359. } else if typ.Kind() == reflect.Slice {
  360. // []T or []*T
  361. if typ.Elem().Kind() != reflect.Ptr {
  362. break
  363. }
  364. } else if typ.Kind() == reflect.String {
  365. // The proto3 exception is for a string field,
  366. // which requires a colon.
  367. break
  368. }
  369. needColon = false
  370. }
  371. if needColon {
  372. return p.errorf("expected ':', found %q", tok.value)
  373. }
  374. p.back()
  375. }
  376. return nil
  377. }
  378. var textPropertiesCache sync.Map // map[reflect.Type]*textStructProperties
  379. type textStructProperties struct {
  380. *StructProperties
  381. reqCount int
  382. decoderOrigNames map[string]int
  383. }
  384. func getTextProperties(t reflect.Type) *textStructProperties {
  385. if p, ok := textPropertiesCache.Load(t); ok {
  386. return p.(*textStructProperties)
  387. }
  388. prop := &textStructProperties{StructProperties: GetProperties(t)}
  389. reqCount := 0
  390. prop.decoderOrigNames = make(map[string]int)
  391. for i, p := range prop.Prop {
  392. if strings.HasPrefix(p.Name, "XXX_") {
  393. // Internal fields should not appear in tags/origNames maps.
  394. // They are handled specially when encoding and decoding.
  395. continue
  396. }
  397. if p.Required {
  398. reqCount++
  399. }
  400. prop.decoderOrigNames[p.OrigName] = i
  401. }
  402. prop.reqCount = reqCount
  403. textPropertiesCache.Store(t, prop)
  404. return prop
  405. }
  406. func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
  407. st := sv.Type()
  408. sprops := getTextProperties(st)
  409. reqCount := sprops.reqCount
  410. var reqFieldErr error
  411. fieldSet := make(map[string]bool)
  412. // A struct is a sequence of "name: value", terminated by one of
  413. // '>' or '}', or the end of the input. A name may also be
  414. // "[extension]" or "[type/url]".
  415. //
  416. // The whole struct can also be an expanded Any message, like:
  417. // [type/url] < ... struct contents ... >
  418. for {
  419. tok := p.next()
  420. if tok.err != nil {
  421. return tok.err
  422. }
  423. if tok.value == terminator {
  424. break
  425. }
  426. if tok.value == "[" {
  427. // Looks like an extension or an Any.
  428. //
  429. // TODO: Check whether we need to handle
  430. // namespace rooted names (e.g. ".something.Foo").
  431. extName, err := p.consumeExtName()
  432. if err != nil {
  433. return err
  434. }
  435. if s := strings.LastIndex(extName, "/"); s >= 0 {
  436. // If it contains a slash, it's an Any type URL.
  437. messageName := extName[s+1:]
  438. mt := MessageType(messageName)
  439. if mt == nil {
  440. return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
  441. }
  442. tok = p.next()
  443. if tok.err != nil {
  444. return tok.err
  445. }
  446. // consume an optional colon
  447. if tok.value == ":" {
  448. tok = p.next()
  449. if tok.err != nil {
  450. return tok.err
  451. }
  452. }
  453. var terminator string
  454. switch tok.value {
  455. case "<":
  456. terminator = ">"
  457. case "{":
  458. terminator = "}"
  459. default:
  460. return p.errorf("expected '{' or '<', found %q", tok.value)
  461. }
  462. v := reflect.New(mt.Elem())
  463. if pe := p.readStruct(v.Elem(), terminator); pe != nil {
  464. return pe
  465. }
  466. b, err := Marshal(v.Interface().(Message))
  467. if err != nil {
  468. return p.errorf("failed to marshal message of type %q: %v", messageName, err)
  469. }
  470. if fieldSet["type_url"] {
  471. return p.errorf(anyRepeatedlyUnpacked, "type_url")
  472. }
  473. if fieldSet["value"] {
  474. return p.errorf(anyRepeatedlyUnpacked, "value")
  475. }
  476. sv.FieldByName("TypeUrl").SetString(extName)
  477. sv.FieldByName("Value").SetBytes(b)
  478. fieldSet["type_url"] = true
  479. fieldSet["value"] = true
  480. continue
  481. }
  482. var desc *ExtensionDesc
  483. // This could be faster, but it's functional.
  484. // TODO: Do something smarter than a linear scan.
  485. for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
  486. if d.Name == extName {
  487. desc = d
  488. break
  489. }
  490. }
  491. if desc == nil {
  492. return p.errorf("unrecognized extension %q", extName)
  493. }
  494. props := &Properties{}
  495. props.Parse(desc.Tag)
  496. typ := reflect.TypeOf(desc.ExtensionType)
  497. if err := p.checkForColon(props, typ); err != nil {
  498. return err
  499. }
  500. rep := isRepeatedExtension(desc)
  501. // Read the extension structure, and set it in
  502. // the value we're constructing.
  503. var ext reflect.Value
  504. if !rep {
  505. ext = reflect.New(typ).Elem()
  506. } else {
  507. ext = reflect.New(typ.Elem()).Elem()
  508. }
  509. if err := p.readAny(ext, props); err != nil {
  510. if _, ok := err.(*requiredNotSetError); !ok {
  511. return err
  512. }
  513. reqFieldErr = err
  514. }
  515. ep := sv.Addr().Interface().(Message)
  516. if !rep {
  517. SetExtension(ep, desc, ext.Interface())
  518. } else {
  519. old, err := GetExtension(ep, desc)
  520. var sl reflect.Value
  521. if err == nil {
  522. sl = reflect.ValueOf(old) // existing slice
  523. } else {
  524. sl = reflect.MakeSlice(typ, 0, 1)
  525. }
  526. sl = reflect.Append(sl, ext)
  527. SetExtension(ep, desc, sl.Interface())
  528. }
  529. if err := p.consumeOptionalSeparator(); err != nil {
  530. return err
  531. }
  532. continue
  533. }
  534. // This is a normal, non-extension field.
  535. name := tok.value
  536. var dst reflect.Value
  537. fi, props, ok := structFieldByName(sprops, name)
  538. if ok {
  539. dst = sv.Field(fi)
  540. } else if oop, ok := sprops.OneofTypes[name]; ok {
  541. // It is a oneof.
  542. props = oop.Prop
  543. nv := reflect.New(oop.Type.Elem())
  544. dst = nv.Elem().Field(0)
  545. field := sv.Field(oop.Field)
  546. if !field.IsNil() {
  547. return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, sv.Type().Field(oop.Field).Name)
  548. }
  549. field.Set(nv)
  550. }
  551. if !dst.IsValid() {
  552. return p.errorf("unknown field name %q in %v", name, st)
  553. }
  554. if dst.Kind() == reflect.Map {
  555. // Consume any colon.
  556. if err := p.checkForColon(props, dst.Type()); err != nil {
  557. return err
  558. }
  559. // Construct the map if it doesn't already exist.
  560. if dst.IsNil() {
  561. dst.Set(reflect.MakeMap(dst.Type()))
  562. }
  563. key := reflect.New(dst.Type().Key()).Elem()
  564. val := reflect.New(dst.Type().Elem()).Elem()
  565. // The map entry should be this sequence of tokens:
  566. // < key : KEY value : VALUE >
  567. // However, implementations may omit key or value, and technically
  568. // we should support them in any order. See b/28924776 for a time
  569. // this went wrong.
  570. tok := p.next()
  571. var terminator string
  572. switch tok.value {
  573. case "<":
  574. terminator = ">"
  575. case "{":
  576. terminator = "}"
  577. default:
  578. return p.errorf("expected '{' or '<', found %q", tok.value)
  579. }
  580. for {
  581. tok := p.next()
  582. if tok.err != nil {
  583. return tok.err
  584. }
  585. if tok.value == terminator {
  586. break
  587. }
  588. switch tok.value {
  589. case "key":
  590. if err := p.consumeToken(":"); err != nil {
  591. return err
  592. }
  593. if err := p.readAny(key, props.MapKeyProp); err != nil {
  594. return err
  595. }
  596. if err := p.consumeOptionalSeparator(); err != nil {
  597. return err
  598. }
  599. case "value":
  600. if err := p.checkForColon(props.MapValProp, dst.Type().Elem()); err != nil {
  601. return err
  602. }
  603. if err := p.readAny(val, props.MapValProp); err != nil {
  604. return err
  605. }
  606. if err := p.consumeOptionalSeparator(); err != nil {
  607. return err
  608. }
  609. default:
  610. p.back()
  611. return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
  612. }
  613. }
  614. dst.SetMapIndex(key, val)
  615. continue
  616. }
  617. // Check that it's not already set if it's not a repeated field.
  618. if !props.Repeated && fieldSet[name] {
  619. return p.errorf("non-repeated field %q was repeated", name)
  620. }
  621. if err := p.checkForColon(props, dst.Type()); err != nil {
  622. return err
  623. }
  624. // Parse into the field.
  625. fieldSet[name] = true
  626. if err := p.readAny(dst, props); err != nil {
  627. if _, ok := err.(*requiredNotSetError); !ok {
  628. return err
  629. }
  630. reqFieldErr = err
  631. }
  632. if props.Required {
  633. reqCount--
  634. }
  635. if err := p.consumeOptionalSeparator(); err != nil {
  636. return err
  637. }
  638. }
  639. if reqCount > 0 {
  640. return p.missingRequiredFieldError(sv)
  641. }
  642. return reqFieldErr
  643. }
  644. // consumeExtName consumes extension name or expanded Any type URL and the
  645. // following ']'. It returns the name or URL consumed.
  646. func (p *textParser) consumeExtName() (string, error) {
  647. tok := p.next()
  648. if tok.err != nil {
  649. return "", tok.err
  650. }
  651. // If extension name or type url is quoted, it's a single token.
  652. if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
  653. name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
  654. if err != nil {
  655. return "", err
  656. }
  657. return name, p.consumeToken("]")
  658. }
  659. // Consume everything up to "]"
  660. var parts []string
  661. for tok.value != "]" {
  662. parts = append(parts, tok.value)
  663. tok = p.next()
  664. if tok.err != nil {
  665. return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
  666. }
  667. if p.done && tok.value != "]" {
  668. return "", p.errorf("unclosed type_url or extension name")
  669. }
  670. }
  671. return strings.Join(parts, ""), nil
  672. }
  673. // consumeOptionalSeparator consumes an optional semicolon or comma.
  674. // It is used in readStruct to provide backward compatibility.
  675. func (p *textParser) consumeOptionalSeparator() error {
  676. tok := p.next()
  677. if tok.err != nil {
  678. return tok.err
  679. }
  680. if tok.value != ";" && tok.value != "," {
  681. p.back()
  682. }
  683. return nil
  684. }
  685. func (p *textParser) readAny(v reflect.Value, props *Properties) error {
  686. tok := p.next()
  687. if tok.err != nil {
  688. return tok.err
  689. }
  690. if tok.value == "" {
  691. return p.errorf("unexpected EOF")
  692. }
  693. switch fv := v; fv.Kind() {
  694. case reflect.Slice:
  695. at := v.Type()
  696. if at.Elem().Kind() == reflect.Uint8 {
  697. // Special case for []byte
  698. if tok.value[0] != '"' && tok.value[0] != '\'' {
  699. // Deliberately written out here, as the error after
  700. // this switch statement would write "invalid []byte: ...",
  701. // which is not as user-friendly.
  702. return p.errorf("invalid string: %v", tok.value)
  703. }
  704. bytes := []byte(tok.unquoted)
  705. fv.Set(reflect.ValueOf(bytes))
  706. return nil
  707. }
  708. // Repeated field.
  709. if tok.value == "[" {
  710. // Repeated field with list notation, like [1,2,3].
  711. for {
  712. fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
  713. err := p.readAny(fv.Index(fv.Len()-1), props)
  714. if err != nil {
  715. return err
  716. }
  717. tok := p.next()
  718. if tok.err != nil {
  719. return tok.err
  720. }
  721. if tok.value == "]" {
  722. break
  723. }
  724. if tok.value != "," {
  725. return p.errorf("Expected ']' or ',' found %q", tok.value)
  726. }
  727. }
  728. return nil
  729. }
  730. // One value of the repeated field.
  731. p.back()
  732. fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
  733. return p.readAny(fv.Index(fv.Len()-1), props)
  734. case reflect.Bool:
  735. // true/1/t/True or false/f/0/False.
  736. switch tok.value {
  737. case "true", "1", "t", "True":
  738. fv.SetBool(true)
  739. return nil
  740. case "false", "0", "f", "False":
  741. fv.SetBool(false)
  742. return nil
  743. }
  744. case reflect.Float32, reflect.Float64:
  745. v := tok.value
  746. // Ignore 'f' for compatibility with output generated by C++, but don't
  747. // remove 'f' when the value is "-inf" or "inf".
  748. if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
  749. v = v[:len(v)-1]
  750. }
  751. if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
  752. fv.SetFloat(f)
  753. return nil
  754. }
  755. case reflect.Int32:
  756. if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
  757. fv.SetInt(x)
  758. return nil
  759. }
  760. if len(props.Enum) == 0 {
  761. break
  762. }
  763. m := EnumValueMap(props.Enum)
  764. if m == nil {
  765. break
  766. }
  767. x, ok := m[tok.value]
  768. if !ok {
  769. break
  770. }
  771. fv.SetInt(int64(x))
  772. return nil
  773. case reflect.Int64:
  774. if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
  775. fv.SetInt(x)
  776. return nil
  777. }
  778. case reflect.Ptr:
  779. // A basic field (indirected through pointer), or a repeated message/group
  780. p.back()
  781. fv.Set(reflect.New(fv.Type().Elem()))
  782. return p.readAny(fv.Elem(), props)
  783. case reflect.String:
  784. if tok.value[0] == '"' || tok.value[0] == '\'' {
  785. fv.SetString(tok.unquoted)
  786. return nil
  787. }
  788. case reflect.Struct:
  789. var terminator string
  790. switch tok.value {
  791. case "{":
  792. terminator = "}"
  793. case "<":
  794. terminator = ">"
  795. default:
  796. return p.errorf("expected '{' or '<', found %q", tok.value)
  797. }
  798. // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
  799. return p.readStruct(fv, terminator)
  800. case reflect.Uint32:
  801. if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
  802. fv.SetUint(uint64(x))
  803. return nil
  804. }
  805. case reflect.Uint64:
  806. if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
  807. fv.SetUint(x)
  808. return nil
  809. }
  810. }
  811. return p.errorf("invalid %v: %v", v.Type(), tok.value)
  812. }
  813. // UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
  814. // before starting to unmarshal, so any existing data in pb is always removed.
  815. // If a required field is not set and no other error occurs,
  816. // UnmarshalText returns *requiredNotSetError.
  817. func UnmarshalText(s string, pb Message) error {
  818. if unmarshalTextAlt != nil {
  819. return unmarshalTextAlt(s, pb) // populated by hooks_enabled.go
  820. }
  821. if um, ok := pb.(encoding.TextUnmarshaler); ok {
  822. return um.UnmarshalText([]byte(s))
  823. }
  824. pb.Reset()
  825. v := reflect.ValueOf(pb)
  826. return newTextParser(s).readStruct(v.Elem(), "")
  827. }