text_parser.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // Go support for Protocol Buffers - Google's data interchange format
  2. //
  3. // Copyright 2010 Google Inc. All rights reserved.
  4. // http://code.google.com/p/goprotobuf/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. package proto
  32. // Functions for parsing the Text protocol buffer format.
  33. // TODO:
  34. // - groups.
  35. import (
  36. "fmt"
  37. "os"
  38. "reflect"
  39. "strconv"
  40. )
  41. // ParseError satisfies the os.Error interface.
  42. type ParseError struct {
  43. Message string
  44. Line int // 1-based line number
  45. Offset int // 0-based byte offset from start of input
  46. }
  47. func (p *ParseError) String() string {
  48. if p.Line == 1 {
  49. // show offset only for first line
  50. return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  51. }
  52. return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  53. }
  54. type token struct {
  55. value string
  56. err *ParseError
  57. line int // line number
  58. offset int // byte number from start of input, not start of line
  59. unquoted string // the unquoted version of value, if it was a quoted string
  60. }
  61. func (t *token) String() string {
  62. if t.err == nil {
  63. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  64. }
  65. return fmt.Sprintf("parse error: %v", t.err)
  66. }
  67. type textParser struct {
  68. s string // remaining input
  69. done bool // whether the parsing is finished (success or error)
  70. backed bool // whether back() was called
  71. offset, line int
  72. cur token
  73. }
  74. func newTextParser(s string) *textParser {
  75. p := new(textParser)
  76. p.s = s
  77. p.line = 1
  78. p.cur.line = 1
  79. return p
  80. }
  81. func (p *textParser) error(format string, a ...interface{}) *ParseError {
  82. pe := &ParseError{fmt.Sprintf(format, a), p.cur.line, p.cur.offset}
  83. p.cur.err = pe
  84. p.done = true
  85. return pe
  86. }
  87. // Numbers and identifiers are matched by [-+._A-Za-z0-9]
  88. func isIdentOrNumberChar(c byte) bool {
  89. switch {
  90. case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
  91. return true
  92. case '0' <= c && c <= '9':
  93. return true
  94. }
  95. switch c {
  96. case '-', '+', '.', '_':
  97. return true
  98. }
  99. return false
  100. }
  101. func isWhitespace(c byte) bool {
  102. switch c {
  103. case ' ', '\t', '\n', '\r':
  104. return true
  105. }
  106. return false
  107. }
  108. func (p *textParser) skipWhitespace() {
  109. i := 0
  110. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  111. if p.s[i] == '#' {
  112. // comment; skip to end of line or input
  113. for i < len(p.s) && p.s[i] != '\n' {
  114. i++
  115. }
  116. if i == len(p.s) {
  117. break
  118. }
  119. }
  120. if p.s[i] == '\n' {
  121. p.line++
  122. }
  123. i++
  124. }
  125. p.offset += i
  126. p.s = p.s[i:len(p.s)]
  127. if len(p.s) == 0 {
  128. p.done = true
  129. }
  130. }
  131. func (p *textParser) advance() {
  132. // Skip whitespace
  133. p.skipWhitespace()
  134. if p.done {
  135. return
  136. }
  137. // Start of non-whitespace
  138. p.cur.err = nil
  139. p.cur.offset, p.cur.line = p.offset, p.line
  140. p.cur.unquoted = ""
  141. switch p.s[0] {
  142. case '<', '>', '{', '}', ':':
  143. // Single symbol
  144. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  145. case '"':
  146. // Quoted string
  147. i := 1
  148. for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
  149. if p.s[i] == '\\' && i+1 < len(p.s) {
  150. // skip escaped char
  151. i++
  152. }
  153. i++
  154. }
  155. if i >= len(p.s) || p.s[i] != '"' {
  156. p.error("unmatched quote")
  157. return
  158. }
  159. // TODO: Should be UnquoteC.
  160. unq, err := strconv.Unquote(p.s[0 : i+1])
  161. if err != nil {
  162. p.error("invalid quoted string %v", p.s[0:i+1])
  163. return
  164. }
  165. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  166. p.cur.unquoted = unq
  167. default:
  168. i := 0
  169. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  170. i++
  171. }
  172. if i == 0 {
  173. p.error("unexpected byte %#x", p.s[0])
  174. return
  175. }
  176. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  177. }
  178. p.offset += len(p.cur.value)
  179. }
  180. // Back off the parser by one token. Can only be done between calls to next().
  181. // It makes the next advance() a no-op.
  182. func (p *textParser) back() { p.backed = true }
  183. // Advances the parser and returns the new current token.
  184. func (p *textParser) next() *token {
  185. if p.backed || p.done {
  186. p.backed = false
  187. return &p.cur
  188. }
  189. p.advance()
  190. if p.done {
  191. p.cur.value = ""
  192. } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
  193. // Look for multiple quoted strings separated by whitespace,
  194. // and concatenate them.
  195. cat := p.cur
  196. for {
  197. p.skipWhitespace()
  198. if p.done || p.s[0] != '"' {
  199. break
  200. }
  201. p.advance()
  202. if p.cur.err != nil {
  203. return &p.cur
  204. }
  205. cat.value += " " + p.cur.value
  206. cat.unquoted += p.cur.unquoted
  207. }
  208. p.done = false // parser may have seen EOF, but we want to return cat
  209. p.cur = cat
  210. }
  211. return &p.cur
  212. }
  213. type nillable interface {
  214. IsNil() bool
  215. }
  216. // Return an error indicating which required field was not set.
  217. func (p *textParser) missingRequiredFieldError(sv *reflect.StructValue) *ParseError {
  218. st := sv.Type().(*reflect.StructType)
  219. sprops := GetProperties(st)
  220. for i := 0; i < st.NumField(); i++ {
  221. // All protocol buffer fields are nillable, but let's be careful.
  222. nfv, ok := sv.Field(i).(nillable)
  223. if !ok || !nfv.IsNil() {
  224. continue
  225. }
  226. props := sprops.Prop[i]
  227. if props.Required {
  228. return p.error("message %v missing required field %q", st, props.OrigName)
  229. }
  230. }
  231. return p.error("message %v missing required field", st) // should not happen
  232. }
  233. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  234. func structFieldByName(st *reflect.StructType, name string) (int, *Properties, bool) {
  235. sprops := GetProperties(st)
  236. for i := 0; i < st.NumField(); i++ {
  237. props := sprops.Prop[i]
  238. if props.OrigName == name {
  239. return i, props, true
  240. }
  241. }
  242. return -1, nil, false
  243. }
  244. func (p *textParser) readStruct(sv *reflect.StructValue, terminator string) *ParseError {
  245. st := sv.Type().(*reflect.StructType)
  246. reqCount := GetProperties(st).reqCount
  247. // A struct is a sequence of "name: value", terminated by one of
  248. // '>' or '}', or the end of the input.
  249. for {
  250. tok := p.next()
  251. if tok.err != nil {
  252. return tok.err
  253. }
  254. if tok.value == terminator {
  255. break
  256. }
  257. fi, props, ok := structFieldByName(st, tok.value)
  258. if !ok {
  259. return p.error("unknown field name %q in %v", tok.value, st)
  260. }
  261. // Check that it's not already set if it's not a repeated field.
  262. if !props.Repeated {
  263. if nfv, ok := sv.Field(fi).(nillable); ok && !nfv.IsNil() {
  264. return p.error("non-repeated field %q was repeated", tok.value)
  265. }
  266. }
  267. tok = p.next()
  268. if tok.err != nil {
  269. return tok.err
  270. }
  271. if tok.value != ":" {
  272. // Colon is optional when the field is a group or message.
  273. needColon := true
  274. switch props.Wire {
  275. case "group":
  276. needColon = false
  277. case "bytes":
  278. // A "bytes" field is either a message, a string, or a repeated field;
  279. // those three become *T, *string and []T respectively, so we can check for
  280. // this field being a pointer to a non-string.
  281. typ := st.Field(fi).Type
  282. if pt, ok := typ.(*reflect.PtrType); ok {
  283. // *T or *string
  284. if _, ok := pt.Elem().(*reflect.StringType); ok {
  285. break
  286. }
  287. } else if st, ok := typ.(*reflect.SliceType); ok {
  288. // []T or []*T
  289. if _, ok := st.Elem().(*reflect.PtrType); !ok {
  290. break
  291. }
  292. }
  293. needColon = false
  294. }
  295. if needColon {
  296. return p.error("expected ':', found %q", tok.value)
  297. }
  298. p.back()
  299. }
  300. // Parse into the field.
  301. if err := p.readAny(sv.Field(fi), props); err != nil {
  302. return err
  303. }
  304. if props.Required {
  305. reqCount--
  306. }
  307. }
  308. if reqCount > 0 {
  309. return p.missingRequiredFieldError(sv)
  310. }
  311. return nil
  312. }
  313. const (
  314. minInt32 = -1 << 31
  315. maxInt32 = 1<<31 - 1
  316. maxUint32 = 1<<32 - 1
  317. )
  318. func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
  319. tok := p.next()
  320. if tok.err != nil {
  321. return tok.err
  322. }
  323. if tok.value == "" {
  324. return p.error("unexpected EOF")
  325. }
  326. switch fv := v.(type) {
  327. case *reflect.SliceValue:
  328. at := v.Type().(*reflect.SliceType)
  329. if at.Elem().Kind() == reflect.Uint8 {
  330. // Special case for []byte
  331. if tok.value[0] != '"' {
  332. // Deliberately written out here, as the error after
  333. // this switch statement would write "invalid []byte: ...",
  334. // which is not as user-friendly.
  335. return p.error("invalid string: %v", tok.value)
  336. }
  337. bytes := []byte(tok.unquoted)
  338. fv.Set(reflect.NewValue(bytes).(*reflect.SliceValue))
  339. return nil
  340. }
  341. // Repeated field. May already exist.
  342. cnt := fv.Len()
  343. nav := reflect.MakeSlice(at, cnt, cnt+1)
  344. reflect.ArrayCopy(nav, fv)
  345. fv.Set(nav)
  346. fv.SetLen(cnt + 1)
  347. // Read one.
  348. p.back()
  349. return p.readAny(fv.Elem(cnt), nil) // TODO: pass properties?
  350. case *reflect.BoolValue:
  351. // Either "true", "false", 1 or 0.
  352. switch tok.value {
  353. case "true", "1":
  354. fv.Set(true)
  355. return nil
  356. case "false", "0":
  357. fv.Set(false)
  358. return nil
  359. }
  360. case *reflect.FloatValue:
  361. if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
  362. fv.Set(f)
  363. return nil
  364. }
  365. case *reflect.IntValue:
  366. switch fv.Type().Bits() {
  367. case 32:
  368. if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
  369. fv.Set(x)
  370. return nil
  371. }
  372. if len(props.Enum) == 0 {
  373. break
  374. }
  375. m, ok := enumValueMaps[props.Enum]
  376. if !ok {
  377. break
  378. }
  379. x, ok := m[tok.value]
  380. if !ok {
  381. break
  382. }
  383. fv.Set(int64(x))
  384. return nil
  385. case 64:
  386. if x, err := strconv.Atoi64(tok.value); err == nil {
  387. fv.Set(x)
  388. return nil
  389. }
  390. }
  391. case *reflect.PtrValue:
  392. // A basic field (indirected through pointer), or a repeated message/group
  393. p.back()
  394. fv.PointTo(reflect.MakeZero(fv.Type().(*reflect.PtrType).Elem()))
  395. return p.readAny(fv.Elem(), props)
  396. case *reflect.StringValue:
  397. if tok.value[0] == '"' {
  398. fv.Set(tok.unquoted)
  399. return nil
  400. }
  401. case *reflect.StructValue:
  402. var terminator string
  403. switch tok.value {
  404. case "{":
  405. terminator = "}"
  406. case "<":
  407. terminator = ">"
  408. default:
  409. return p.error("expected '{' or '<', found %q", tok.value)
  410. }
  411. return p.readStruct(fv, terminator)
  412. case *reflect.UintValue:
  413. switch fv.Type().Bits() {
  414. case 32:
  415. if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
  416. fv.Set(uint64(x))
  417. return nil
  418. }
  419. case 64:
  420. if x, err := strconv.Atoui64(tok.value); err == nil {
  421. fv.Set(x)
  422. return nil
  423. }
  424. }
  425. }
  426. return p.error("invalid %v: %v", v.Type(), tok.value)
  427. }
  428. var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
  429. // UnmarshalText reads a protobuffer in Text format.
  430. func UnmarshalText(s string, pb interface{}) os.Error {
  431. pv, ok := reflect.NewValue(pb).(*reflect.PtrValue)
  432. if !ok {
  433. return notPtrStruct
  434. }
  435. sv, ok := pv.Elem().(*reflect.StructValue)
  436. if !ok {
  437. return notPtrStruct
  438. }
  439. if pe := newTextParser(s).readStruct(sv, ""); pe != nil {
  440. return pe
  441. }
  442. return nil
  443. }