text_parser.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. // Go support for Protocol Buffers - Google's data interchange format
  2. //
  3. // Copyright 2010 Google Inc. All rights reserved.
  4. // http://code.google.com/p/goprotobuf/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. package proto
  32. // Functions for parsing the Text protocol buffer format.
  33. // TODO: message sets, extensions.
  34. import (
  35. "fmt"
  36. "os"
  37. "reflect"
  38. "strconv"
  39. )
  40. // ParseError satisfies the os.Error interface.
  41. type ParseError struct {
  42. Message string
  43. Line int // 1-based line number
  44. Offset int // 0-based byte offset from start of input
  45. }
  46. func (p *ParseError) String() string {
  47. if p.Line == 1 {
  48. // show offset only for first line
  49. return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  50. }
  51. return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  52. }
  53. type token struct {
  54. value string
  55. err *ParseError
  56. line int // line number
  57. offset int // byte number from start of input, not start of line
  58. unquoted string // the unquoted version of value, if it was a quoted string
  59. }
  60. func (t *token) String() string {
  61. if t.err == nil {
  62. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  63. }
  64. return fmt.Sprintf("parse error: %v", t.err)
  65. }
  66. type textParser struct {
  67. s string // remaining input
  68. done bool // whether the parsing is finished (success or error)
  69. backed bool // whether back() was called
  70. offset, line int
  71. cur token
  72. }
  73. func newTextParser(s string) *textParser {
  74. p := new(textParser)
  75. p.s = s
  76. p.line = 1
  77. p.cur.line = 1
  78. return p
  79. }
  80. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  81. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  82. p.cur.err = pe
  83. p.done = true
  84. return pe
  85. }
  86. // Numbers and identifiers are matched by [-+._A-Za-z0-9]
  87. func isIdentOrNumberChar(c byte) bool {
  88. switch {
  89. case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
  90. return true
  91. case '0' <= c && c <= '9':
  92. return true
  93. }
  94. switch c {
  95. case '-', '+', '.', '_':
  96. return true
  97. }
  98. return false
  99. }
  100. func isWhitespace(c byte) bool {
  101. switch c {
  102. case ' ', '\t', '\n', '\r':
  103. return true
  104. }
  105. return false
  106. }
  107. func (p *textParser) skipWhitespace() {
  108. i := 0
  109. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  110. if p.s[i] == '#' {
  111. // comment; skip to end of line or input
  112. for i < len(p.s) && p.s[i] != '\n' {
  113. i++
  114. }
  115. if i == len(p.s) {
  116. break
  117. }
  118. }
  119. if p.s[i] == '\n' {
  120. p.line++
  121. }
  122. i++
  123. }
  124. p.offset += i
  125. p.s = p.s[i:len(p.s)]
  126. if len(p.s) == 0 {
  127. p.done = true
  128. }
  129. }
  130. func (p *textParser) advance() {
  131. // Skip whitespace
  132. p.skipWhitespace()
  133. if p.done {
  134. return
  135. }
  136. // Start of non-whitespace
  137. p.cur.err = nil
  138. p.cur.offset, p.cur.line = p.offset, p.line
  139. p.cur.unquoted = ""
  140. switch p.s[0] {
  141. case '<', '>', '{', '}', ':':
  142. // Single symbol
  143. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  144. case '"':
  145. // Quoted string
  146. i := 1
  147. for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
  148. if p.s[i] == '\\' && i+1 < len(p.s) {
  149. // skip escaped char
  150. i++
  151. }
  152. i++
  153. }
  154. if i >= len(p.s) || p.s[i] != '"' {
  155. p.errorf("unmatched quote")
  156. return
  157. }
  158. // TODO: Should be UnquoteC.
  159. unq, err := strconv.Unquote(p.s[0 : i+1])
  160. if err != nil {
  161. p.errorf("invalid quoted string %v", p.s[0:i+1])
  162. return
  163. }
  164. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  165. p.cur.unquoted = unq
  166. default:
  167. i := 0
  168. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  169. i++
  170. }
  171. if i == 0 {
  172. p.errorf("unexpected byte %#x", p.s[0])
  173. return
  174. }
  175. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  176. }
  177. p.offset += len(p.cur.value)
  178. }
  179. // Back off the parser by one token. Can only be done between calls to next().
  180. // It makes the next advance() a no-op.
  181. func (p *textParser) back() { p.backed = true }
  182. // Advances the parser and returns the new current token.
  183. func (p *textParser) next() *token {
  184. if p.backed || p.done {
  185. p.backed = false
  186. return &p.cur
  187. }
  188. p.advance()
  189. if p.done {
  190. p.cur.value = ""
  191. } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
  192. // Look for multiple quoted strings separated by whitespace,
  193. // and concatenate them.
  194. cat := p.cur
  195. for {
  196. p.skipWhitespace()
  197. if p.done || p.s[0] != '"' {
  198. break
  199. }
  200. p.advance()
  201. if p.cur.err != nil {
  202. return &p.cur
  203. }
  204. cat.value += " " + p.cur.value
  205. cat.unquoted += p.cur.unquoted
  206. }
  207. p.done = false // parser may have seen EOF, but we want to return cat
  208. p.cur = cat
  209. }
  210. return &p.cur
  211. }
  212. // Return an error indicating which required field was not set.
  213. func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
  214. st := sv.Type()
  215. sprops := GetProperties(st)
  216. for i := 0; i < st.NumField(); i++ {
  217. if !isNil(sv.Field(i)) {
  218. continue
  219. }
  220. props := sprops.Prop[i]
  221. if props.Required {
  222. return p.errorf("message %v missing required field %q", st, props.OrigName)
  223. }
  224. }
  225. return p.errorf("message %v missing required field", st) // should not happen
  226. }
  227. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  228. func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
  229. sprops := GetProperties(st)
  230. i, ok := sprops.origNames[name]
  231. if ok {
  232. return i, sprops.Prop[i], true
  233. }
  234. return -1, nil, false
  235. }
  236. func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
  237. st := sv.Type()
  238. reqCount := GetProperties(st).reqCount
  239. // A struct is a sequence of "name: value", terminated by one of
  240. // '>' or '}', or the end of the input.
  241. for {
  242. tok := p.next()
  243. if tok.err != nil {
  244. return tok.err
  245. }
  246. if tok.value == terminator {
  247. break
  248. }
  249. fi, props, ok := structFieldByName(st, tok.value)
  250. if !ok {
  251. return p.errorf("unknown field name %q in %v", tok.value, st)
  252. }
  253. // Check that it's not already set if it's not a repeated field.
  254. if !props.Repeated && !isNil(sv.Field(fi)) {
  255. return p.errorf("non-repeated field %q was repeated", tok.value)
  256. }
  257. tok = p.next()
  258. if tok.err != nil {
  259. return tok.err
  260. }
  261. if tok.value != ":" {
  262. // Colon is optional when the field is a group or message.
  263. needColon := true
  264. switch props.Wire {
  265. case "group":
  266. needColon = false
  267. case "bytes":
  268. // A "bytes" field is either a message, a string, or a repeated field;
  269. // those three become *T, *string and []T respectively, so we can check for
  270. // this field being a pointer to a non-string.
  271. typ := st.Field(fi).Type
  272. if typ.Kind() == reflect.Ptr {
  273. // *T or *string
  274. if typ.Elem().Kind() == reflect.String {
  275. break
  276. }
  277. } else if typ.Kind() == reflect.Slice {
  278. // []T or []*T
  279. if typ.Elem().Kind() != reflect.Ptr {
  280. break
  281. }
  282. }
  283. needColon = false
  284. }
  285. if needColon {
  286. return p.errorf("expected ':', found %q", tok.value)
  287. }
  288. p.back()
  289. }
  290. // Parse into the field.
  291. if err := p.readAny(sv.Field(fi), props); err != nil {
  292. return err
  293. }
  294. if props.Required {
  295. reqCount--
  296. }
  297. }
  298. if reqCount > 0 {
  299. return p.missingRequiredFieldError(sv)
  300. }
  301. return nil
  302. }
  303. const (
  304. minInt32 = -1 << 31
  305. maxInt32 = 1<<31 - 1
  306. maxUint32 = 1<<32 - 1
  307. )
  308. func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
  309. tok := p.next()
  310. if tok.err != nil {
  311. return tok.err
  312. }
  313. if tok.value == "" {
  314. return p.errorf("unexpected EOF")
  315. }
  316. switch fv := v; fv.Kind() {
  317. case reflect.Slice:
  318. at := v.Type()
  319. if at.Elem().Kind() == reflect.Uint8 {
  320. // Special case for []byte
  321. if tok.value[0] != '"' {
  322. // Deliberately written out here, as the error after
  323. // this switch statement would write "invalid []byte: ...",
  324. // which is not as user-friendly.
  325. return p.errorf("invalid string: %v", tok.value)
  326. }
  327. bytes := []byte(tok.unquoted)
  328. fv.Set(reflect.ValueOf(bytes))
  329. return nil
  330. }
  331. // Repeated field. May already exist.
  332. flen := fv.Len()
  333. if flen == fv.Cap() {
  334. nav := reflect.MakeSlice(at, flen, 2*flen+1)
  335. reflect.Copy(nav, fv)
  336. fv.Set(nav)
  337. }
  338. fv.SetLen(flen + 1)
  339. // Read one.
  340. p.back()
  341. return p.readAny(fv.Index(flen), nil) // TODO: pass properties?
  342. case reflect.Bool:
  343. // Either "true", "false", 1 or 0.
  344. switch tok.value {
  345. case "true", "1":
  346. fv.SetBool(true)
  347. return nil
  348. case "false", "0":
  349. fv.SetBool(false)
  350. return nil
  351. }
  352. case reflect.Float32, reflect.Float64:
  353. if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
  354. fv.SetFloat(f)
  355. return nil
  356. }
  357. case reflect.Int32:
  358. if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
  359. fv.SetInt(x)
  360. return nil
  361. }
  362. if len(props.Enum) == 0 {
  363. break
  364. }
  365. m, ok := enumValueMaps[props.Enum]
  366. if !ok {
  367. break
  368. }
  369. x, ok := m[tok.value]
  370. if !ok {
  371. break
  372. }
  373. fv.SetInt(int64(x))
  374. return nil
  375. case reflect.Int64:
  376. if x, err := strconv.Atoi64(tok.value); err == nil {
  377. fv.SetInt(x)
  378. return nil
  379. }
  380. case reflect.Ptr:
  381. // A basic field (indirected through pointer), or a repeated message/group
  382. p.back()
  383. fv.Set(reflect.New(fv.Type().Elem()))
  384. return p.readAny(fv.Elem(), props)
  385. case reflect.String:
  386. if tok.value[0] == '"' {
  387. fv.SetString(tok.unquoted)
  388. return nil
  389. }
  390. case reflect.Struct:
  391. var terminator string
  392. switch tok.value {
  393. case "{":
  394. terminator = "}"
  395. case "<":
  396. terminator = ">"
  397. default:
  398. return p.errorf("expected '{' or '<', found %q", tok.value)
  399. }
  400. return p.readStruct(fv, terminator)
  401. case reflect.Uint32:
  402. if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
  403. fv.SetUint(uint64(x))
  404. return nil
  405. }
  406. case reflect.Uint64:
  407. if x, err := strconv.Atoui64(tok.value); err == nil {
  408. fv.SetUint(x)
  409. return nil
  410. }
  411. }
  412. return p.errorf("invalid %v: %v", v.Type(), tok.value)
  413. }
  414. var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
  415. // UnmarshalText reads a protobuffer in Text format.
  416. func UnmarshalText(s string, pb interface{}) os.Error {
  417. v := reflect.ValueOf(pb)
  418. if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
  419. return notPtrStruct
  420. }
  421. if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
  422. return pe
  423. }
  424. return nil
  425. }