codec.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package catmsg
  5. import (
  6. "errors"
  7. "fmt"
  8. "golang.org/x/text/language"
  9. )
// A Renderer renders a Message.
//
// A Decoder forwards its own Render and Arg calls to the Renderer it was
// created with.
type Renderer interface {
	// Render renders the given string. The given string may be interpreted as a
	// format string, such as the one used by the fmt package or a template.
	Render(s string)

	// Arg returns the i-th argument passed to format a message. This method
	// should return nil if there is no such argument. Messages need access to
	// arguments to allow selecting a message based on linguistic features of
	// those arguments.
	Arg(i int) interface{}
}
// A Dictionary specifies a source of messages, including variables or macros.
//
// A Decoder consults its Dictionary to resolve macro names while executing a
// substitution.
type Dictionary interface {
	// Lookup returns the message for the given key. It returns false for ok if
	// such a message could not be found.
	Lookup(key string) (data string, ok bool)

	// TODO: consider returning an interface, instead of a string. This will
	// allow implementations to do their own message type decoding.
}
// An Encoder serializes a Message to a string.
//
// Encoders form a chain: nested messages are compiled with a child Encoder
// whose parent is the enclosing Encoder, and all of them share one root.
type Encoder struct {
	// The root encoder is used for storing encoded variables.
	root *Encoder

	// The parent encoder provides the surrounding scopes for resolving variable
	// names.
	parent *Encoder

	// tag is the language reported by Language.
	tag language.Tag

	// buf holds the encoded message so far. After a message completes encoding,
	// the contents of buf, prefixed by the encoded length, are flushed to the
	// parent buffer.
	buf []byte

	// vars is the lookup table of variables in the current scope.
	vars []keyVal

	// err holds the first error recorded through setError. setError only ever
	// writes this field on the root encoder.
	err error

	inBody bool // if false next call must be EncodeMessageType
}
// keyVal associates a variable name with the position of its encoded message.
type keyVal struct {
	// key is the variable name.
	key string
	// offset is the index into the root encoder's buffer at which the
	// variable's encoded message starts (see addVar).
	offset int
}
  50. // Language reports the language for which the encoded message will be stored
  51. // in the Catalog.
  52. func (e *Encoder) Language() language.Tag { return e.tag }
  53. func (e *Encoder) setError(err error) {
  54. if e.root.err == nil {
  55. e.root.err = err
  56. }
  57. }
  58. // EncodeUint encodes x.
  59. func (e *Encoder) EncodeUint(x uint64) {
  60. e.checkInBody()
  61. var buf [maxVarintBytes]byte
  62. n := encodeUint(buf[:], x)
  63. e.buf = append(e.buf, buf[:n]...)
  64. }
  65. // EncodeString encodes s.
  66. func (e *Encoder) EncodeString(s string) {
  67. e.checkInBody()
  68. e.EncodeUint(uint64(len(s)))
  69. e.buf = append(e.buf, s...)
  70. }
  71. // EncodeMessageType marks the current message to be of type h.
  72. //
  73. // It must be the first call of a Message's Compile method.
  74. func (e *Encoder) EncodeMessageType(h Handle) {
  75. if e.inBody {
  76. panic("catmsg: EncodeMessageType not the first method called")
  77. }
  78. e.inBody = true
  79. e.EncodeUint(uint64(h))
  80. }
  81. // EncodeMessage serializes the given message inline at the current position.
  82. func (e *Encoder) EncodeMessage(m Message) error {
  83. e = &Encoder{root: e.root, parent: e, tag: e.tag}
  84. err := m.Compile(e)
  85. if _, ok := m.(*Var); !ok {
  86. e.flushTo(e.parent)
  87. }
  88. return err
  89. }
  90. func (e *Encoder) checkInBody() {
  91. if !e.inBody {
  92. panic("catmsg: expected prior call to EncodeMessageType")
  93. }
  94. }
  95. // stripPrefix indicates the number of prefix bytes that must be stripped to
  96. // turn a single-element sequence into a message that is just this single member
  97. // without its size prefix. If the message can be stripped, b[1:n] contains the
  98. // size prefix.
  99. func stripPrefix(b []byte) (n int) {
  100. if len(b) > 0 && Handle(b[0]) == msgFirst {
  101. x, n, _ := decodeUint(b[1:])
  102. if 1+n+int(x) == len(b) {
  103. return 1 + n
  104. }
  105. }
  106. return 0
  107. }
  108. func (e *Encoder) flushTo(dst *Encoder) {
  109. data := e.buf
  110. p := stripPrefix(data)
  111. if p > 0 {
  112. data = data[1:]
  113. } else {
  114. // Prefix the size.
  115. dst.EncodeUint(uint64(len(data)))
  116. }
  117. dst.buf = append(dst.buf, data...)
  118. }
// addVar compiles m as the definition of the variable named key, stores the
// encoded result in the root encoder's buffer and records the offset at which
// it was stored in the variable table of e's parent.
//
// If key is already defined in the parent's table, addVar records and returns
// a "duplicate variable" error without compiling m. Otherwise it returns the
// error reported by m.Compile; any such error other than ErrIncomplete is
// also passed to setError.
func (e *Encoder) addVar(key string, m Message) error {
	// Variable names must be unique within the enclosing scope.
	for _, v := range e.parent.vars {
		if v.key == key {
			err := fmt.Errorf("catmsg: duplicate variable %q", key)
			e.setError(err)
			return err
		}
	}
	// Remember the scope now: e may be replaced by a wrapper encoder below,
	// whose parent is not the enclosing scope.
	scope := e.parent

	// If a variable message is Incomplete, and does not evaluate to a message
	// during execution, we fall back to the variable name. We encode this by
	// appending the variable name if the message reports it's incomplete.
	err := m.Compile(e)
	if err != ErrIncomplete {
		// This is also reached when err is nil; setError then stores nil,
		// which leaves the root's error state unset.
		e.setError(err)
	}
	switch {
	case len(e.buf) == 1 && Handle(e.buf[0]) == msgFirst: // empty sequence
		// Discard the lone sequence handle and leave the body state so that
		// the Compile call below can start a new message with
		// EncodeMessageType.
		e.buf = e.buf[:0]
		e.inBody = false
		fallthrough
	case len(e.buf) == 0:
		// Empty message.
		// The variable's name is encoded as its value instead.
		if err := String(key).Compile(e); err != nil {
			e.setError(err)
		}
	case err == ErrIncomplete:
		if Handle(e.buf[0]) != msgFirst {
			// The compiled message is not a sequence yet: wrap it in a new
			// msgFirst sequence and continue with the wrapper.
			seq := &Encoder{root: e.root, parent: e}
			seq.EncodeMessageType(msgFirst)
			e.flushTo(seq)
			e = seq
		}
		// e contains a sequence; append the fallback string.
		// The error returned by EncodeMessage is not inspected here.
		e.EncodeMessage(String(key))
	}

	// Flush result to variable heap.
	offset := len(e.root.buf)
	e.flushTo(e.root)
	e.buf = e.buf[:0]

	// Record variable offset in current scope.
	scope.vars = append(scope.vars, keyVal{key: key, offset: offset})
	return err
}
// Substitution kinds. EncodeSubstitution writes one of these values as the
// first number of a substitution and ExecuteSubstitution switches on it.
const (
	// substituteVar is followed by the offset of the variable's message.
	substituteVar = iota
	// substituteMacro is followed by the macro name and its argument indices.
	substituteMacro
	// substituteError is followed by the name that could not be resolved.
	substituteError
)
  168. // EncodeSubstitution inserts a resolved reference to a variable or macro.
  169. //
  170. // This call must be matched with a call to ExecuteSubstitution at decoding
  171. // time.
  172. func (e *Encoder) EncodeSubstitution(name string, arguments ...int) {
  173. if arity := len(arguments); arity > 0 {
  174. // TODO: also resolve macros.
  175. e.EncodeUint(substituteMacro)
  176. e.EncodeString(name)
  177. for _, a := range arguments {
  178. e.EncodeUint(uint64(a))
  179. }
  180. return
  181. }
  182. for scope := e; scope != nil; scope = scope.parent {
  183. for _, v := range scope.vars {
  184. if v.key != name {
  185. continue
  186. }
  187. e.EncodeUint(substituteVar) // TODO: support arity > 0
  188. e.EncodeUint(uint64(v.offset))
  189. return
  190. }
  191. }
  192. // TODO: refer to dictionary-wide scoped variables.
  193. e.EncodeUint(substituteError)
  194. e.EncodeString(name)
  195. e.setError(fmt.Errorf("catmsg: unknown var %q", name))
  196. }
// A Decoder deserializes and evaluates messages that are encoded by an encoder.
type Decoder struct {
	// tag is the language reported by Language.
	tag language.Tag
	// dst receives rendered output and supplies message arguments.
	dst Renderer
	// macros is consulted by ExecuteSubstitution to resolve macro names.
	macros Dictionary
	// err holds the first error recorded by setError. Execute resets it.
	err error
	// vars is the data that variable offsets index into when a variable
	// substitution is executed.
	vars string
	// data is the not yet consumed remainder of the message being decoded.
	data string
	// macroArg, when non-zero, is the argument index that Arg substitutes for
	// argument 1.
	macroArg int // TODO: allow more than one argument
}
  207. // NewDecoder returns a new Decoder.
  208. //
  209. // Decoders are designed to be reused for multiple invocations of Execute.
  210. // Only one goroutine may call Execute concurrently.
  211. func NewDecoder(tag language.Tag, r Renderer, macros Dictionary) *Decoder {
  212. return &Decoder{
  213. tag: tag,
  214. dst: r,
  215. macros: macros,
  216. }
  217. }
  218. func (d *Decoder) setError(err error) {
  219. if d.err == nil {
  220. d.err = err
  221. }
  222. }
  223. // Language returns the language in which the message is being rendered.
  224. //
  225. // The destination language may be a child language of the language used for
  226. // encoding. For instance, a decoding language of "pt-PT"" is consistent with an
  227. // encoding language of "pt".
  228. func (d *Decoder) Language() language.Tag { return d.tag }
  229. // Done reports whether there are more bytes to process in this message.
  230. func (d *Decoder) Done() bool { return len(d.data) == 0 }
  231. // Render implements Renderer.
  232. func (d *Decoder) Render(s string) { d.dst.Render(s) }
  233. // Arg implements Renderer.
  234. //
  235. // During evaluation of macros, the argument positions may be mapped to
  236. // arguments that differ from the original call.
  237. func (d *Decoder) Arg(i int) interface{} {
  238. if d.macroArg != 0 {
  239. if i != 1 {
  240. panic("catmsg: only macros with single argument supported")
  241. }
  242. i = d.macroArg
  243. }
  244. return d.dst.Arg(i)
  245. }
  246. // DecodeUint decodes a number that was encoded with EncodeUint and advances the
  247. // position.
  248. func (d *Decoder) DecodeUint() uint64 {
  249. x, n, err := decodeUintString(d.data)
  250. d.data = d.data[n:]
  251. if err != nil {
  252. d.setError(err)
  253. }
  254. return x
  255. }
  256. // DecodeString decodes a string that was encoded with EncodeString and advances
  257. // the position.
  258. func (d *Decoder) DecodeString() string {
  259. size := d.DecodeUint()
  260. s := d.data[:size]
  261. d.data = d.data[size:]
  262. return s
  263. }
  264. // SkipMessage skips the message at the current location and advances the
  265. // position.
  266. func (d *Decoder) SkipMessage() {
  267. n := int(d.DecodeUint())
  268. d.data = d.data[n:]
  269. }
  270. // Execute decodes and evaluates msg.
  271. //
  272. // Only one goroutine may call execute.
  273. func (d *Decoder) Execute(msg string) error {
  274. d.err = nil
  275. if !d.execute(msg) {
  276. return ErrNoMatch
  277. }
  278. return d.err
  279. }
  280. func (d *Decoder) execute(msg string) bool {
  281. saved := d.data
  282. d.data = msg
  283. ok := d.executeMessage()
  284. d.data = saved
  285. return ok
  286. }
  287. // executeMessageFromData is like execute, but also decodes a leading message
  288. // size and clips the given string accordingly.
  289. //
  290. // It reports the number of bytes consumed and whether a message was selected.
  291. func (d *Decoder) executeMessageFromData(s string) (n int, ok bool) {
  292. saved := d.data
  293. d.data = s
  294. size := int(d.DecodeUint())
  295. n = len(s) - len(d.data)
  296. // Sanitize the setting. This allows skipping a size argument for
  297. // RawString and method Done.
  298. d.data = d.data[:size]
  299. ok = d.executeMessage()
  300. n += size - len(d.data)
  301. d.data = saved
  302. return n, ok
  303. }
  304. var errUnknownHandler = errors.New("catmsg: string contains unsupported handler")
  305. // executeMessage reads the handle id, initializes the decoder and executes the
  306. // message. It is assumed that all of d.data[d.p:] is the single message.
  307. func (d *Decoder) executeMessage() bool {
  308. if d.Done() {
  309. // We interpret no data as a valid empty message.
  310. return true
  311. }
  312. handle := d.DecodeUint()
  313. var fn Handler
  314. mutex.Lock()
  315. if int(handle) < len(handlers) {
  316. fn = handlers[handle]
  317. }
  318. mutex.Unlock()
  319. if fn == nil {
  320. d.setError(errUnknownHandler)
  321. d.execute(fmt.Sprintf("\x02$!(UNKNOWNMSGHANDLER=%#x)", handle))
  322. return true
  323. }
  324. return fn(d)
  325. }
  326. // ExecuteMessage decodes and executes the message at the current position.
  327. func (d *Decoder) ExecuteMessage() bool {
  328. n, ok := d.executeMessageFromData(d.data)
  329. d.data = d.data[n:]
  330. return ok
  331. }
  332. // ExecuteSubstitution executes the message corresponding to the substitution
  333. // as encoded by EncodeSubstitution.
  334. func (d *Decoder) ExecuteSubstitution() {
  335. switch x := d.DecodeUint(); x {
  336. case substituteVar:
  337. offset := d.DecodeUint()
  338. d.executeMessageFromData(d.vars[offset:])
  339. case substituteMacro:
  340. name := d.DecodeString()
  341. data, ok := d.macros.Lookup(name)
  342. old := d.macroArg
  343. // TODO: support macros of arity other than 1.
  344. d.macroArg = int(d.DecodeUint())
  345. switch {
  346. case !ok:
  347. // TODO: detect this at creation time.
  348. d.setError(fmt.Errorf("catmsg: undefined macro %q", name))
  349. fallthrough
  350. case !d.execute(data):
  351. d.dst.Render(name) // fall back to macro name.
  352. }
  353. d.macroArg = old
  354. case substituteError:
  355. d.dst.Render(d.DecodeString())
  356. default:
  357. panic("catmsg: unreachable")
  358. }
  359. }