catmsg.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package catmsg contains support types for package x/text/message/catalog.
  5. //
  6. // This package contains the low-level implementations of Message used by the
  7. // catalog package and provides primitives for other packages to implement their
  8. // own. For instance, the plural package provides functionality for selecting
  9. // translation strings based on the plural category of substitution arguments.
  10. //
  11. //
  12. // Encoding and Decoding
  13. //
// Catalogs store Messages encoded as a single string. Compiling a message into
// a string both results in a more compact representation and speeds up
// evaluation.
  16. //
// A Message must implement a Compile method to convert its arbitrary
// representation to a string. The Compile method takes an Encoder which
// facilitates serializing the message. Encoders also provide more context of
// the message's creation (such as for which language the message is intended),
// which may not be known at the time of the creation of the message.
  22. //
  23. // Each message type must also have an accompanying decoder registered to decode
  24. // the message. This decoder takes a Decoder argument which provides the
  25. // counterparts for the decoding.
  26. //
  27. //
  28. // Renderers
  29. //
  30. // A Decoder must be initialized with a Renderer implementation. These
  31. // implementations must be provided by packages that use Catalogs, typically
  32. // formatting packages such as x/text/message. A typical user will not need to
  33. // worry about this type; it is only relevant to packages that do string
  34. // formatting and want to use the catalog package to handle localized strings.
  35. //
// A package that uses catalogs for selecting strings receives selection results
// as a sequence of substrings passed to the Renderer. The following snippet
// shows how to express the above example using the message package.
  39. //
//	message.Set(language.English, "You are %d minute(s) late.",
//		catalog.Var("minutes", plural.Select(1, "one", "minute")),
//		catalog.String("You are %[1]d ${minutes} late."))
//
//	p := message.NewPrinter(language.English)
//	p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
  46. //
  47. // To evaluate the Printf, package message wraps the arguments in a Renderer
  48. // that is passed to the catalog for message decoding. The call sequence that
  49. // results from evaluating the above message, assuming the person is rather
  50. // tardy, is:
  51. //
//	Render("You are %[1]d ")
//	Arg(1)
//	Render("minutes")
//	Render(" late.")
  56. //
// The call to Arg is caused by the plural.Select execution, which evaluates
// the argument to determine whether the singular or plural message form should
// be selected. The calls to Render report the partial results to the message
// package for further evaluation.
  61. package catmsg
  62. import (
  63. "errors"
  64. "fmt"
  65. "strconv"
  66. "strings"
  67. "sync"
  68. "golang.org/x/text/language"
  69. )
// A Handle refers to a registered message type.
//
// A Handle is an index into the package-level handlers slice: the internal
// message types occupy fixed low positions and Register hands out the next
// free index.
type Handle int

// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool
  77. // Register records the existence of a message type and returns a Handle that
  78. // can be used in the Encoder's EncodeMessageType method to create such
  79. // messages. The prefix of the name should be the package path followed by
  80. // an optional disambiguating string.
  81. // Register will panic if a handle for the same name was already registered.
  82. func Register(name string, handler Handler) Handle {
  83. mutex.Lock()
  84. defer mutex.Unlock()
  85. if _, ok := names[name]; ok {
  86. panic(fmt.Errorf("catmsg: handler for %q already exists", name))
  87. }
  88. h := Handle(len(handlers))
  89. names[name] = h
  90. handlers = append(handlers, handler)
  91. return h
  92. }
// These handlers require fixed positions in the handlers slice.
const (
	// msgVars wraps a message with a variable block.
	msgVars Handle = iota
	// msgFirst selects the first message in a sequence that yields a match.
	msgFirst
	// msgRaw passes its data to the Renderer as is.
	msgRaw
	// msgString alternates string constants with variable substitutions.
	msgString
	// msgAffix renders a prefix and a suffix around a nested message.
	msgAffix

	// Leave some arbitrary room for future expansion: 20 should suffice.
	// The handlers slice is created with this length, so the first Handle
	// returned by Register equals numInternal.
	numInternal = 20
)
// prefix is prepended to the names under which the internal message types are
// entered in names.
const prefix = "golang.org/x/text/internal/catmsg."

var (
	// TODO: find a more stable way to link handles to message types.

	// mutex is held by Register while it reads and updates names and handlers.
	mutex sync.Mutex

	// names maps the name of a registered message type to its Handle. It is
	// pre-populated with the internal message types.
	names = map[string]Handle{
		prefix + "Vars":   msgVars,
		prefix + "First":  msgFirst,
		prefix + "Raw":    msgRaw,
		prefix + "String": msgString,
		prefix + "Affix":  msgAffix,
	}

	// handlers is indexed by Handle. It starts out with numInternal slots
	// reserved for the internal message types; init fills in the ones that
	// are currently defined and Register appends after them.
	handlers = make([]Handler, numInternal)
)
  116. func init() {
  117. // This handler is a message type wrapper that initializes a decoder
  118. // with a variable block. This message type, if present, is always at the
  119. // start of an encoded message.
  120. handlers[msgVars] = func(d *Decoder) bool {
  121. blockSize := int(d.DecodeUint())
  122. d.vars = d.data[:blockSize]
  123. d.data = d.data[blockSize:]
  124. return d.executeMessage()
  125. }
  126. // First takes the first message in a sequence that results in a match for
  127. // the given substitution arguments.
  128. handlers[msgFirst] = func(d *Decoder) bool {
  129. for !d.Done() {
  130. if d.ExecuteMessage() {
  131. return true
  132. }
  133. }
  134. return false
  135. }
  136. handlers[msgRaw] = func(d *Decoder) bool {
  137. d.Render(d.data)
  138. return true
  139. }
  140. // A String message alternates between a string constant and a variable
  141. // substitution.
  142. handlers[msgString] = func(d *Decoder) bool {
  143. for !d.Done() {
  144. if str := d.DecodeString(); str != "" {
  145. d.Render(str)
  146. }
  147. if d.Done() {
  148. break
  149. }
  150. d.ExecuteSubstitution()
  151. }
  152. return true
  153. }
  154. handlers[msgAffix] = func(d *Decoder) bool {
  155. // TODO: use an alternative method for common cases.
  156. prefix := d.DecodeString()
  157. suffix := d.DecodeString()
  158. if prefix != "" {
  159. d.Render(prefix)
  160. }
  161. ret := d.ExecuteMessage()
  162. if suffix != "" {
  163. d.Render(suffix)
  164. }
  165. return ret
  166. }
  167. }
var (
	// ErrIncomplete indicates a compiled message does not define translations
	// for all possible argument values. If this error is returned, evaluating
	// the message may result in the ErrNoMatch error.
	ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")

	// ErrNoMatch indicates no translation message matched the given input
	// parameters when evaluating a message.
	ErrNoMatch = errors.New("catmsg: no translation for inputs")
)
// A Message holds a collection of translations for the same phrase that may
// vary based on the values of substitution arguments.
type Message interface {
	// Compile encodes the format string(s) of the message as a string for later
	// evaluation.
	//
	// The first call Compile makes on the encoder must be EncodeMessageType.
	// The handle passed to this call may either be a handle returned by
	// Register to encode a single custom message, or HandleFirst followed by
	// a sequence of calls to EncodeMessage.
	// NOTE(review): no HandleFirst is declared in the visible part of this
	// file (the internal handle is msgFirst) — confirm the intended name.
	//
	// Compile must return ErrIncomplete if it is possible for evaluation to
	// not match any translation for a given set of formatting parameters.
	// For example, selecting a translation based on plural form may not yield
	// a match if the form "Other" is not one of the selectors.
	//
	// Compile may return any other application-specific error. For backwards
	// compatibility with packages like fmt, which often do not do sanity
	// checking of format strings ahead of time, Compile should still make an
	// effort to have some sensible fallback in case of an error.
	Compile(e *Encoder) error
}
  199. // Compile converts a Message to a data string that can be stored in a Catalog.
  200. // The resulting string can subsequently be decoded by passing to the Execute
  201. // method of a Decoder.
  202. func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
  203. // TODO: pass macros so they can be used for validation.
  204. v := &Encoder{inBody: true} // encoder for variables
  205. v.root = v
  206. e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
  207. err = m.Compile(e)
  208. // This package serves te message package, which in turn is meant to be a
  209. // drop-in replacement for fmt. With the fmt package, format strings are
  210. // evaluated lazily and errors are handled by substituting strings in the
  211. // result, rather then returning an error. Dealing with multiple languages
  212. // makes it more important to check errors ahead of time. We chose to be
  213. // consistent and compatible and allow graceful degradation in case of
  214. // errors.
  215. buf := e.buf[stripPrefix(e.buf):]
  216. if len(v.buf) > 0 {
  217. // Prepend variable block.
  218. b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
  219. b[0] = byte(msgVars)
  220. b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
  221. b = append(b, v.buf...)
  222. b = append(b, buf...)
  223. buf = b
  224. }
  225. if err == nil {
  226. err = v.err
  227. }
  228. return string(buf), err
  229. }
  230. // FirstOf is a message type that prints the first message in the sequence that
  231. // resolves to a match for the given substitution arguments.
  232. type FirstOf []Message
  233. // Compile implements Message.
  234. func (s FirstOf) Compile(e *Encoder) error {
  235. e.EncodeMessageType(msgFirst)
  236. err := ErrIncomplete
  237. for i, m := range s {
  238. if err == nil {
  239. return fmt.Errorf("catalog: message argument %d is complete and blocks subsequent messages", i-1)
  240. }
  241. err = e.EncodeMessage(m)
  242. }
  243. return err
  244. }
  245. // Var defines a message that can be substituted for a placeholder of the same
  246. // name. If an expression does not result in a string after evaluation, Name is
  247. // used as the substitution. For example:
  248. // Var{
  249. // Name: "minutes",
  250. // Message: plural.Select(1, "one", "minute"),
  251. // }
  252. // will resolve to minute for singular and minutes for plural forms.
  253. type Var struct {
  254. Name string
  255. Message Message
  256. }
  257. var errIsVar = errors.New("catmsg: variable used as message")
  258. // Compile implements Message.
  259. //
  260. // Note that this method merely registers a variable; it does not create an
  261. // encoded message.
  262. func (v *Var) Compile(e *Encoder) error {
  263. if err := e.addVar(v.Name, v.Message); err != nil {
  264. return err
  265. }
  266. // Using a Var by itself is an error. If it is in a sequence followed by
  267. // other messages referring to it, this error will be ignored.
  268. return errIsVar
  269. }
  270. // Raw is a message consisting of a single format string that is passed as is
  271. // to the Renderer.
  272. //
  273. // Note that a Renderer may still do its own variable substitution.
  274. type Raw string
  275. // Compile implements Message.
  276. func (r Raw) Compile(e *Encoder) (err error) {
  277. e.EncodeMessageType(msgRaw)
  278. // Special case: raw strings don't have a size encoding and so don't use
  279. // EncodeString.
  280. e.buf = append(e.buf, r...)
  281. return nil
  282. }
  283. // String is a message consisting of a single format string which contains
  284. // placeholders that may be substituted with variables.
  285. //
  286. // Variable substitutions are marked with placeholders and a variable name of
  287. // the form ${name}. Any other substitutions such as Go templates or
  288. // printf-style substitutions are left to be done by the Renderer.
  289. //
  290. // When evaluation a string interpolation, a Renderer will receive separate
  291. // calls for each placeholder and interstitial string. For example, for the
  292. // message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
  293. // is:
  294. // d.Render("%[1]v ")
  295. // d.Arg(1)
  296. // d.Render(resultOfInvites)
  297. // d.Render(" %[2]v to ")
  298. // d.Arg(2)
  299. // d.Render(resultOfTheir)
  300. // d.Render(" party.")
  301. // where the messages for "invites" and "their" both use a plural.Select
  302. // referring to the first argument.
  303. //
  304. // Strings may also invoke macros. Macros are essentially variables that can be
  305. // reused. Macros may, for instance, be used to make selections between
  306. // different conjugations of a verb. See the catalog package description for an
  307. // overview of macros.
  308. type String string
  309. // Compile implements Message. It parses the placeholder formats and returns
  310. // any error.
  311. func (s String) Compile(e *Encoder) (err error) {
  312. msg := string(s)
  313. const subStart = "${"
  314. hasHeader := false
  315. p := 0
  316. b := []byte{}
  317. for {
  318. i := strings.Index(msg[p:], subStart)
  319. if i == -1 {
  320. break
  321. }
  322. b = append(b, msg[p:p+i]...)
  323. p += i + len(subStart)
  324. if i = strings.IndexByte(msg[p:], '}'); i == -1 {
  325. b = append(b, "$!(MISSINGBRACE)"...)
  326. err = fmt.Errorf("catmsg: missing '}'")
  327. p = len(msg)
  328. break
  329. }
  330. name := strings.TrimSpace(msg[p : p+i])
  331. if q := strings.IndexByte(name, '('); q == -1 {
  332. if !hasHeader {
  333. hasHeader = true
  334. e.EncodeMessageType(msgString)
  335. }
  336. e.EncodeString(string(b))
  337. e.EncodeSubstitution(name)
  338. b = b[:0]
  339. } else if j := strings.IndexByte(name[q:], ')'); j == -1 {
  340. // TODO: what should the error be?
  341. b = append(b, "$!(MISSINGPAREN)"...)
  342. err = fmt.Errorf("catmsg: missing ')'")
  343. } else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
  344. // TODO: handle more than one argument
  345. b = append(b, "$!(BADNUM)"...)
  346. err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
  347. } else {
  348. if !hasHeader {
  349. hasHeader = true
  350. e.EncodeMessageType(msgString)
  351. }
  352. e.EncodeString(string(b))
  353. e.EncodeSubstitution(name[:q], int(x))
  354. b = b[:0]
  355. }
  356. p += i + 1
  357. }
  358. b = append(b, msg[p:]...)
  359. if !hasHeader {
  360. // Simplify string to a raw string.
  361. Raw(string(b)).Compile(e)
  362. } else if len(b) > 0 {
  363. e.EncodeString(string(b))
  364. }
  365. return err
  366. }
  367. // Affix is a message that adds a prefix and suffix to another message.
  368. // This is mostly used add back whitespace to a translation that was stripped
  369. // before sending it out.
  370. type Affix struct {
  371. Message Message
  372. Prefix string
  373. Suffix string
  374. }
  375. // Compile implements Message.
  376. func (a Affix) Compile(e *Encoder) (err error) {
  377. // TODO: consider adding a special message type that just adds a single
  378. // return. This is probably common enough to handle the majority of cases.
  379. // Get some stats first, though.
  380. e.EncodeMessageType(msgAffix)
  381. e.EncodeString(a.Prefix)
  382. e.EncodeString(a.Suffix)
  383. e.EncodeMessage(a.Message)
  384. return nil
  385. }