pattern.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package number
  5. import (
  6. "errors"
  7. "unicode/utf8"
  8. )
// This file contains a parser for the CLDR number patterns as described in
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern    := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number     := decimal | sigDigits
// decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction   := '0'* '#'*
// sigDigits  := '#'* '@' '@'* '#'*
// exponent   := 'E' '+'? '0'* '0'
// padSpec    := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
//   should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
//   interstitial commas.
// - padSpec is not referenced by the productions above. The parser accepts a
//   pad specifier ('*' followed by the pad rune) before or after either affix
//   of a subpattern.

// TODO: replace special characters in affixes (-, +, ¤) with control codes.

  29. // Pattern holds information for formatting numbers. It is designed to hold
  30. // information from CLDR number patterns.
  31. //
  32. // This pattern is precompiled for all patterns for all languages. Even though
  33. // the number of patterns is not very large, we want to keep this small.
  34. //
  35. // This type is only intended for internal use.
  36. type Pattern struct {
  37. RoundingContext
  38. Affix string // includes prefix and suffix. First byte is prefix length.
  39. Offset uint16 // Offset into Affix for prefix and suffix
  40. NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
  41. PadRune rune
  42. FormatWidth uint16
  43. GroupingSize [2]uint8
  44. Flags PatternFlag
  45. }
  46. // A RoundingContext indicates how a number should be converted to digits.
  47. // It contains all information needed to determine the "visible digits" as
  48. // required by the pluralization rules.
  49. type RoundingContext struct {
  50. // TODO: unify these two fields so that there is a more unambiguous meaning
  51. // of how precision is handled.
  52. MaxSignificantDigits int16 // -1 is unlimited
  53. MaxFractionDigits int16 // -1 is unlimited
  54. Increment uint32
  55. IncrementScale uint8 // May differ from printed scale.
  56. Mode RoundingMode
  57. DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
  58. // Number of digits.
  59. MinIntegerDigits uint8
  60. MaxIntegerDigits uint8
  61. MinFractionDigits uint8
  62. MinSignificantDigits uint8
  63. MinExponentDigits uint8
  64. }
  65. // RoundSignificantDigits returns the number of significant digits an
  66. // implementation of Convert may round to or n < 0 if there is no maximum or
  67. // a maximum is not recommended.
  68. func (r *RoundingContext) RoundSignificantDigits() (n int) {
  69. if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
  70. return int(r.MaxSignificantDigits)
  71. } else if r.isScientific() && r.MaxIntegerDigits == 1 {
  72. if r.MaxSignificantDigits == 0 ||
  73. int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
  74. // Note: don't add DigitShift: it is only used for decimals.
  75. return int(r.MaxFractionDigits) + 1
  76. }
  77. }
  78. return -1
  79. }
  80. // RoundFractionDigits returns the number of fraction digits an implementation
  81. // of Convert may round to or n < 0 if there is no maximum or a maximum is not
  82. // recommended.
  83. func (r *RoundingContext) RoundFractionDigits() (n int) {
  84. if r.MinExponentDigits == 0 &&
  85. r.MaxSignificantDigits == 0 &&
  86. r.MaxFractionDigits >= 0 {
  87. return int(r.MaxFractionDigits) + int(r.DigitShift)
  88. }
  89. return -1
  90. }
  91. // SetScale fixes the RoundingContext to a fixed number of fraction digits.
  92. func (r *RoundingContext) SetScale(scale int) {
  93. r.MinFractionDigits = uint8(scale)
  94. r.MaxFractionDigits = int16(scale)
  95. }
  96. func (r *RoundingContext) SetPrecision(prec int) {
  97. r.MaxSignificantDigits = int16(prec)
  98. }
  99. func (r *RoundingContext) isScientific() bool {
  100. return r.MinExponentDigits > 0
  101. }
  102. func (f *Pattern) needsSep(pos int) bool {
  103. p := pos - 1
  104. size := int(f.GroupingSize[0])
  105. if size == 0 || p == 0 {
  106. return false
  107. }
  108. if p == size {
  109. return true
  110. }
  111. if p -= size; p < 0 {
  112. return false
  113. }
  114. // TODO: make second groupingsize the same as first if 0 so that we can
  115. // avoid this check.
  116. if x := int(f.GroupingSize[1]); x != 0 {
  117. size = x
  118. }
  119. return p%size == 0
  120. }
  121. // A PatternFlag is a bit mask for the flag field of a Pattern.
  122. type PatternFlag uint8
  123. const (
  124. AlwaysSign PatternFlag = 1 << iota
  125. ElideSign // Use space instead of plus sign. AlwaysSign must be true.
  126. AlwaysExpSign
  127. AlwaysDecimalSeparator
  128. ParenthesisForNegative // Common pattern. Saves space.
  129. PadAfterNumber
  130. PadAfterAffix
  131. PadBeforePrefix = 0 // Default
  132. PadAfterPrefix = PadAfterAffix
  133. PadBeforeSuffix = PadAfterNumber
  134. PadAfterSuffix = PadAfterNumber | PadAfterAffix
  135. PadMask = PadAfterNumber | PadAfterAffix
  136. )
  137. type parser struct {
  138. *Pattern
  139. leadingSharps int
  140. pos int
  141. err error
  142. doNotTerminate bool
  143. groupingCount uint
  144. hasGroup bool
  145. buf []byte
  146. }
  147. func (p *parser) setError(err error) {
  148. if p.err == nil {
  149. p.err = err
  150. }
  151. }
  152. func (p *parser) updateGrouping() {
  153. if p.hasGroup &&
  154. 0 < p.groupingCount && p.groupingCount < 255 {
  155. p.GroupingSize[1] = p.GroupingSize[0]
  156. p.GroupingSize[0] = uint8(p.groupingCount)
  157. }
  158. p.groupingCount = 0
  159. p.hasGroup = true
  160. }
  161. var (
  162. // TODO: more sensible and localizeable error messages.
  163. errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
  164. errInvalidPadSpecifier = errors.New("format: invalid pad specifier")
  165. errInvalidQuote = errors.New("format: invalid quote")
  166. errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
  167. errDuplicatePercentSign = errors.New("format: duplicate percent sign")
  168. errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
  169. errUnexpectedEnd = errors.New("format: unexpected end of pattern")
  170. )
  171. // ParsePattern extracts formatting information from a CLDR number pattern.
  172. //
  173. // See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
  174. func ParsePattern(s string) (f *Pattern, err error) {
  175. p := parser{Pattern: &Pattern{}}
  176. s = p.parseSubPattern(s)
  177. if s != "" {
  178. // Parse negative sub pattern.
  179. if s[0] != ';' {
  180. p.setError(errors.New("format: error parsing first sub pattern"))
  181. return nil, p.err
  182. }
  183. neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
  184. s = neg.parseSubPattern(s[len(";"):])
  185. p.NegOffset = uint16(len(p.buf))
  186. p.buf = append(p.buf, neg.buf...)
  187. }
  188. if s != "" {
  189. p.setError(errors.New("format: spurious characters at end of pattern"))
  190. }
  191. if p.err != nil {
  192. return nil, p.err
  193. }
  194. if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
  195. // No prefix or suffixes.
  196. p.NegOffset = 0
  197. } else {
  198. p.Affix = affix
  199. }
  200. if p.Increment == 0 {
  201. p.IncrementScale = 0
  202. }
  203. return p.Pattern, nil
  204. }
  205. func (p *parser) parseSubPattern(s string) string {
  206. s = p.parsePad(s, PadBeforePrefix)
  207. s = p.parseAffix(s)
  208. s = p.parsePad(s, PadAfterPrefix)
  209. s = p.parse(p.number, s)
  210. p.updateGrouping()
  211. s = p.parsePad(s, PadBeforeSuffix)
  212. s = p.parseAffix(s)
  213. s = p.parsePad(s, PadAfterSuffix)
  214. return s
  215. }
  216. func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
  217. if len(s) >= 2 && s[0] == '*' {
  218. r, sz := utf8.DecodeRuneInString(s[1:])
  219. if p.PadRune != 0 {
  220. p.err = errMultiplePadSpecifiers
  221. } else {
  222. p.Flags |= f
  223. p.PadRune = r
  224. }
  225. return s[1+sz:]
  226. }
  227. return s
  228. }
  229. func (p *parser) parseAffix(s string) string {
  230. x := len(p.buf)
  231. p.buf = append(p.buf, 0) // placeholder for affix length
  232. s = p.parse(p.affix, s)
  233. n := len(p.buf) - x - 1
  234. if n > 0xFF {
  235. p.setError(errAffixTooLarge)
  236. }
  237. p.buf[x] = uint8(n)
  238. return s
  239. }
  240. // state implements a state transition. It returns the new state. A state
  241. // function may set an error on the parser or may simply return on an incorrect
  242. // token and let the next phase fail.
  243. type state func(r rune) state
  244. // parse repeatedly applies a state function on the given string until a
  245. // termination condition is reached.
  246. func (p *parser) parse(fn state, s string) (tail string) {
  247. for i, r := range s {
  248. p.doNotTerminate = false
  249. if fn = fn(r); fn == nil || p.err != nil {
  250. return s[i:]
  251. }
  252. p.FormatWidth++
  253. }
  254. if p.doNotTerminate {
  255. p.setError(errUnexpectedEnd)
  256. }
  257. return ""
  258. }
  259. func (p *parser) affix(r rune) state {
  260. switch r {
  261. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  262. '#', '@', '.', '*', ',', ';':
  263. return nil
  264. case '\'':
  265. p.FormatWidth--
  266. return p.escapeFirst
  267. case '%':
  268. if p.DigitShift != 0 {
  269. p.setError(errDuplicatePercentSign)
  270. }
  271. p.DigitShift = 2
  272. case '\u2030': // ‰ Per mille
  273. if p.DigitShift != 0 {
  274. p.setError(errDuplicatePermilleSign)
  275. }
  276. p.DigitShift = 3
  277. // TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
  278. }
  279. p.buf = append(p.buf, string(r)...)
  280. return p.affix
  281. }
  282. func (p *parser) escapeFirst(r rune) state {
  283. switch r {
  284. case '\'':
  285. p.buf = append(p.buf, "\\'"...)
  286. return p.affix
  287. default:
  288. p.buf = append(p.buf, '\'')
  289. p.buf = append(p.buf, string(r)...)
  290. }
  291. return p.escape
  292. }
  293. func (p *parser) escape(r rune) state {
  294. switch r {
  295. case '\'':
  296. p.FormatWidth--
  297. p.buf = append(p.buf, '\'')
  298. return p.affix
  299. default:
  300. p.buf = append(p.buf, string(r)...)
  301. }
  302. return p.escape
  303. }
  304. // number parses a number. The BNF says the integer part should always have
  305. // a '0', but that does not appear to be the case according to the rest of the
  306. // documentation. We will allow having only '#' numbers.
  307. func (p *parser) number(r rune) state {
  308. switch r {
  309. case '#':
  310. p.groupingCount++
  311. p.leadingSharps++
  312. case '@':
  313. p.groupingCount++
  314. p.leadingSharps = 0
  315. p.MaxFractionDigits = -1
  316. return p.sigDigits(r)
  317. case ',':
  318. if p.leadingSharps == 0 { // no leading commas
  319. return nil
  320. }
  321. p.updateGrouping()
  322. case 'E':
  323. p.MaxIntegerDigits = uint8(p.leadingSharps)
  324. return p.exponent
  325. case '.': // allow ".##" etc.
  326. p.updateGrouping()
  327. return p.fraction
  328. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  329. return p.integer(r)
  330. default:
  331. return nil
  332. }
  333. return p.number
  334. }
  335. func (p *parser) integer(r rune) state {
  336. if !('0' <= r && r <= '9') {
  337. var next state
  338. switch r {
  339. case 'E':
  340. if p.leadingSharps > 0 {
  341. p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
  342. }
  343. next = p.exponent
  344. case '.':
  345. next = p.fraction
  346. case ',':
  347. next = p.integer
  348. }
  349. p.updateGrouping()
  350. return next
  351. }
  352. p.Increment = p.Increment*10 + uint32(r-'0')
  353. p.groupingCount++
  354. p.MinIntegerDigits++
  355. return p.integer
  356. }
  357. func (p *parser) sigDigits(r rune) state {
  358. switch r {
  359. case '@':
  360. p.groupingCount++
  361. p.MaxSignificantDigits++
  362. p.MinSignificantDigits++
  363. case '#':
  364. return p.sigDigitsFinal(r)
  365. case 'E':
  366. p.updateGrouping()
  367. return p.normalizeSigDigitsWithExponent()
  368. default:
  369. p.updateGrouping()
  370. return nil
  371. }
  372. return p.sigDigits
  373. }
  374. func (p *parser) sigDigitsFinal(r rune) state {
  375. switch r {
  376. case '#':
  377. p.groupingCount++
  378. p.MaxSignificantDigits++
  379. case 'E':
  380. p.updateGrouping()
  381. return p.normalizeSigDigitsWithExponent()
  382. default:
  383. p.updateGrouping()
  384. return nil
  385. }
  386. return p.sigDigitsFinal
  387. }
  388. func (p *parser) normalizeSigDigitsWithExponent() state {
  389. p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
  390. p.MinFractionDigits = p.MinSignificantDigits - 1
  391. p.MaxFractionDigits = p.MaxSignificantDigits - 1
  392. p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
  393. return p.exponent
  394. }
  395. func (p *parser) fraction(r rune) state {
  396. switch r {
  397. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  398. p.Increment = p.Increment*10 + uint32(r-'0')
  399. p.IncrementScale++
  400. p.MinFractionDigits++
  401. p.MaxFractionDigits++
  402. case '#':
  403. p.MaxFractionDigits++
  404. case 'E':
  405. if p.leadingSharps > 0 {
  406. p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
  407. }
  408. return p.exponent
  409. default:
  410. return nil
  411. }
  412. return p.fraction
  413. }
  414. func (p *parser) exponent(r rune) state {
  415. switch r {
  416. case '+':
  417. // Set mode and check it wasn't already set.
  418. if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
  419. break
  420. }
  421. p.Flags |= AlwaysExpSign
  422. p.doNotTerminate = true
  423. return p.exponent
  424. case '0':
  425. p.MinExponentDigits++
  426. return p.exponent
  427. }
  428. // termination condition
  429. if p.MinExponentDigits == 0 {
  430. p.setError(errors.New("format: need at least one digit"))
  431. }
  432. return nil
  433. }