123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package number
- import (
- "errors"
- "unicode/utf8"
- )
// This file contains a parser for the CLDR number patterns as described in
// https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
//	pattern    := subpattern (';' subpattern)?
//	subpattern := affix? number exponent? affix?
//	number     := decimal | sigDigits
//	decimal    := '#'* '0'* ('.' fraction)? | '#' | '0'
//	fraction   := '0'* '#'*
//	sigDigits  := '#'* '@' '@'* '#'*
//	exponent   := 'E' '+'? '0'* '0'
//	padSpec    := '*' \L
//
// Notes:
//   - An affix pattern may contain any runes, but runes with special meaning
//     should be escaped.
//   - Sequences of digits, '#', and '@' in decimal and sigDigits may have
//     interstitial commas.

// TODO: replace special characters in affixes (-, +, ¤) with control codes.

- // Pattern holds information for formatting numbers. It is designed to hold
- // information from CLDR number patterns.
- //
- // This pattern is precompiled for all patterns for all languages. Even though
- // the number of patterns is not very large, we want to keep this small.
- //
- // This type is only intended for internal use.
- type Pattern struct {
- RoundingContext
- Affix string // includes prefix and suffix. First byte is prefix length.
- Offset uint16 // Offset into Affix for prefix and suffix
- NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
- PadRune rune
- FormatWidth uint16
- GroupingSize [2]uint8
- Flags PatternFlag
- }
- // A RoundingContext indicates how a number should be converted to digits.
- // It contains all information needed to determine the "visible digits" as
- // required by the pluralization rules.
- type RoundingContext struct {
- // TODO: unify these two fields so that there is a more unambiguous meaning
- // of how precision is handled.
- MaxSignificantDigits int16 // -1 is unlimited
- MaxFractionDigits int16 // -1 is unlimited
- Increment uint32
- IncrementScale uint8 // May differ from printed scale.
- Mode RoundingMode
- DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
- // Number of digits.
- MinIntegerDigits uint8
- MaxIntegerDigits uint8
- MinFractionDigits uint8
- MinSignificantDigits uint8
- MinExponentDigits uint8
- }
- // RoundSignificantDigits returns the number of significant digits an
- // implementation of Convert may round to or n < 0 if there is no maximum or
- // a maximum is not recommended.
- func (r *RoundingContext) RoundSignificantDigits() (n int) {
- if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
- return int(r.MaxSignificantDigits)
- } else if r.isScientific() && r.MaxIntegerDigits == 1 {
- if r.MaxSignificantDigits == 0 ||
- int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
- // Note: don't add DigitShift: it is only used for decimals.
- return int(r.MaxFractionDigits) + 1
- }
- }
- return -1
- }
- // RoundFractionDigits returns the number of fraction digits an implementation
- // of Convert may round to or n < 0 if there is no maximum or a maximum is not
- // recommended.
- func (r *RoundingContext) RoundFractionDigits() (n int) {
- if r.MinExponentDigits == 0 &&
- r.MaxSignificantDigits == 0 &&
- r.MaxFractionDigits >= 0 {
- return int(r.MaxFractionDigits) + int(r.DigitShift)
- }
- return -1
- }
- // SetScale fixes the RoundingContext to a fixed number of fraction digits.
- func (r *RoundingContext) SetScale(scale int) {
- r.MinFractionDigits = uint8(scale)
- r.MaxFractionDigits = int16(scale)
- }
- func (r *RoundingContext) SetPrecision(prec int) {
- r.MaxSignificantDigits = int16(prec)
- }
- func (r *RoundingContext) isScientific() bool {
- return r.MinExponentDigits > 0
- }
- func (f *Pattern) needsSep(pos int) bool {
- p := pos - 1
- size := int(f.GroupingSize[0])
- if size == 0 || p == 0 {
- return false
- }
- if p == size {
- return true
- }
- if p -= size; p < 0 {
- return false
- }
- // TODO: make second groupingsize the same as first if 0 so that we can
- // avoid this check.
- if x := int(f.GroupingSize[1]); x != 0 {
- size = x
- }
- return p%size == 0
- }
// A PatternFlag is a bit mask for the flag field of a Pattern.
type PatternFlag uint8

// Individual flag bits.
const (
	AlwaysSign PatternFlag = 1 << iota
	ElideSign              // Use space instead of plus sign. AlwaysSign must be true.
	AlwaysExpSign
	AlwaysDecimalSeparator
	ParenthesisForNegative // Common pattern. Saves space.

	PadAfterNumber
	PadAfterAffix
)

// Padding positions, expressed as combinations of the PadAfterNumber and
// PadAfterAffix bits.
const (
	PadBeforePrefix = 0 // Default
	PadAfterPrefix  = PadAfterAffix
	PadBeforeSuffix = PadAfterNumber
	PadAfterSuffix  = PadAfterNumber | PadAfterAffix
	PadMask         = PadAfterNumber | PadAfterAffix
)
- type parser struct {
- *Pattern
- leadingSharps int
- pos int
- err error
- doNotTerminate bool
- groupingCount uint
- hasGroup bool
- buf []byte
- }
- func (p *parser) setError(err error) {
- if p.err == nil {
- p.err = err
- }
- }
- func (p *parser) updateGrouping() {
- if p.hasGroup &&
- 0 < p.groupingCount && p.groupingCount < 255 {
- p.GroupingSize[1] = p.GroupingSize[0]
- p.GroupingSize[0] = uint8(p.groupingCount)
- }
- p.groupingCount = 0
- p.hasGroup = true
- }
// Errors reported by the pattern parser.
//
// TODO: more sensible and localizable error messages.
var (
	// Pad specifiers.
	errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
	errInvalidPadSpecifier   = errors.New("format: invalid pad specifier")

	// Affixes.
	errInvalidQuote          = errors.New("format: invalid quote")
	errAffixTooLarge         = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
	errDuplicatePercentSign  = errors.New("format: duplicate percent sign")
	errDuplicatePermilleSign = errors.New("format: duplicate permille sign")

	// General.
	errUnexpectedEnd = errors.New("format: unexpected end of pattern")
)
- // ParsePattern extracts formatting information from a CLDR number pattern.
- //
- // See https://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
- func ParsePattern(s string) (f *Pattern, err error) {
- p := parser{Pattern: &Pattern{}}
- s = p.parseSubPattern(s)
- if s != "" {
- // Parse negative sub pattern.
- if s[0] != ';' {
- p.setError(errors.New("format: error parsing first sub pattern"))
- return nil, p.err
- }
- neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
- s = neg.parseSubPattern(s[len(";"):])
- p.NegOffset = uint16(len(p.buf))
- p.buf = append(p.buf, neg.buf...)
- }
- if s != "" {
- p.setError(errors.New("format: spurious characters at end of pattern"))
- }
- if p.err != nil {
- return nil, p.err
- }
- if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
- // No prefix or suffixes.
- p.NegOffset = 0
- } else {
- p.Affix = affix
- }
- if p.Increment == 0 {
- p.IncrementScale = 0
- }
- return p.Pattern, nil
- }
- func (p *parser) parseSubPattern(s string) string {
- s = p.parsePad(s, PadBeforePrefix)
- s = p.parseAffix(s)
- s = p.parsePad(s, PadAfterPrefix)
- s = p.parse(p.number, s)
- p.updateGrouping()
- s = p.parsePad(s, PadBeforeSuffix)
- s = p.parseAffix(s)
- s = p.parsePad(s, PadAfterSuffix)
- return s
- }
- func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
- if len(s) >= 2 && s[0] == '*' {
- r, sz := utf8.DecodeRuneInString(s[1:])
- if p.PadRune != 0 {
- p.err = errMultiplePadSpecifiers
- } else {
- p.Flags |= f
- p.PadRune = r
- }
- return s[1+sz:]
- }
- return s
- }
- func (p *parser) parseAffix(s string) string {
- x := len(p.buf)
- p.buf = append(p.buf, 0) // placeholder for affix length
- s = p.parse(p.affix, s)
- n := len(p.buf) - x - 1
- if n > 0xFF {
- p.setError(errAffixTooLarge)
- }
- p.buf[x] = uint8(n)
- return s
- }
// A state is one step of the parser's state machine. It consumes the rune r
// and returns the state to apply to the next rune, or nil if r is not part of
// the construct being parsed. A state function may set an error on the
// parser, or it may simply return on an incorrect token and let the next
// phase fail.
type state func(r rune) state
- // parse repeatedly applies a state function on the given string until a
- // termination condition is reached.
- func (p *parser) parse(fn state, s string) (tail string) {
- for i, r := range s {
- p.doNotTerminate = false
- if fn = fn(r); fn == nil || p.err != nil {
- return s[i:]
- }
- p.FormatWidth++
- }
- if p.doNotTerminate {
- p.setError(errUnexpectedEnd)
- }
- return ""
- }
- func (p *parser) affix(r rune) state {
- switch r {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
- '#', '@', '.', '*', ',', ';':
- return nil
- case '\'':
- p.FormatWidth--
- return p.escapeFirst
- case '%':
- if p.DigitShift != 0 {
- p.setError(errDuplicatePercentSign)
- }
- p.DigitShift = 2
- case '\u2030': // ‰ Per mille
- if p.DigitShift != 0 {
- p.setError(errDuplicatePermilleSign)
- }
- p.DigitShift = 3
- // TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
- }
- p.buf = append(p.buf, string(r)...)
- return p.affix
- }
- func (p *parser) escapeFirst(r rune) state {
- switch r {
- case '\'':
- p.buf = append(p.buf, "\\'"...)
- return p.affix
- default:
- p.buf = append(p.buf, '\'')
- p.buf = append(p.buf, string(r)...)
- }
- return p.escape
- }
- func (p *parser) escape(r rune) state {
- switch r {
- case '\'':
- p.FormatWidth--
- p.buf = append(p.buf, '\'')
- return p.affix
- default:
- p.buf = append(p.buf, string(r)...)
- }
- return p.escape
- }
- // number parses a number. The BNF says the integer part should always have
- // a '0', but that does not appear to be the case according to the rest of the
- // documentation. We will allow having only '#' numbers.
- func (p *parser) number(r rune) state {
- switch r {
- case '#':
- p.groupingCount++
- p.leadingSharps++
- case '@':
- p.groupingCount++
- p.leadingSharps = 0
- p.MaxFractionDigits = -1
- return p.sigDigits(r)
- case ',':
- if p.leadingSharps == 0 { // no leading commas
- return nil
- }
- p.updateGrouping()
- case 'E':
- p.MaxIntegerDigits = uint8(p.leadingSharps)
- return p.exponent
- case '.': // allow ".##" etc.
- p.updateGrouping()
- return p.fraction
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return p.integer(r)
- default:
- return nil
- }
- return p.number
- }
- func (p *parser) integer(r rune) state {
- if !('0' <= r && r <= '9') {
- var next state
- switch r {
- case 'E':
- if p.leadingSharps > 0 {
- p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
- }
- next = p.exponent
- case '.':
- next = p.fraction
- case ',':
- next = p.integer
- }
- p.updateGrouping()
- return next
- }
- p.Increment = p.Increment*10 + uint32(r-'0')
- p.groupingCount++
- p.MinIntegerDigits++
- return p.integer
- }
- func (p *parser) sigDigits(r rune) state {
- switch r {
- case '@':
- p.groupingCount++
- p.MaxSignificantDigits++
- p.MinSignificantDigits++
- case '#':
- return p.sigDigitsFinal(r)
- case 'E':
- p.updateGrouping()
- return p.normalizeSigDigitsWithExponent()
- default:
- p.updateGrouping()
- return nil
- }
- return p.sigDigits
- }
- func (p *parser) sigDigitsFinal(r rune) state {
- switch r {
- case '#':
- p.groupingCount++
- p.MaxSignificantDigits++
- case 'E':
- p.updateGrouping()
- return p.normalizeSigDigitsWithExponent()
- default:
- p.updateGrouping()
- return nil
- }
- return p.sigDigitsFinal
- }
- func (p *parser) normalizeSigDigitsWithExponent() state {
- p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
- p.MinFractionDigits = p.MinSignificantDigits - 1
- p.MaxFractionDigits = p.MaxSignificantDigits - 1
- p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
- return p.exponent
- }
- func (p *parser) fraction(r rune) state {
- switch r {
- case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- p.Increment = p.Increment*10 + uint32(r-'0')
- p.IncrementScale++
- p.MinFractionDigits++
- p.MaxFractionDigits++
- case '#':
- p.MaxFractionDigits++
- case 'E':
- if p.leadingSharps > 0 {
- p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
- }
- return p.exponent
- default:
- return nil
- }
- return p.fraction
- }
- func (p *parser) exponent(r rune) state {
- switch r {
- case '+':
- // Set mode and check it wasn't already set.
- if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
- break
- }
- p.Flags |= AlwaysExpSign
- p.doNotTerminate = true
- return p.exponent
- case '0':
- p.MinExponentDigits++
- return p.exponent
- }
- // termination condition
- if p.MinExponentDigits == 0 {
- p.setError(errors.New("format: need at least one digit"))
- }
- return nil
- }
|