123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // +build ignore
- package main
- import (
- "flag"
- "fmt"
- "log"
- "reflect"
- "strings"
- "unicode/utf8"
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/internal/language"
- "golang.org/x/text/internal/language/compact"
- "golang.org/x/text/internal/number"
- "golang.org/x/text/internal/stringset"
- "golang.org/x/text/unicode/cldr"
- )
// Command-line flags of the generator. Each variable holds the pointer
// returned by the flag package for the corresponding option.

// test is the -test flag; it defaults to false.
var test = flag.Bool(
	"test",
	false,
	"test existing tables; can be used to compare web data with package data.",
)

// outputFile is the -output flag: the file name main hands to WriteGoFile.
var outputFile = flag.String("output", "tables.go", "output file")

// outputTestFile is the -testoutput flag; it defaults to "data_test.go".
var outputTestFile = flag.String("testoutput", "data_test.go", "output file")

// draft is the -draft flag: the draft level name that genSymbols and
// genFormats pass to cldr.ParseDraft.
var draft = flag.String(
	"draft",
	"contributed",
	`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
)
- func main() {
- gen.Init()
- const pkg = "number"
- gen.Repackage("gen_common.go", "common.go", pkg)
- // Read the CLDR zip file.
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
- d := &cldr.Decoder{}
- d.SetDirFilter("supplemental", "main")
- d.SetSectionFilter("numbers", "numberingSystem")
- data, err := d.DecodeZip(r)
- if err != nil {
- log.Fatalf("DecodeZip: %v", err)
- }
- w := gen.NewCodeWriter()
- defer w.WriteGoFile(*outputFile, pkg)
- fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
- gen.WriteCLDRVersion(w)
- genNumSystem(w, data)
- genSymbols(w, data)
- genFormats(w, data)
- }
- var systemMap = map[string]system{"latn": 0}
- func getNumberSystem(str string) system {
- ns, ok := systemMap[str]
- if !ok {
- log.Fatalf("No index for numbering system %q", str)
- }
- return ns
- }
- func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
- numSysData := []systemData{
- {digitSize: 1, zero: [4]byte{'0'}},
- }
- for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
- if len(ns.Digits) == 0 {
- continue
- }
- switch ns.Id {
- case "latn":
- // hard-wired
- continue
- case "hanidec":
- // non-consecutive digits: treat as "algorithmic"
- continue
- }
- zero, sz := utf8.DecodeRuneInString(ns.Digits)
- if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
- log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
- }
- i := rune(0)
- for _, r := range ns.Digits {
- // Verify that we can do simple math on the UTF-8 byte sequence
- // of zero to get the digit.
- if zero+i != r {
- // Runes not consecutive.
- log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
- }
- i++
- }
- var x [utf8.UTFMax]byte
- utf8.EncodeRune(x[:], zero)
- id := system(len(numSysData))
- systemMap[ns.Id] = id
- numSysData = append(numSysData, systemData{
- id: id,
- digitSize: byte(sz),
- zero: x,
- })
- }
- w.WriteVar("numSysData", numSysData)
- algoID := system(len(numSysData))
- fmt.Fprintln(w, "const (")
- for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
- id, ok := systemMap[ns.Id]
- if !ok {
- id = algoID
- systemMap[ns.Id] = id
- algoID++
- }
- fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
- }
- fmt.Fprintln(w, "numNumberSystems")
- fmt.Fprintln(w, ")")
- fmt.Fprintln(w, "var systemMap = map[string]system{")
- for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
- fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
- w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
- }
- fmt.Fprintln(w, "}")
- }
- func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
- d, err := cldr.ParseDraft(*draft)
- if err != nil {
- log.Fatalf("invalid draft level: %v", err)
- }
- nNumberSystems := system(len(systemMap))
- type symbols [NumSymbolTypes]string
- type key struct {
- tag compact.ID
- system system
- }
- symbolMap := map[key]*symbols{}
- defaults := map[compact.ID]system{}
- for _, lang := range data.Locales() {
- ldml := data.RawLDML(lang)
- if ldml.Numbers == nil {
- continue
- }
- langIndex, ok := compact.FromTag(language.MustParse(lang))
- if !ok {
- log.Fatalf("No compact index for language %s", lang)
- }
- if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
- defaults[langIndex] = getNumberSystem(d[0].Data())
- }
- syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
- syms.SelectDraft(d)
- getFirst := func(name string, x interface{}) string {
- v := reflect.ValueOf(x)
- slice := cldr.MakeSlice(x)
- slice.SelectAnyOf("alt", "", "alt")
- if reflect.Indirect(v).Len() == 0 {
- return ""
- } else if reflect.Indirect(v).Len() > 1 {
- log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
- }
- return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
- }
- for _, sym := range ldml.Numbers.Symbols {
- if sym.NumberSystem == "" {
- // This is just linking the default of root to "latn".
- continue
- }
- symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
- SymDecimal: getFirst("decimal", &sym.Decimal),
- SymGroup: getFirst("group", &sym.Group),
- SymList: getFirst("list", &sym.List),
- SymPercentSign: getFirst("percentSign", &sym.PercentSign),
- SymPlusSign: getFirst("plusSign", &sym.PlusSign),
- SymMinusSign: getFirst("minusSign", &sym.MinusSign),
- SymExponential: getFirst("exponential", &sym.Exponential),
- SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
- SymPerMille: getFirst("perMille", &sym.PerMille),
- SymInfinity: getFirst("infinity", &sym.Infinity),
- SymNan: getFirst("nan", &sym.Nan),
- SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
- }
- }
- }
- // Expand all values.
- for k, syms := range symbolMap {
- for t := SymDecimal; t < NumSymbolTypes; t++ {
- p := k.tag
- for syms[t] == "" {
- p = p.Parent()
- if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
- syms[t] = (*pSyms)[t]
- break
- }
- if p == 0 /* und */ {
- // Default to root, latn.
- syms[t] = (*symbolMap[key{}])[t]
- }
- }
- }
- }
- // Unique the symbol sets and write the string data.
- m := map[symbols]int{}
- sb := stringset.NewBuilder()
- symIndex := [][NumSymbolTypes]byte{}
- for ns := system(0); ns < nNumberSystems; ns++ {
- for _, l := range data.Locales() {
- langIndex, _ := compact.FromTag(language.MustParse(l))
- s := symbolMap[key{langIndex, ns}]
- if s == nil {
- continue
- }
- if _, ok := m[*s]; !ok {
- m[*s] = len(symIndex)
- sb.Add(s[:]...)
- var x [NumSymbolTypes]byte
- for i := SymDecimal; i < NumSymbolTypes; i++ {
- x[i] = byte(sb.Index((*s)[i]))
- }
- symIndex = append(symIndex, x)
- }
- }
- }
- w.WriteVar("symIndex", symIndex)
- w.WriteVar("symData", sb.Set())
- // resolveSymbolIndex gets the index from the closest matching locale,
- // including the locale itself.
- resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
- for {
- if sym := symbolMap[key{langIndex, ns}]; sym != nil {
- return symOffset(m[*sym])
- }
- if langIndex == 0 {
- return 0 // und, latn
- }
- langIndex = langIndex.Parent()
- }
- }
- // Create an index with the symbols for each locale for the latn numbering
- // system. If this is not the default, or the only one, for a locale, we
- // will overwrite the value later.
- var langToDefaults [compact.NumCompactTags]symOffset
- for _, l := range data.Locales() {
- langIndex, _ := compact.FromTag(language.MustParse(l))
- langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
- }
- // Delete redundant entries.
- for _, l := range data.Locales() {
- langIndex, _ := compact.FromTag(language.MustParse(l))
- def := defaults[langIndex]
- syms := symbolMap[key{langIndex, def}]
- if syms == nil {
- continue
- }
- for ns := system(0); ns < nNumberSystems; ns++ {
- if ns == def {
- continue
- }
- if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
- delete(symbolMap, key{langIndex, ns})
- }
- }
- }
- // Create a sorted list of alternatives per language. This will only need to
- // be referenced if a user specified an alternative numbering system.
- var langToAlt []altSymData
- for _, l := range data.Locales() {
- langIndex, _ := compact.FromTag(language.MustParse(l))
- start := len(langToAlt)
- if start >= hasNonLatnMask {
- log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
- }
- // Create the entry for the default value.
- def := defaults[langIndex]
- langToAlt = append(langToAlt, altSymData{
- compactTag: langIndex,
- system: def,
- symIndex: resolveSymbolIndex(langIndex, def),
- })
- for ns := system(0); ns < nNumberSystems; ns++ {
- if def == ns {
- continue
- }
- if sym := symbolMap[key{langIndex, ns}]; sym != nil {
- langToAlt = append(langToAlt, altSymData{
- compactTag: langIndex,
- system: ns,
- symIndex: resolveSymbolIndex(langIndex, ns),
- })
- }
- }
- if def == 0 && len(langToAlt) == start+1 {
- // No additional data: erase the entry.
- langToAlt = langToAlt[:start]
- } else {
- // Overwrite the entry in langToDefaults.
- langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
- }
- }
- w.WriteComment(`
- langToDefaults maps a compact language index to the default numbering system
- and default symbol set`)
- w.WriteVar("langToDefaults", langToDefaults)
- w.WriteComment(`
- langToAlt is a list of numbering system and symbol set pairs, sorted and
- marked by compact language index.`)
- w.WriteVar("langToAlt", langToAlt)
- }
- // genFormats generates the lookup table for decimal, scientific and percent
- // patterns.
- //
- // CLDR allows for patterns to be different per language for different numbering
- // systems. In practice the patterns are set to be consistent for a language
- // independent of the numbering system. genFormats verifies that no language
- // deviates from this.
- func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
- d, err := cldr.ParseDraft(*draft)
- if err != nil {
- log.Fatalf("invalid draft level: %v", err)
- }
- // Fill the first slot with a dummy so we can identify unspecified tags.
- formats := []number.Pattern{{}}
- patterns := map[string]int{}
- // TODO: It would be possible to eliminate two of these slices by having
- // another indirection and store a reference to the combination of patterns.
- decimal := make([]byte, compact.NumCompactTags)
- scientific := make([]byte, compact.NumCompactTags)
- percent := make([]byte, compact.NumCompactTags)
- for _, lang := range data.Locales() {
- ldml := data.RawLDML(lang)
- if ldml.Numbers == nil {
- continue
- }
- langIndex, ok := compact.FromTag(language.MustParse(lang))
- if !ok {
- log.Fatalf("No compact index for language %s", lang)
- }
- type patternSlice []*struct {
- cldr.Common
- Numbers string `xml:"numbers,attr"`
- Count string `xml:"count,attr"`
- }
- add := func(name string, tags []byte, ps patternSlice) {
- sl := cldr.MakeSlice(&ps)
- sl.SelectDraft(d)
- if len(ps) == 0 {
- return
- }
- if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
- log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
- }
- s := ps[0].Data()
- index, ok := patterns[s]
- if !ok {
- nf, err := number.ParsePattern(s)
- if err != nil {
- log.Fatal(err)
- }
- index = len(formats)
- patterns[s] = index
- formats = append(formats, *nf)
- }
- tags[langIndex] = byte(index)
- }
- for _, df := range ldml.Numbers.DecimalFormats {
- for _, l := range df.DecimalFormatLength {
- if l.Type != "" {
- continue
- }
- for _, f := range l.DecimalFormat {
- add("decimal", decimal, f.Pattern)
- }
- }
- }
- for _, df := range ldml.Numbers.ScientificFormats {
- for _, l := range df.ScientificFormatLength {
- if l.Type != "" {
- continue
- }
- for _, f := range l.ScientificFormat {
- add("scientific", scientific, f.Pattern)
- }
- }
- }
- for _, df := range ldml.Numbers.PercentFormats {
- for _, l := range df.PercentFormatLength {
- if l.Type != "" {
- continue
- }
- for _, f := range l.PercentFormat {
- add("percent", percent, f.Pattern)
- }
- }
- }
- }
- // Complete the parent tag array to reflect inheritance. An index of 0
- // indicates an unspecified value.
- for _, data := range [][]byte{decimal, scientific, percent} {
- for i := range data {
- p := compact.ID(i)
- for ; data[p] == 0; p = p.Parent() {
- }
- data[i] = data[p]
- }
- }
- w.WriteVar("tagToDecimal", decimal)
- w.WriteVar("tagToScientific", scientific)
- w.WriteVar("tagToPercent", percent)
- value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
- // Break up the lines. This won't give ideal perfect formatting, but it is
- // better than one huge line.
- value = strings.Replace(value, ", ", ",\n", -1)
- fmt.Fprintf(w, "var formats = %s\n", value)
- }
|