123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 |
- // Copyright 2014 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- //go:build ignore
- // +build ignore
- // Generator for display name tables.
- package main
- import (
- "bytes"
- "flag"
- "fmt"
- "log"
- "reflect"
- "sort"
- "strings"
- "golang.org/x/text/internal/gen"
- "golang.org/x/text/language"
- "golang.org/x/text/unicode/cldr"
- )
- var (
- test = flag.Bool("test", false,
- "test existing tables; can be used to compare web data with package data.")
- outputFile = flag.String("output", "tables.go", "output file")
- stats = flag.Bool("stats", false, "prints statistics to stderr")
- short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
- draft = flag.String("draft",
- "contributed",
- `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
- pkg = flag.String("package",
- "display",
- "the name of the package in which the generated file is to be included")
- tags = newTagSet("tags",
- []language.Tag{},
- "space-separated list of tags to include or empty for all")
- dict = newTagSet("dict",
- dictTags(),
- "space-separated list or tags for which to include a Dictionary. "+
- `"" means the common list from go.text/language.`)
- )
- func dictTags() (tag []language.Tag) {
- // TODO: replace with language.Common.Tags() once supported.
- const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
- "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
- "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
- "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
- "zh zh-Hans zh-Hant zu"
- for _, s := range strings.Split(str, " ") {
- tag = append(tag, language.MustParse(s))
- }
- return tag
- }
- func main() {
- gen.Init()
- // Read the CLDR zip file.
- r := gen.OpenCLDRCoreZip()
- defer r.Close()
- d := &cldr.Decoder{}
- d.SetDirFilter("main", "supplemental")
- d.SetSectionFilter("localeDisplayNames")
- data, err := d.DecodeZip(r)
- if err != nil {
- log.Fatalf("DecodeZip: %v", err)
- }
- w := gen.NewCodeWriter()
- defer w.WriteGoFile(*outputFile, "display")
- gen.WriteCLDRVersion(w)
- b := builder{
- w: w,
- data: data,
- group: make(map[string]*group),
- }
- b.generate()
- }
- const tagForm = language.All
- // tagSet is used to parse command line flags of tags. It implements the
- // flag.Value interface.
- type tagSet map[language.Tag]bool
- func newTagSet(name string, tags []language.Tag, usage string) tagSet {
- f := tagSet(make(map[language.Tag]bool))
- for _, t := range tags {
- f[t] = true
- }
- flag.Var(f, name, usage)
- return f
- }
- // String implements the String method of the flag.Value interface.
- func (f tagSet) String() string {
- tags := []string{}
- for t := range f {
- tags = append(tags, t.String())
- }
- sort.Strings(tags)
- return strings.Join(tags, " ")
- }
- // Set implements Set from the flag.Value interface.
- func (f tagSet) Set(s string) error {
- if s != "" {
- for _, s := range strings.Split(s, " ") {
- if s != "" {
- tag, err := tagForm.Parse(s)
- if err != nil {
- return err
- }
- f[tag] = true
- }
- }
- }
- return nil
- }
- func (f tagSet) contains(t language.Tag) bool {
- if len(f) == 0 {
- return true
- }
- return f[t]
- }
- // builder is used to create all tables with display name information.
- type builder struct {
- w *gen.CodeWriter
- data *cldr.CLDR
- fromLocs []string
- // destination tags for the current locale.
- toTags []string
- toTagIndex map[string]int
- // list of supported tags
- supported []language.Tag
- // key-value pairs per group
- group map[string]*group
- // statistics
- sizeIndex int // total size of all indexes of headers
- sizeData int // total size of all data of headers
- totalSize int
- }
- type group struct {
- // Maps from a given language to the Namer data for this language.
- lang map[language.Tag]keyValues
- headers []header
- toTags []string
- threeStart int
- fourPlusStart int
- }
- // set sets the typ to the name for locale loc.
- func (g *group) set(t language.Tag, typ, name string) {
- kv := g.lang[t]
- if kv == nil {
- kv = make(keyValues)
- g.lang[t] = kv
- }
- if kv[typ] == "" {
- kv[typ] = name
- }
- }
// keyValues maps the string form of an item (as passed to group.set) to its
// display name.
type keyValues map[string]string
- type header struct {
- tag language.Tag
- data string
- index []uint16
- }
// versionInfo is the format string for the declaration of the deprecated
// Version constant in the generated file. It takes a single argument.
var versionInfo = "// Version is deprecated. Use CLDRVersion.\n" +
	"const Version = %#v\n"
- var self = language.MustParse("mul")
- // generate builds and writes all tables.
- func (b *builder) generate() {
- fmt.Fprintf(b.w, versionInfo, cldr.Version)
- b.filter()
- b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
- if ldn.Languages != nil {
- for _, v := range ldn.Languages.Language {
- lang := v.Type
- if lang == "root" {
- // We prefer the data from "und"
- // TODO: allow both the data for root and und somehow.
- continue
- }
- tag := tagForm.MustParse(lang)
- if tags.contains(tag) {
- g.set(loc, tag.String(), v.Data())
- }
- }
- }
- })
- b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
- if ldn.Scripts != nil {
- for _, v := range ldn.Scripts.Script {
- code := language.MustParseScript(v.Type)
- if code.IsPrivateUse() { // Qaaa..Qabx
- // TODO: data currently appears to be very meager.
- // Reconsider if we have data for English.
- if loc == language.English {
- log.Fatal("Consider including data for private use scripts.")
- }
- continue
- }
- g.set(loc, code.String(), v.Data())
- }
- }
- })
- b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
- if ldn.Territories != nil {
- for _, v := range ldn.Territories.Territory {
- g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
- }
- }
- })
- b.makeSupported()
- b.writeParents()
- b.writeGroup("lang")
- b.writeGroup("script")
- b.writeGroup("region")
- b.w.WriteConst("numSupported", len(b.supported))
- buf := bytes.Buffer{}
- for _, tag := range b.supported {
- fmt.Fprint(&buf, tag.String(), "|")
- }
- b.w.WriteConst("supported", buf.String())
- b.writeDictionaries()
- b.supported = []language.Tag{self}
- // Compute the names of locales in their own language. Some of these names
- // may be specified in their parent locales. We iterate the maximum depth
- // of the parent three times to match successive parents of tags until a
- // possible match is found.
- for i := 0; i < 4; i++ {
- b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
- parent := tag
- if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
- parent, _ = language.Raw.Compose(b)
- }
- if ldn.Languages != nil {
- for _, v := range ldn.Languages.Language {
- key := tagForm.MustParse(v.Type)
- saved := key
- if key == parent {
- g.set(self, tag.String(), v.Data())
- }
- for k := 0; k < i; k++ {
- key = key.Parent()
- }
- if key == tag {
- g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
- }
- }
- }
- })
- }
- b.writeGroup("self")
- }
- func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
- b.sizeIndex = 0
- b.sizeData = 0
- b.toTags = nil
- b.fromLocs = nil
- b.toTagIndex = make(map[string]int)
- g := b.group[name]
- if g == nil {
- g = &group{lang: make(map[language.Tag]keyValues)}
- b.group[name] = g
- }
- for _, loc := range b.data.Locales() {
- // We use RawLDML instead of LDML as we are managing our own inheritance
- // in this implementation.
- ldml := b.data.RawLDML(loc)
- // We do not support the POSIX variant (it is not a supported BCP 47
- // variant). This locale also doesn't happen to contain any data, so
- // we'll skip it by checking for this.
- tag, err := tagForm.Parse(loc)
- if err != nil {
- if ldml.LocaleDisplayNames != nil {
- log.Fatalf("setData: %v", err)
- }
- continue
- }
- if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
- f(g, tag, ldml.LocaleDisplayNames)
- }
- }
- }
- func (b *builder) filter() {
- filter := func(s *cldr.Slice) {
- if *short {
- s.SelectOnePerGroup("alt", []string{"short", ""})
- } else {
- s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
- }
- d, err := cldr.ParseDraft(*draft)
- if err != nil {
- log.Fatalf("filter: %v", err)
- }
- s.SelectDraft(d)
- }
- for _, loc := range b.data.Locales() {
- if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
- if ldn.Languages != nil {
- s := cldr.MakeSlice(&ldn.Languages.Language)
- if filter(&s); len(ldn.Languages.Language) == 0 {
- ldn.Languages = nil
- }
- }
- if ldn.Scripts != nil {
- s := cldr.MakeSlice(&ldn.Scripts.Script)
- if filter(&s); len(ldn.Scripts.Script) == 0 {
- ldn.Scripts = nil
- }
- }
- if ldn.Territories != nil {
- s := cldr.MakeSlice(&ldn.Territories.Territory)
- if filter(&s); len(ldn.Territories.Territory) == 0 {
- ldn.Territories = nil
- }
- }
- }
- }
- }
- // makeSupported creates a list of all supported locales.
- func (b *builder) makeSupported() {
- // tags across groups
- for _, g := range b.group {
- for t, _ := range g.lang {
- b.supported = append(b.supported, t)
- }
- }
- b.supported = b.supported[:unique(tagsSorter(b.supported))]
- }
- type tagsSorter []language.Tag
- func (a tagsSorter) Len() int { return len(a) }
- func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
- func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
- func (b *builder) writeGroup(name string) {
- g := b.group[name]
- for _, kv := range g.lang {
- for t, _ := range kv {
- g.toTags = append(g.toTags, t)
- }
- }
- g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
- // Allocate header per supported value.
- g.headers = make([]header, len(b.supported))
- for i, sup := range b.supported {
- kv, ok := g.lang[sup]
- if !ok {
- g.headers[i].tag = sup
- continue
- }
- data := []byte{}
- index := make([]uint16, len(g.toTags), len(g.toTags)+1)
- for j, t := range g.toTags {
- index[j] = uint16(len(data))
- data = append(data, kv[t]...)
- }
- index = append(index, uint16(len(data)))
- // Trim the tail of the index.
- // TODO: indexes can be reduced in size quite a bit more.
- n := len(index)
- for ; n >= 2 && index[n-2] == index[n-1]; n-- {
- }
- index = index[:n]
- // Workaround for a bug in CLDR 26.
- // See https://unicode.org/cldr/trac/ticket/8042.
- if cldr.Version == "26" && sup.String() == "hsb" {
- data = bytes.Replace(data, []byte{'"'}, nil, 1)
- }
- g.headers[i] = header{sup, string(data), index}
- }
- g.writeTable(b.w, name)
- }
// tagsBySize implements sort.Interface for tag strings. Whenever at least one
// of two strings of different length is at most four bytes long, the shorter
// one sorts first; in all other cases the order is alphabetic.
type tagsBySize []string

// Len returns the number of strings.
func (l tagsBySize) Len() int { return len(l) }

// Swap exchanges the strings at positions i and j.
func (l tagsBySize) Swap(i, j int) { l[j], l[i] = l[i], l[j] }

// Less reports whether the string at i sorts before the string at j.
func (l tagsBySize) Less(i, j int) bool {
	x, y := l[i], l[j]
	nx, ny := len(x), len(y)
	// Sort single-tag entries based on size first. Otherwise alphabetic.
	if short := nx <= 4 || ny <= 4; short && nx != ny {
		return nx < ny
	}
	return x < y
}
- // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
- // of tags[i].
- func parentIndices(tags []language.Tag) []int16 {
- index := make(map[language.Tag]int16)
- for i, t := range tags {
- index[t] = int16(i)
- }
- // Construct default parents.
- parents := make([]int16, len(tags))
- for i, t := range tags {
- parents[i] = -1
- for t = t.Parent(); t != language.Und; t = t.Parent() {
- if j, ok := index[t]; ok {
- parents[i] = j
- break
- }
- }
- }
- return parents
- }
- func (b *builder) writeParents() {
- parents := parentIndices(b.supported)
- fmt.Fprintf(b.w, "var parents = ")
- b.w.WriteArray(parents)
- }
- // writeKeys writes keys to a special index used by the display package.
- // tags are assumed to be sorted by length.
- func writeKeys(w *gen.CodeWriter, name string, keys []string) {
- w.Size += int(3 * reflect.TypeOf("").Size())
- w.WriteComment("Number of keys: %d", len(keys))
- fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
- for i := 2; i <= 4; i++ {
- sub := []string{}
- for _, t := range keys {
- if len(t) != i {
- break
- }
- sub = append(sub, t)
- }
- s := strings.Join(sub, "")
- w.WriteString(s)
- fmt.Fprintf(w, ",\n")
- keys = keys[len(sub):]
- }
- fmt.Fprintln(w, "\t}")
- if len(keys) > 0 {
- w.Size += int(reflect.TypeOf([]string{}).Size())
- fmt.Fprintf(w, "\t%sTagsLong = ", name)
- w.WriteSlice(keys)
- }
- fmt.Fprintln(w, ")\n")
- }
- // identifier creates an identifier from the given tag.
- func identifier(t language.Tag) string {
- return strings.Replace(t.String(), "-", "", -1)
- }
- func (h *header) writeEntry(w *gen.CodeWriter, name string) {
- if len(dict) > 0 && dict.contains(h.tag) {
- fmt.Fprintf(w, "\t{ // %s\n", h.tag)
- fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
- fmt.Fprintln(w, "\t},")
- } else if len(h.data) == 0 {
- fmt.Fprintln(w, "\t\t{}, //", h.tag)
- } else {
- fmt.Fprintf(w, "\t{ // %s\n", h.tag)
- w.WriteString(h.data)
- fmt.Fprintln(w, ",")
- w.WriteSlice(h.index)
- fmt.Fprintln(w, ",\n\t},")
- }
- }
- // write the data for the given header as single entries. The size for this data
- // was already accounted for in writeEntry.
- func (h *header) writeSingle(w *gen.CodeWriter, name string) {
- if len(dict) > 0 && dict.contains(h.tag) {
- tag := identifier(h.tag)
- w.WriteConst(tag+name+"Str", h.data)
- // Note that we create a slice instead of an array. If we use an array
- // we need to refer to it as a[:] in other tables, which will cause the
- // array to always be included by the linker. See Issue 7651.
- w.WriteVar(tag+name+"Idx", h.index)
- }
- }
- // WriteTable writes an entry for a single Namer.
- func (g *group) writeTable(w *gen.CodeWriter, name string) {
- start := w.Size
- writeKeys(w, name, g.toTags)
- w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
- fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
- title := strings.Title(name)
- for _, h := range g.headers {
- h.writeEntry(w, title)
- }
- fmt.Fprintln(w, "}\n")
- for _, h := range g.headers {
- h.writeSingle(w, title)
- }
- n := w.Size - start
- fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
- }
- func (b *builder) writeDictionaries() {
- fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
- fmt.Fprintln(b.w, "var (")
- parents := parentIndices(b.supported)
- for i, t := range b.supported {
- if dict.contains(t) {
- ident := identifier(t)
- fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
- if p := parents[i]; p == -1 {
- fmt.Fprintln(b.w, "\t\tnil,")
- } else {
- fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
- }
- fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
- fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
- fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
- fmt.Fprintln(b.w, "\t}")
- }
- }
- fmt.Fprintln(b.w, ")")
- var s string
- var a []uint16
- sz := reflect.TypeOf(s).Size()
- sz += reflect.TypeOf(a).Size()
- sz *= 3
- sz += reflect.TypeOf(&a).Size()
- n := int(sz) * len(dict)
- fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
- b.w.Size += n
- }
// unique sorts a and moves one representative of each distinct value to the
// front by swapping duplicates past position k, where k is the number of
// unique values. It returns k. Two elements count as equal when the earlier
// one is not Less than the later one after sorting.
func unique(a sort.Interface) int {
	n := a.Len()
	if n == 0 {
		return 0
	}
	sort.Sort(a)

	kept := 1
	for next := 1; next < n; next++ {
		if !a.Less(kept-1, next) {
			// Same value as the last one kept: leave it behind.
			continue
		}
		if next != kept {
			a.Swap(kept, next)
		}
		kept++
	}
	return kept
}
|