maketables.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build ignore
  5. // +build ignore
  6. // Generator for display name tables.
  7. package main
  8. import (
  9. "bytes"
  10. "flag"
  11. "fmt"
  12. "log"
  13. "reflect"
  14. "sort"
  15. "strings"
  16. "golang.org/x/text/internal/gen"
  17. "golang.org/x/text/language"
  18. "golang.org/x/text/unicode/cldr"
  19. )
  20. var (
  21. test = flag.Bool("test", false,
  22. "test existing tables; can be used to compare web data with package data.")
  23. outputFile = flag.String("output", "tables.go", "output file")
  24. stats = flag.Bool("stats", false, "prints statistics to stderr")
  25. short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
  26. draft = flag.String("draft",
  27. "contributed",
  28. `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
  29. pkg = flag.String("package",
  30. "display",
  31. "the name of the package in which the generated file is to be included")
  32. tags = newTagSet("tags",
  33. []language.Tag{},
  34. "space-separated list of tags to include or empty for all")
  35. dict = newTagSet("dict",
  36. dictTags(),
  37. "space-separated list or tags for which to include a Dictionary. "+
  38. `"" means the common list from go.text/language.`)
  39. )
  40. func dictTags() (tag []language.Tag) {
  41. // TODO: replace with language.Common.Tags() once supported.
  42. const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
  43. "es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
  44. "ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
  45. "pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
  46. "zh zh-Hans zh-Hant zu"
  47. for _, s := range strings.Split(str, " ") {
  48. tag = append(tag, language.MustParse(s))
  49. }
  50. return tag
  51. }
  52. func main() {
  53. gen.Init()
  54. // Read the CLDR zip file.
  55. r := gen.OpenCLDRCoreZip()
  56. defer r.Close()
  57. d := &cldr.Decoder{}
  58. d.SetDirFilter("main", "supplemental")
  59. d.SetSectionFilter("localeDisplayNames")
  60. data, err := d.DecodeZip(r)
  61. if err != nil {
  62. log.Fatalf("DecodeZip: %v", err)
  63. }
  64. w := gen.NewCodeWriter()
  65. defer w.WriteGoFile(*outputFile, "display")
  66. gen.WriteCLDRVersion(w)
  67. b := builder{
  68. w: w,
  69. data: data,
  70. group: make(map[string]*group),
  71. }
  72. b.generate()
  73. }
// tagForm is the language package setting through which this generator parses
// every tag it reads from flags and from CLDR (see the tagForm.Parse and
// tagForm.MustParse call sites).
const tagForm = language.All
// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
//
// A tag is a member when it maps to true. Note that contains reports every
// tag as a member of an empty set.
type tagSet map[language.Tag]bool
  78. func newTagSet(name string, tags []language.Tag, usage string) tagSet {
  79. f := tagSet(make(map[language.Tag]bool))
  80. for _, t := range tags {
  81. f[t] = true
  82. }
  83. flag.Var(f, name, usage)
  84. return f
  85. }
  86. // String implements the String method of the flag.Value interface.
  87. func (f tagSet) String() string {
  88. tags := []string{}
  89. for t := range f {
  90. tags = append(tags, t.String())
  91. }
  92. sort.Strings(tags)
  93. return strings.Join(tags, " ")
  94. }
  95. // Set implements Set from the flag.Value interface.
  96. func (f tagSet) Set(s string) error {
  97. if s != "" {
  98. for _, s := range strings.Split(s, " ") {
  99. if s != "" {
  100. tag, err := tagForm.Parse(s)
  101. if err != nil {
  102. return err
  103. }
  104. f[tag] = true
  105. }
  106. }
  107. }
  108. return nil
  109. }
  110. func (f tagSet) contains(t language.Tag) bool {
  111. if len(f) == 0 {
  112. return true
  113. }
  114. return f[t]
  115. }
// builder is used to create all tables with display name information.
type builder struct {
	// w receives all generated code.
	w *gen.CodeWriter
	// data is the decoded CLDR data from which the tables are built.
	data *cldr.CLDR

	// fromLocs is reset to nil at the start of every setData call.
	fromLocs []string

	// destination tags for the current locale.
	// Both fields are reset at the start of every setData call.
	toTags     []string
	toTagIndex map[string]int

	// list of supported tags
	// It is filled by makeSupported and replaced by the single tag self
	// before the "self" group is built.
	supported []language.Tag

	// key-value pairs per group
	// The map is keyed by group name ("lang", "script", "region", "self").
	group map[string]*group

	// statistics
	sizeIndex int // total size of all indexes of headers
	sizeData  int // total size of all data of headers
	totalSize int
}
// group holds the collected names of one kind (for example languages or
// scripts) together with the table layout derived from them.
type group struct {
	// Maps from a given language to the Namer data for this language.
	lang map[language.Tag]keyValues
	// headers holds one entry per supported tag; it is filled by
	// builder.writeGroup.
	headers []header

	// toTags is the sorted, de-duplicated list of all keys that occur in
	// lang; it is filled by builder.writeGroup.
	toTags []string
	// NOTE(review): threeStart and fourPlusStart are not referenced anywhere
	// in this file.
	threeStart    int
	fourPlusStart int
}
  141. // set sets the typ to the name for locale loc.
  142. func (g *group) set(t language.Tag, typ, name string) {
  143. kv := g.lang[t]
  144. if kv == nil {
  145. kv = make(keyValues)
  146. g.lang[t] = kv
  147. }
  148. if kv[typ] == "" {
  149. kv[typ] = name
  150. }
  151. }
// keyValues maps a key (the string form of a language tag, script or region)
// to its display name.
type keyValues map[string]string
// header holds the names of one group in one language in packed form.
//
// The field order matters: builder.writeGroup creates headers with an unkeyed
// composite literal.
type header struct {
	// tag is the language the names are written in.
	tag language.Tag
	// data is the concatenation of all names, in the key order of the
	// group's toTags.
	data string
	// index holds the offset into data at which the name of each key starts,
	// followed by the total length of data. builder.writeGroup trims
	// repeated equal offsets from the tail.
	index []uint16
}
// versionInfo is the format string for the deprecated Version constant of the
// generated file; generate fills in cldr.Version.
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v
`
// self is the tag under which generate stores the "self" group, that is, the
// names of locales in their own language.
var self = language.MustParse("mul")
  162. // generate builds and writes all tables.
  163. func (b *builder) generate() {
  164. fmt.Fprintf(b.w, versionInfo, cldr.Version)
  165. b.filter()
  166. b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
  167. if ldn.Languages != nil {
  168. for _, v := range ldn.Languages.Language {
  169. lang := v.Type
  170. if lang == "root" {
  171. // We prefer the data from "und"
  172. // TODO: allow both the data for root and und somehow.
  173. continue
  174. }
  175. tag := tagForm.MustParse(lang)
  176. if tags.contains(tag) {
  177. g.set(loc, tag.String(), v.Data())
  178. }
  179. }
  180. }
  181. })
  182. b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
  183. if ldn.Scripts != nil {
  184. for _, v := range ldn.Scripts.Script {
  185. code := language.MustParseScript(v.Type)
  186. if code.IsPrivateUse() { // Qaaa..Qabx
  187. // TODO: data currently appears to be very meager.
  188. // Reconsider if we have data for English.
  189. if loc == language.English {
  190. log.Fatal("Consider including data for private use scripts.")
  191. }
  192. continue
  193. }
  194. g.set(loc, code.String(), v.Data())
  195. }
  196. }
  197. })
  198. b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
  199. if ldn.Territories != nil {
  200. for _, v := range ldn.Territories.Territory {
  201. g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
  202. }
  203. }
  204. })
  205. b.makeSupported()
  206. b.writeParents()
  207. b.writeGroup("lang")
  208. b.writeGroup("script")
  209. b.writeGroup("region")
  210. b.w.WriteConst("numSupported", len(b.supported))
  211. buf := bytes.Buffer{}
  212. for _, tag := range b.supported {
  213. fmt.Fprint(&buf, tag.String(), "|")
  214. }
  215. b.w.WriteConst("supported", buf.String())
  216. b.writeDictionaries()
  217. b.supported = []language.Tag{self}
  218. // Compute the names of locales in their own language. Some of these names
  219. // may be specified in their parent locales. We iterate the maximum depth
  220. // of the parent three times to match successive parents of tags until a
  221. // possible match is found.
  222. for i := 0; i < 4; i++ {
  223. b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
  224. parent := tag
  225. if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
  226. parent, _ = language.Raw.Compose(b)
  227. }
  228. if ldn.Languages != nil {
  229. for _, v := range ldn.Languages.Language {
  230. key := tagForm.MustParse(v.Type)
  231. saved := key
  232. if key == parent {
  233. g.set(self, tag.String(), v.Data())
  234. }
  235. for k := 0; k < i; k++ {
  236. key = key.Parent()
  237. }
  238. if key == tag {
  239. g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
  240. }
  241. }
  242. }
  243. })
  244. }
  245. b.writeGroup("self")
  246. }
  247. func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
  248. b.sizeIndex = 0
  249. b.sizeData = 0
  250. b.toTags = nil
  251. b.fromLocs = nil
  252. b.toTagIndex = make(map[string]int)
  253. g := b.group[name]
  254. if g == nil {
  255. g = &group{lang: make(map[language.Tag]keyValues)}
  256. b.group[name] = g
  257. }
  258. for _, loc := range b.data.Locales() {
  259. // We use RawLDML instead of LDML as we are managing our own inheritance
  260. // in this implementation.
  261. ldml := b.data.RawLDML(loc)
  262. // We do not support the POSIX variant (it is not a supported BCP 47
  263. // variant). This locale also doesn't happen to contain any data, so
  264. // we'll skip it by checking for this.
  265. tag, err := tagForm.Parse(loc)
  266. if err != nil {
  267. if ldml.LocaleDisplayNames != nil {
  268. log.Fatalf("setData: %v", err)
  269. }
  270. continue
  271. }
  272. if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
  273. f(g, tag, ldml.LocaleDisplayNames)
  274. }
  275. }
  276. }
  277. func (b *builder) filter() {
  278. filter := func(s *cldr.Slice) {
  279. if *short {
  280. s.SelectOnePerGroup("alt", []string{"short", ""})
  281. } else {
  282. s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
  283. }
  284. d, err := cldr.ParseDraft(*draft)
  285. if err != nil {
  286. log.Fatalf("filter: %v", err)
  287. }
  288. s.SelectDraft(d)
  289. }
  290. for _, loc := range b.data.Locales() {
  291. if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
  292. if ldn.Languages != nil {
  293. s := cldr.MakeSlice(&ldn.Languages.Language)
  294. if filter(&s); len(ldn.Languages.Language) == 0 {
  295. ldn.Languages = nil
  296. }
  297. }
  298. if ldn.Scripts != nil {
  299. s := cldr.MakeSlice(&ldn.Scripts.Script)
  300. if filter(&s); len(ldn.Scripts.Script) == 0 {
  301. ldn.Scripts = nil
  302. }
  303. }
  304. if ldn.Territories != nil {
  305. s := cldr.MakeSlice(&ldn.Territories.Territory)
  306. if filter(&s); len(ldn.Territories.Territory) == 0 {
  307. ldn.Territories = nil
  308. }
  309. }
  310. }
  311. }
  312. }
  313. // makeSupported creates a list of all supported locales.
  314. func (b *builder) makeSupported() {
  315. // tags across groups
  316. for _, g := range b.group {
  317. for t, _ := range g.lang {
  318. b.supported = append(b.supported, t)
  319. }
  320. }
  321. b.supported = b.supported[:unique(tagsSorter(b.supported))]
  322. }
  323. type tagsSorter []language.Tag
  324. func (a tagsSorter) Len() int { return len(a) }
  325. func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  326. func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
  327. func (b *builder) writeGroup(name string) {
  328. g := b.group[name]
  329. for _, kv := range g.lang {
  330. for t, _ := range kv {
  331. g.toTags = append(g.toTags, t)
  332. }
  333. }
  334. g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
  335. // Allocate header per supported value.
  336. g.headers = make([]header, len(b.supported))
  337. for i, sup := range b.supported {
  338. kv, ok := g.lang[sup]
  339. if !ok {
  340. g.headers[i].tag = sup
  341. continue
  342. }
  343. data := []byte{}
  344. index := make([]uint16, len(g.toTags), len(g.toTags)+1)
  345. for j, t := range g.toTags {
  346. index[j] = uint16(len(data))
  347. data = append(data, kv[t]...)
  348. }
  349. index = append(index, uint16(len(data)))
  350. // Trim the tail of the index.
  351. // TODO: indexes can be reduced in size quite a bit more.
  352. n := len(index)
  353. for ; n >= 2 && index[n-2] == index[n-1]; n-- {
  354. }
  355. index = index[:n]
  356. // Workaround for a bug in CLDR 26.
  357. // See https://unicode.org/cldr/trac/ticket/8042.
  358. if cldr.Version == "26" && sup.String() == "hsb" {
  359. data = bytes.Replace(data, []byte{'"'}, nil, 1)
  360. }
  361. g.headers[i] = header{sup, string(data), index}
  362. }
  363. g.writeTable(b.w, name)
  364. }
  365. type tagsBySize []string
  366. func (l tagsBySize) Len() int { return len(l) }
  367. func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
  368. func (l tagsBySize) Less(i, j int) bool {
  369. a, b := l[i], l[j]
  370. // Sort single-tag entries based on size first. Otherwise alphabetic.
  371. if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
  372. return len(a) < len(b)
  373. }
  374. return a < b
  375. }
  376. // parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
  377. // of tags[i].
  378. func parentIndices(tags []language.Tag) []int16 {
  379. index := make(map[language.Tag]int16)
  380. for i, t := range tags {
  381. index[t] = int16(i)
  382. }
  383. // Construct default parents.
  384. parents := make([]int16, len(tags))
  385. for i, t := range tags {
  386. parents[i] = -1
  387. for t = t.Parent(); t != language.Und; t = t.Parent() {
  388. if j, ok := index[t]; ok {
  389. parents[i] = j
  390. break
  391. }
  392. }
  393. }
  394. return parents
  395. }
  396. func (b *builder) writeParents() {
  397. parents := parentIndices(b.supported)
  398. fmt.Fprintf(b.w, "var parents = ")
  399. b.w.WriteArray(parents)
  400. }
  401. // writeKeys writes keys to a special index used by the display package.
  402. // tags are assumed to be sorted by length.
  403. func writeKeys(w *gen.CodeWriter, name string, keys []string) {
  404. w.Size += int(3 * reflect.TypeOf("").Size())
  405. w.WriteComment("Number of keys: %d", len(keys))
  406. fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
  407. for i := 2; i <= 4; i++ {
  408. sub := []string{}
  409. for _, t := range keys {
  410. if len(t) != i {
  411. break
  412. }
  413. sub = append(sub, t)
  414. }
  415. s := strings.Join(sub, "")
  416. w.WriteString(s)
  417. fmt.Fprintf(w, ",\n")
  418. keys = keys[len(sub):]
  419. }
  420. fmt.Fprintln(w, "\t}")
  421. if len(keys) > 0 {
  422. w.Size += int(reflect.TypeOf([]string{}).Size())
  423. fmt.Fprintf(w, "\t%sTagsLong = ", name)
  424. w.WriteSlice(keys)
  425. }
  426. fmt.Fprintln(w, ")\n")
  427. }
  428. // identifier creates an identifier from the given tag.
  429. func identifier(t language.Tag) string {
  430. return strings.Replace(t.String(), "-", "", -1)
  431. }
  432. func (h *header) writeEntry(w *gen.CodeWriter, name string) {
  433. if len(dict) > 0 && dict.contains(h.tag) {
  434. fmt.Fprintf(w, "\t{ // %s\n", h.tag)
  435. fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
  436. fmt.Fprintln(w, "\t},")
  437. } else if len(h.data) == 0 {
  438. fmt.Fprintln(w, "\t\t{}, //", h.tag)
  439. } else {
  440. fmt.Fprintf(w, "\t{ // %s\n", h.tag)
  441. w.WriteString(h.data)
  442. fmt.Fprintln(w, ",")
  443. w.WriteSlice(h.index)
  444. fmt.Fprintln(w, ",\n\t},")
  445. }
  446. }
  447. // write the data for the given header as single entries. The size for this data
  448. // was already accounted for in writeEntry.
  449. func (h *header) writeSingle(w *gen.CodeWriter, name string) {
  450. if len(dict) > 0 && dict.contains(h.tag) {
  451. tag := identifier(h.tag)
  452. w.WriteConst(tag+name+"Str", h.data)
  453. // Note that we create a slice instead of an array. If we use an array
  454. // we need to refer to it as a[:] in other tables, which will cause the
  455. // array to always be included by the linker. See Issue 7651.
  456. w.WriteVar(tag+name+"Idx", h.index)
  457. }
  458. }
  459. // WriteTable writes an entry for a single Namer.
  460. func (g *group) writeTable(w *gen.CodeWriter, name string) {
  461. start := w.Size
  462. writeKeys(w, name, g.toTags)
  463. w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
  464. fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
  465. title := strings.Title(name)
  466. for _, h := range g.headers {
  467. h.writeEntry(w, title)
  468. }
  469. fmt.Fprintln(w, "}\n")
  470. for _, h := range g.headers {
  471. h.writeSingle(w, title)
  472. }
  473. n := w.Size - start
  474. fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
  475. }
  476. func (b *builder) writeDictionaries() {
  477. fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
  478. fmt.Fprintln(b.w, "var (")
  479. parents := parentIndices(b.supported)
  480. for i, t := range b.supported {
  481. if dict.contains(t) {
  482. ident := identifier(t)
  483. fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
  484. if p := parents[i]; p == -1 {
  485. fmt.Fprintln(b.w, "\t\tnil,")
  486. } else {
  487. fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
  488. }
  489. fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
  490. fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
  491. fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
  492. fmt.Fprintln(b.w, "\t}")
  493. }
  494. }
  495. fmt.Fprintln(b.w, ")")
  496. var s string
  497. var a []uint16
  498. sz := reflect.TypeOf(s).Size()
  499. sz += reflect.TypeOf(a).Size()
  500. sz *= 3
  501. sz += reflect.TypeOf(&a).Size()
  502. n := int(sz) * len(dict)
  503. fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
  504. b.w.Size += n
  505. }
  506. // unique sorts the given lists and removes duplicate entries by swapping them
  507. // past position k, where k is the number of unique values. It returns k.
  508. func unique(a sort.Interface) int {
  509. if a.Len() == 0 {
  510. return 0
  511. }
  512. sort.Sort(a)
  513. k := 1
  514. for i := 1; i < a.Len(); i++ {
  515. if a.Less(k-1, i) {
  516. if k != i {
  517. a.Swap(k, i)
  518. }
  519. k++
  520. }
  521. }
  522. return k
  523. }