gen.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build ignore
  5. // +build ignore
  6. package main
  7. import (
  8. "flag"
  9. "fmt"
  10. "log"
  11. "reflect"
  12. "strings"
  13. "unicode/utf8"
  14. "golang.org/x/text/internal/gen"
  15. "golang.org/x/text/internal/language"
  16. "golang.org/x/text/internal/language/compact"
  17. "golang.org/x/text/internal/number"
  18. "golang.org/x/text/internal/stringset"
  19. "golang.org/x/text/unicode/cldr"
  20. )
  // Command-line flags of the generator.
  var (
  	// test selects testing of the existing tables, per its usage string.
  	test = flag.Bool(
  		"test",
  		false,
  		"test existing tables; can be used to compare web data with package data.",
  	)

  	// outputFile names the Go file that receives the generated tables.
  	outputFile = flag.String("output", "tables.go", "output file")

  	// outputTestFile names the generated test data file.
  	outputTestFile = flag.String("testoutput", "data_test.go", "output file")

  	// draft is the minimal CLDR draft level; it is parsed with
  	// cldr.ParseDraft by the table generators.
  	draft = flag.String(
  		"draft",
  		"contributed",
  		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`,
  	)
  )
  30. func main() {
  31. gen.Init()
  32. const pkg = "number"
  33. gen.Repackage("gen_common.go", "common.go", pkg)
  34. // Read the CLDR zip file.
  35. r := gen.OpenCLDRCoreZip()
  36. defer r.Close()
  37. d := &cldr.Decoder{}
  38. d.SetDirFilter("supplemental", "main")
  39. d.SetSectionFilter("numbers", "numberingSystem")
  40. data, err := d.DecodeZip(r)
  41. if err != nil {
  42. log.Fatalf("DecodeZip: %v", err)
  43. }
  44. w := gen.NewCodeWriter()
  45. defer w.WriteGoFile(*outputFile, pkg)
  46. fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
  47. gen.WriteCLDRVersion(w)
  48. genNumSystem(w, data)
  49. genSymbols(w, data)
  50. genFormats(w, data)
  51. }
  52. var systemMap = map[string]system{"latn": 0}
  53. func getNumberSystem(str string) system {
  54. ns, ok := systemMap[str]
  55. if !ok {
  56. log.Fatalf("No index for numbering system %q", str)
  57. }
  58. return ns
  59. }
  60. func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
  61. numSysData := []systemData{
  62. {digitSize: 1, zero: [4]byte{'0'}},
  63. }
  64. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  65. if len(ns.Digits) == 0 {
  66. continue
  67. }
  68. switch ns.Id {
  69. case "latn":
  70. // hard-wired
  71. continue
  72. case "hanidec":
  73. // non-consecutive digits: treat as "algorithmic"
  74. continue
  75. }
  76. zero, sz := utf8.DecodeRuneInString(ns.Digits)
  77. if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
  78. log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
  79. }
  80. i := rune(0)
  81. for _, r := range ns.Digits {
  82. // Verify that we can do simple math on the UTF-8 byte sequence
  83. // of zero to get the digit.
  84. if zero+i != r {
  85. // Runes not consecutive.
  86. log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
  87. }
  88. i++
  89. }
  90. var x [utf8.UTFMax]byte
  91. utf8.EncodeRune(x[:], zero)
  92. id := system(len(numSysData))
  93. systemMap[ns.Id] = id
  94. numSysData = append(numSysData, systemData{
  95. id: id,
  96. digitSize: byte(sz),
  97. zero: x,
  98. })
  99. }
  100. w.WriteVar("numSysData", numSysData)
  101. algoID := system(len(numSysData))
  102. fmt.Fprintln(w, "const (")
  103. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  104. id, ok := systemMap[ns.Id]
  105. if !ok {
  106. id = algoID
  107. systemMap[ns.Id] = id
  108. algoID++
  109. }
  110. fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
  111. }
  112. fmt.Fprintln(w, "numNumberSystems")
  113. fmt.Fprintln(w, ")")
  114. fmt.Fprintln(w, "var systemMap = map[string]system{")
  115. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  116. fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
  117. w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
  118. }
  119. fmt.Fprintln(w, "}")
  120. }
  121. func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
  122. d, err := cldr.ParseDraft(*draft)
  123. if err != nil {
  124. log.Fatalf("invalid draft level: %v", err)
  125. }
  126. nNumberSystems := system(len(systemMap))
  127. type symbols [NumSymbolTypes]string
  128. type key struct {
  129. tag compact.ID
  130. system system
  131. }
  132. symbolMap := map[key]*symbols{}
  133. defaults := map[compact.ID]system{}
  134. for _, lang := range data.Locales() {
  135. ldml := data.RawLDML(lang)
  136. if ldml.Numbers == nil {
  137. continue
  138. }
  139. langIndex, ok := compact.FromTag(language.MustParse(lang))
  140. if !ok {
  141. log.Fatalf("No compact index for language %s", lang)
  142. }
  143. if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
  144. defaults[langIndex] = getNumberSystem(d[0].Data())
  145. }
  146. syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
  147. syms.SelectDraft(d)
  148. getFirst := func(name string, x interface{}) string {
  149. v := reflect.ValueOf(x)
  150. slice := cldr.MakeSlice(x)
  151. slice.SelectAnyOf("alt", "", "alt")
  152. if reflect.Indirect(v).Len() == 0 {
  153. return ""
  154. } else if reflect.Indirect(v).Len() > 1 {
  155. log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
  156. }
  157. return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
  158. }
  159. for _, sym := range ldml.Numbers.Symbols {
  160. if sym.NumberSystem == "" {
  161. // This is just linking the default of root to "latn".
  162. continue
  163. }
  164. symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
  165. SymDecimal: getFirst("decimal", &sym.Decimal),
  166. SymGroup: getFirst("group", &sym.Group),
  167. SymList: getFirst("list", &sym.List),
  168. SymPercentSign: getFirst("percentSign", &sym.PercentSign),
  169. SymPlusSign: getFirst("plusSign", &sym.PlusSign),
  170. SymMinusSign: getFirst("minusSign", &sym.MinusSign),
  171. SymExponential: getFirst("exponential", &sym.Exponential),
  172. SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
  173. SymPerMille: getFirst("perMille", &sym.PerMille),
  174. SymInfinity: getFirst("infinity", &sym.Infinity),
  175. SymNan: getFirst("nan", &sym.Nan),
  176. SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
  177. }
  178. }
  179. }
  180. // Expand all values.
  181. for k, syms := range symbolMap {
  182. for t := SymDecimal; t < NumSymbolTypes; t++ {
  183. p := k.tag
  184. for syms[t] == "" {
  185. p = p.Parent()
  186. if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
  187. syms[t] = (*pSyms)[t]
  188. break
  189. }
  190. if p == 0 /* und */ {
  191. // Default to root, latn.
  192. syms[t] = (*symbolMap[key{}])[t]
  193. }
  194. }
  195. }
  196. }
  197. // Unique the symbol sets and write the string data.
  198. m := map[symbols]int{}
  199. sb := stringset.NewBuilder()
  200. symIndex := [][NumSymbolTypes]byte{}
  201. for ns := system(0); ns < nNumberSystems; ns++ {
  202. for _, l := range data.Locales() {
  203. langIndex, _ := compact.FromTag(language.MustParse(l))
  204. s := symbolMap[key{langIndex, ns}]
  205. if s == nil {
  206. continue
  207. }
  208. if _, ok := m[*s]; !ok {
  209. m[*s] = len(symIndex)
  210. sb.Add(s[:]...)
  211. var x [NumSymbolTypes]byte
  212. for i := SymDecimal; i < NumSymbolTypes; i++ {
  213. x[i] = byte(sb.Index((*s)[i]))
  214. }
  215. symIndex = append(symIndex, x)
  216. }
  217. }
  218. }
  219. w.WriteVar("symIndex", symIndex)
  220. w.WriteVar("symData", sb.Set())
  221. // resolveSymbolIndex gets the index from the closest matching locale,
  222. // including the locale itself.
  223. resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
  224. for {
  225. if sym := symbolMap[key{langIndex, ns}]; sym != nil {
  226. return symOffset(m[*sym])
  227. }
  228. if langIndex == 0 {
  229. return 0 // und, latn
  230. }
  231. langIndex = langIndex.Parent()
  232. }
  233. }
  234. // Create an index with the symbols for each locale for the latn numbering
  235. // system. If this is not the default, or the only one, for a locale, we
  236. // will overwrite the value later.
  237. var langToDefaults [compact.NumCompactTags]symOffset
  238. for _, l := range data.Locales() {
  239. langIndex, _ := compact.FromTag(language.MustParse(l))
  240. langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
  241. }
  242. // Delete redundant entries.
  243. for _, l := range data.Locales() {
  244. langIndex, _ := compact.FromTag(language.MustParse(l))
  245. def := defaults[langIndex]
  246. syms := symbolMap[key{langIndex, def}]
  247. if syms == nil {
  248. continue
  249. }
  250. for ns := system(0); ns < nNumberSystems; ns++ {
  251. if ns == def {
  252. continue
  253. }
  254. if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
  255. delete(symbolMap, key{langIndex, ns})
  256. }
  257. }
  258. }
  259. // Create a sorted list of alternatives per language. This will only need to
  260. // be referenced if a user specified an alternative numbering system.
  261. var langToAlt []altSymData
  262. for _, l := range data.Locales() {
  263. langIndex, _ := compact.FromTag(language.MustParse(l))
  264. start := len(langToAlt)
  265. if start >= hasNonLatnMask {
  266. log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
  267. }
  268. // Create the entry for the default value.
  269. def := defaults[langIndex]
  270. langToAlt = append(langToAlt, altSymData{
  271. compactTag: langIndex,
  272. system: def,
  273. symIndex: resolveSymbolIndex(langIndex, def),
  274. })
  275. for ns := system(0); ns < nNumberSystems; ns++ {
  276. if def == ns {
  277. continue
  278. }
  279. if sym := symbolMap[key{langIndex, ns}]; sym != nil {
  280. langToAlt = append(langToAlt, altSymData{
  281. compactTag: langIndex,
  282. system: ns,
  283. symIndex: resolveSymbolIndex(langIndex, ns),
  284. })
  285. }
  286. }
  287. if def == 0 && len(langToAlt) == start+1 {
  288. // No additional data: erase the entry.
  289. langToAlt = langToAlt[:start]
  290. } else {
  291. // Overwrite the entry in langToDefaults.
  292. langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
  293. }
  294. }
  295. w.WriteComment(`
  296. langToDefaults maps a compact language index to the default numbering system
  297. and default symbol set`)
  298. w.WriteVar("langToDefaults", langToDefaults)
  299. w.WriteComment(`
  300. langToAlt is a list of numbering system and symbol set pairs, sorted and
  301. marked by compact language index.`)
  302. w.WriteVar("langToAlt", langToAlt)
  303. }
  304. // genFormats generates the lookup table for decimal, scientific and percent
  305. // patterns.
  306. //
  307. // CLDR allows for patterns to be different per language for different numbering
  308. // systems. In practice the patterns are set to be consistent for a language
  309. // independent of the numbering system. genFormats verifies that no language
  310. // deviates from this.
  311. func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
  312. d, err := cldr.ParseDraft(*draft)
  313. if err != nil {
  314. log.Fatalf("invalid draft level: %v", err)
  315. }
  316. // Fill the first slot with a dummy so we can identify unspecified tags.
  317. formats := []number.Pattern{{}}
  318. patterns := map[string]int{}
  319. // TODO: It would be possible to eliminate two of these slices by having
  320. // another indirection and store a reference to the combination of patterns.
  321. decimal := make([]byte, compact.NumCompactTags)
  322. scientific := make([]byte, compact.NumCompactTags)
  323. percent := make([]byte, compact.NumCompactTags)
  324. for _, lang := range data.Locales() {
  325. ldml := data.RawLDML(lang)
  326. if ldml.Numbers == nil {
  327. continue
  328. }
  329. langIndex, ok := compact.FromTag(language.MustParse(lang))
  330. if !ok {
  331. log.Fatalf("No compact index for language %s", lang)
  332. }
  333. type patternSlice []*struct {
  334. cldr.Common
  335. Numbers string `xml:"numbers,attr"`
  336. Count string `xml:"count,attr"`
  337. }
  338. add := func(name string, tags []byte, ps patternSlice) {
  339. sl := cldr.MakeSlice(&ps)
  340. sl.SelectDraft(d)
  341. if len(ps) == 0 {
  342. return
  343. }
  344. if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
  345. log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
  346. }
  347. s := ps[0].Data()
  348. index, ok := patterns[s]
  349. if !ok {
  350. nf, err := number.ParsePattern(s)
  351. if err != nil {
  352. log.Fatal(err)
  353. }
  354. index = len(formats)
  355. patterns[s] = index
  356. formats = append(formats, *nf)
  357. }
  358. tags[langIndex] = byte(index)
  359. }
  360. for _, df := range ldml.Numbers.DecimalFormats {
  361. for _, l := range df.DecimalFormatLength {
  362. if l.Type != "" {
  363. continue
  364. }
  365. for _, f := range l.DecimalFormat {
  366. add("decimal", decimal, f.Pattern)
  367. }
  368. }
  369. }
  370. for _, df := range ldml.Numbers.ScientificFormats {
  371. for _, l := range df.ScientificFormatLength {
  372. if l.Type != "" {
  373. continue
  374. }
  375. for _, f := range l.ScientificFormat {
  376. add("scientific", scientific, f.Pattern)
  377. }
  378. }
  379. }
  380. for _, df := range ldml.Numbers.PercentFormats {
  381. for _, l := range df.PercentFormatLength {
  382. if l.Type != "" {
  383. continue
  384. }
  385. for _, f := range l.PercentFormat {
  386. add("percent", percent, f.Pattern)
  387. }
  388. }
  389. }
  390. }
  391. // Complete the parent tag array to reflect inheritance. An index of 0
  392. // indicates an unspecified value.
  393. for _, data := range [][]byte{decimal, scientific, percent} {
  394. for i := range data {
  395. p := compact.ID(i)
  396. for ; data[p] == 0; p = p.Parent() {
  397. }
  398. data[i] = data[p]
  399. }
  400. }
  401. w.WriteVar("tagToDecimal", decimal)
  402. w.WriteVar("tagToScientific", scientific)
  403. w.WriteVar("tagToPercent", percent)
  404. value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
  405. // Break up the lines. This won't give ideal perfect formatting, but it is
  406. // better than one huge line.
  407. value = strings.Replace(value, ", ", ",\n", -1)
  408. fmt.Fprintf(w, "var formats = %s\n", value)
  409. }