gen.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // +build ignore
  5. package main
  6. import (
  7. "flag"
  8. "fmt"
  9. "log"
  10. "reflect"
  11. "strings"
  12. "unicode/utf8"
  13. "golang.org/x/text/internal/gen"
  14. "golang.org/x/text/internal/language"
  15. "golang.org/x/text/internal/language/compact"
  16. "golang.org/x/text/internal/number"
  17. "golang.org/x/text/internal/stringset"
  18. "golang.org/x/text/unicode/cldr"
  19. )
  20. var (
  21. test = flag.Bool("test", false,
  22. "test existing tables; can be used to compare web data with package data.")
  23. outputFile = flag.String("output", "tables.go", "output file")
  24. outputTestFile = flag.String("testoutput", "data_test.go", "output file")
  25. draft = flag.String("draft",
  26. "contributed",
  27. `Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
  28. )
  29. func main() {
  30. gen.Init()
  31. const pkg = "number"
  32. gen.Repackage("gen_common.go", "common.go", pkg)
  33. // Read the CLDR zip file.
  34. r := gen.OpenCLDRCoreZip()
  35. defer r.Close()
  36. d := &cldr.Decoder{}
  37. d.SetDirFilter("supplemental", "main")
  38. d.SetSectionFilter("numbers", "numberingSystem")
  39. data, err := d.DecodeZip(r)
  40. if err != nil {
  41. log.Fatalf("DecodeZip: %v", err)
  42. }
  43. w := gen.NewCodeWriter()
  44. defer w.WriteGoFile(*outputFile, pkg)
  45. fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
  46. gen.WriteCLDRVersion(w)
  47. genNumSystem(w, data)
  48. genSymbols(w, data)
  49. genFormats(w, data)
  50. }
  51. var systemMap = map[string]system{"latn": 0}
  52. func getNumberSystem(str string) system {
  53. ns, ok := systemMap[str]
  54. if !ok {
  55. log.Fatalf("No index for numbering system %q", str)
  56. }
  57. return ns
  58. }
  59. func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
  60. numSysData := []systemData{
  61. {digitSize: 1, zero: [4]byte{'0'}},
  62. }
  63. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  64. if len(ns.Digits) == 0 {
  65. continue
  66. }
  67. switch ns.Id {
  68. case "latn":
  69. // hard-wired
  70. continue
  71. case "hanidec":
  72. // non-consecutive digits: treat as "algorithmic"
  73. continue
  74. }
  75. zero, sz := utf8.DecodeRuneInString(ns.Digits)
  76. if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
  77. log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
  78. }
  79. i := rune(0)
  80. for _, r := range ns.Digits {
  81. // Verify that we can do simple math on the UTF-8 byte sequence
  82. // of zero to get the digit.
  83. if zero+i != r {
  84. // Runes not consecutive.
  85. log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
  86. }
  87. i++
  88. }
  89. var x [utf8.UTFMax]byte
  90. utf8.EncodeRune(x[:], zero)
  91. id := system(len(numSysData))
  92. systemMap[ns.Id] = id
  93. numSysData = append(numSysData, systemData{
  94. id: id,
  95. digitSize: byte(sz),
  96. zero: x,
  97. })
  98. }
  99. w.WriteVar("numSysData", numSysData)
  100. algoID := system(len(numSysData))
  101. fmt.Fprintln(w, "const (")
  102. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  103. id, ok := systemMap[ns.Id]
  104. if !ok {
  105. id = algoID
  106. systemMap[ns.Id] = id
  107. algoID++
  108. }
  109. fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
  110. }
  111. fmt.Fprintln(w, "numNumberSystems")
  112. fmt.Fprintln(w, ")")
  113. fmt.Fprintln(w, "var systemMap = map[string]system{")
  114. for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
  115. fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
  116. w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
  117. }
  118. fmt.Fprintln(w, "}")
  119. }
  120. func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
  121. d, err := cldr.ParseDraft(*draft)
  122. if err != nil {
  123. log.Fatalf("invalid draft level: %v", err)
  124. }
  125. nNumberSystems := system(len(systemMap))
  126. type symbols [NumSymbolTypes]string
  127. type key struct {
  128. tag compact.ID
  129. system system
  130. }
  131. symbolMap := map[key]*symbols{}
  132. defaults := map[compact.ID]system{}
  133. for _, lang := range data.Locales() {
  134. ldml := data.RawLDML(lang)
  135. if ldml.Numbers == nil {
  136. continue
  137. }
  138. langIndex, ok := compact.FromTag(language.MustParse(lang))
  139. if !ok {
  140. log.Fatalf("No compact index for language %s", lang)
  141. }
  142. if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
  143. defaults[langIndex] = getNumberSystem(d[0].Data())
  144. }
  145. syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
  146. syms.SelectDraft(d)
  147. getFirst := func(name string, x interface{}) string {
  148. v := reflect.ValueOf(x)
  149. slice := cldr.MakeSlice(x)
  150. slice.SelectAnyOf("alt", "", "alt")
  151. if reflect.Indirect(v).Len() == 0 {
  152. return ""
  153. } else if reflect.Indirect(v).Len() > 1 {
  154. log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
  155. }
  156. return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
  157. }
  158. for _, sym := range ldml.Numbers.Symbols {
  159. if sym.NumberSystem == "" {
  160. // This is just linking the default of root to "latn".
  161. continue
  162. }
  163. symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
  164. SymDecimal: getFirst("decimal", &sym.Decimal),
  165. SymGroup: getFirst("group", &sym.Group),
  166. SymList: getFirst("list", &sym.List),
  167. SymPercentSign: getFirst("percentSign", &sym.PercentSign),
  168. SymPlusSign: getFirst("plusSign", &sym.PlusSign),
  169. SymMinusSign: getFirst("minusSign", &sym.MinusSign),
  170. SymExponential: getFirst("exponential", &sym.Exponential),
  171. SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
  172. SymPerMille: getFirst("perMille", &sym.PerMille),
  173. SymInfinity: getFirst("infinity", &sym.Infinity),
  174. SymNan: getFirst("nan", &sym.Nan),
  175. SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
  176. }
  177. }
  178. }
  179. // Expand all values.
  180. for k, syms := range symbolMap {
  181. for t := SymDecimal; t < NumSymbolTypes; t++ {
  182. p := k.tag
  183. for syms[t] == "" {
  184. p = p.Parent()
  185. if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
  186. syms[t] = (*pSyms)[t]
  187. break
  188. }
  189. if p == 0 /* und */ {
  190. // Default to root, latn.
  191. syms[t] = (*symbolMap[key{}])[t]
  192. }
  193. }
  194. }
  195. }
  196. // Unique the symbol sets and write the string data.
  197. m := map[symbols]int{}
  198. sb := stringset.NewBuilder()
  199. symIndex := [][NumSymbolTypes]byte{}
  200. for ns := system(0); ns < nNumberSystems; ns++ {
  201. for _, l := range data.Locales() {
  202. langIndex, _ := compact.FromTag(language.MustParse(l))
  203. s := symbolMap[key{langIndex, ns}]
  204. if s == nil {
  205. continue
  206. }
  207. if _, ok := m[*s]; !ok {
  208. m[*s] = len(symIndex)
  209. sb.Add(s[:]...)
  210. var x [NumSymbolTypes]byte
  211. for i := SymDecimal; i < NumSymbolTypes; i++ {
  212. x[i] = byte(sb.Index((*s)[i]))
  213. }
  214. symIndex = append(symIndex, x)
  215. }
  216. }
  217. }
  218. w.WriteVar("symIndex", symIndex)
  219. w.WriteVar("symData", sb.Set())
  220. // resolveSymbolIndex gets the index from the closest matching locale,
  221. // including the locale itself.
  222. resolveSymbolIndex := func(langIndex compact.ID, ns system) symOffset {
  223. for {
  224. if sym := symbolMap[key{langIndex, ns}]; sym != nil {
  225. return symOffset(m[*sym])
  226. }
  227. if langIndex == 0 {
  228. return 0 // und, latn
  229. }
  230. langIndex = langIndex.Parent()
  231. }
  232. }
  233. // Create an index with the symbols for each locale for the latn numbering
  234. // system. If this is not the default, or the only one, for a locale, we
  235. // will overwrite the value later.
  236. var langToDefaults [compact.NumCompactTags]symOffset
  237. for _, l := range data.Locales() {
  238. langIndex, _ := compact.FromTag(language.MustParse(l))
  239. langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
  240. }
  241. // Delete redundant entries.
  242. for _, l := range data.Locales() {
  243. langIndex, _ := compact.FromTag(language.MustParse(l))
  244. def := defaults[langIndex]
  245. syms := symbolMap[key{langIndex, def}]
  246. if syms == nil {
  247. continue
  248. }
  249. for ns := system(0); ns < nNumberSystems; ns++ {
  250. if ns == def {
  251. continue
  252. }
  253. if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
  254. delete(symbolMap, key{langIndex, ns})
  255. }
  256. }
  257. }
  258. // Create a sorted list of alternatives per language. This will only need to
  259. // be referenced if a user specified an alternative numbering system.
  260. var langToAlt []altSymData
  261. for _, l := range data.Locales() {
  262. langIndex, _ := compact.FromTag(language.MustParse(l))
  263. start := len(langToAlt)
  264. if start >= hasNonLatnMask {
  265. log.Fatalf("Number of alternative assignments >= %x", hasNonLatnMask)
  266. }
  267. // Create the entry for the default value.
  268. def := defaults[langIndex]
  269. langToAlt = append(langToAlt, altSymData{
  270. compactTag: langIndex,
  271. system: def,
  272. symIndex: resolveSymbolIndex(langIndex, def),
  273. })
  274. for ns := system(0); ns < nNumberSystems; ns++ {
  275. if def == ns {
  276. continue
  277. }
  278. if sym := symbolMap[key{langIndex, ns}]; sym != nil {
  279. langToAlt = append(langToAlt, altSymData{
  280. compactTag: langIndex,
  281. system: ns,
  282. symIndex: resolveSymbolIndex(langIndex, ns),
  283. })
  284. }
  285. }
  286. if def == 0 && len(langToAlt) == start+1 {
  287. // No additional data: erase the entry.
  288. langToAlt = langToAlt[:start]
  289. } else {
  290. // Overwrite the entry in langToDefaults.
  291. langToDefaults[langIndex] = hasNonLatnMask | symOffset(start)
  292. }
  293. }
  294. w.WriteComment(`
  295. langToDefaults maps a compact language index to the default numbering system
  296. and default symbol set`)
  297. w.WriteVar("langToDefaults", langToDefaults)
  298. w.WriteComment(`
  299. langToAlt is a list of numbering system and symbol set pairs, sorted and
  300. marked by compact language index.`)
  301. w.WriteVar("langToAlt", langToAlt)
  302. }
  303. // genFormats generates the lookup table for decimal, scientific and percent
  304. // patterns.
  305. //
  306. // CLDR allows for patterns to be different per language for different numbering
  307. // systems. In practice the patterns are set to be consistent for a language
  308. // independent of the numbering system. genFormats verifies that no language
  309. // deviates from this.
  310. func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
  311. d, err := cldr.ParseDraft(*draft)
  312. if err != nil {
  313. log.Fatalf("invalid draft level: %v", err)
  314. }
  315. // Fill the first slot with a dummy so we can identify unspecified tags.
  316. formats := []number.Pattern{{}}
  317. patterns := map[string]int{}
  318. // TODO: It would be possible to eliminate two of these slices by having
  319. // another indirection and store a reference to the combination of patterns.
  320. decimal := make([]byte, compact.NumCompactTags)
  321. scientific := make([]byte, compact.NumCompactTags)
  322. percent := make([]byte, compact.NumCompactTags)
  323. for _, lang := range data.Locales() {
  324. ldml := data.RawLDML(lang)
  325. if ldml.Numbers == nil {
  326. continue
  327. }
  328. langIndex, ok := compact.FromTag(language.MustParse(lang))
  329. if !ok {
  330. log.Fatalf("No compact index for language %s", lang)
  331. }
  332. type patternSlice []*struct {
  333. cldr.Common
  334. Numbers string `xml:"numbers,attr"`
  335. Count string `xml:"count,attr"`
  336. }
  337. add := func(name string, tags []byte, ps patternSlice) {
  338. sl := cldr.MakeSlice(&ps)
  339. sl.SelectDraft(d)
  340. if len(ps) == 0 {
  341. return
  342. }
  343. if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
  344. log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
  345. }
  346. s := ps[0].Data()
  347. index, ok := patterns[s]
  348. if !ok {
  349. nf, err := number.ParsePattern(s)
  350. if err != nil {
  351. log.Fatal(err)
  352. }
  353. index = len(formats)
  354. patterns[s] = index
  355. formats = append(formats, *nf)
  356. }
  357. tags[langIndex] = byte(index)
  358. }
  359. for _, df := range ldml.Numbers.DecimalFormats {
  360. for _, l := range df.DecimalFormatLength {
  361. if l.Type != "" {
  362. continue
  363. }
  364. for _, f := range l.DecimalFormat {
  365. add("decimal", decimal, f.Pattern)
  366. }
  367. }
  368. }
  369. for _, df := range ldml.Numbers.ScientificFormats {
  370. for _, l := range df.ScientificFormatLength {
  371. if l.Type != "" {
  372. continue
  373. }
  374. for _, f := range l.ScientificFormat {
  375. add("scientific", scientific, f.Pattern)
  376. }
  377. }
  378. }
  379. for _, df := range ldml.Numbers.PercentFormats {
  380. for _, l := range df.PercentFormatLength {
  381. if l.Type != "" {
  382. continue
  383. }
  384. for _, f := range l.PercentFormat {
  385. add("percent", percent, f.Pattern)
  386. }
  387. }
  388. }
  389. }
  390. // Complete the parent tag array to reflect inheritance. An index of 0
  391. // indicates an unspecified value.
  392. for _, data := range [][]byte{decimal, scientific, percent} {
  393. for i := range data {
  394. p := compact.ID(i)
  395. for ; data[p] == 0; p = p.Parent() {
  396. }
  397. data[i] = data[p]
  398. }
  399. }
  400. w.WriteVar("tagToDecimal", decimal)
  401. w.WriteVar("tagToScientific", scientific)
  402. w.WriteVar("tagToPercent", percent)
  403. value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
  404. // Break up the lines. This won't give ideal perfect formatting, but it is
  405. // better than one huge line.
  406. value = strings.Replace(value, ", ", ",\n", -1)
  407. fmt.Fprintf(w, "var formats = %s\n", value)
  408. }