makexml.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build ignore
  5. // +build ignore
  6. // This tool generates types for the various XML formats of CLDR.
  7. package main
  8. import (
  9. "archive/zip"
  10. "bytes"
  11. "encoding/xml"
  12. "flag"
  13. "fmt"
  14. "io"
  15. "io/ioutil"
  16. "log"
  17. "os"
  18. "regexp"
  19. "strings"
  20. "golang.org/x/text/internal/gen"
  21. )
  22. var outputFile = flag.String("output", "xml.go", "output file name")
  23. func main() {
  24. flag.Parse()
  25. r := gen.OpenCLDRCoreZip()
  26. buffer, err := ioutil.ReadAll(r)
  27. if err != nil {
  28. log.Fatal("Could not read zip file")
  29. }
  30. r.Close()
  31. z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
  32. if err != nil {
  33. log.Fatalf("Could not read zip archive: %v", err)
  34. }
  35. var buf bytes.Buffer
  36. version := gen.CLDRVersion()
  37. for _, dtd := range files {
  38. for _, f := range z.File {
  39. if strings.HasSuffix(f.Name, dtd.file+".dtd") {
  40. r, err := f.Open()
  41. failOnError(err)
  42. b := makeBuilder(&buf, dtd)
  43. b.parseDTD(r)
  44. b.resolve(b.index[dtd.top[0]])
  45. b.write()
  46. if b.version != "" && version != b.version {
  47. println(f.Name)
  48. log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
  49. }
  50. break
  51. }
  52. }
  53. }
  54. fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
  55. fmt.Fprintf(&buf, "const Version = %q\n", version)
  56. gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
  57. }
  58. func failOnError(err error) {
  59. if err != nil {
  60. log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
  61. os.Exit(1)
  62. }
  63. }
// dtd holds the configuration data per DTD type. It controls which archive
// entry is read and how the elements declared there are turned into Go types.
type dtd struct {
	file string // base file name; main uses the archive entry whose name ends in file+".dtd"
	root string // Go name of the root XML element; write uses it for the type generated from top[0]

	top []string // create a different type for this section; top[0] is where resolution starts

	skipElem    []string // hard-coded or deprecated elements; ignored by resolve and writeElem
	skipAttr    []string // attributes to exclude from the generated structs
	predefined  []string // hard-coded elements exist of the form <name>Elem; embedded instead of generated
	forceRepeat []string // elements to make slices despite DTD
}
// files lists the DTDs for which types are generated, together with their
// per-DTD configuration. main processes the entries in this order.
var files = []dtd{
	{
		file: "ldmlBCP47",
		root: "LDMLBCP47",
		top:  []string{"ldmlBCP47"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
	},
	{
		file: "ldmlSupplemental",
		root: "SupplementalData",
		top:  []string{"supplementalData"},
		skipElem: []string{
			"cldrVersion", // deprecated, not used
		},
		forceRepeat: []string{
			"plurals", // data defined in plurals.xml and ordinals.xml
		},
	},
	{
		file: "ldml",
		root: "LDML",
		// The first entry is the root element; each further entry gets its
		// own named type instead of being inlined into its parent.
		top: []string{
			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
		},
		skipElem: []string{
			"cp",       // not used anywhere
			"special",  // not used anywhere
			"fallback", // deprecated, not used
			"alias",    // in Common
			"default",  // in Common
		},
		skipAttr: []string{
			"hiraganaQuarternary", // typo in DTD, correct version included as well
		},
		predefined: []string{"rules"},
	},
}
// comments maps XML element names to the doc comment that write emits
// immediately before the Go type generated for that element. Each value
// starts with a newline, so the comment is preceded by a blank line in the
// output. Elements without an entry get no comment.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
	"ldml": `
// LDML is the top-level type for locale-specific data.
`,
	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
	// NOTE(review): "dates" does not appear in any top list in files, so
	// write never looks this entry up with the configuration shown here.
	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
// element describes one XML element declared in a DTD. parseDTD creates it
// from an ELEMENT declaration and adds attributes from ATTLIST declarations;
// resolve later fills in sub from the content model stored in category.
type element struct {
	name      string       // XML element name
	category  string       // raw content model (EMPTY, ANY, or a parenthesized group) naming the elements contained by this element
	signature string       // accumulates "=<attr name>+<attr type>" for every attribute recorded by parseDTD
	attr      []*attribute // attributes supported by this element.
	sub       []struct {   // parsed and evaluated sub elements of this element.
		e      *element
		repeat bool // true if the element needs to be a slice
	}
	resolved bool // prevent multiple resolutions of this element.
}
// attribute describes one XML attribute recorded by parseDTD from an ATTLIST
// declaration.
type attribute struct {
	name string   // attribute name
	key  string   // text of the ATTLIST declaration following the element name
	list []string // tokens extracted from the enumerated value list, if any
	tag  string   // Go tag (NOTE(review): never assigned in the code visible here)
}
  164. var (
  165. reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
  166. reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
  167. reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
  168. reToken = regexp.MustCompile(`\w\-`)
  169. )
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package. One builder handles one DTD.
type builder struct {
	w       io.Writer           // destination of the generated Go code
	index   map[string]*element // elements declared in the DTD, keyed by XML element name
	elem    []*element          // elements in the order in which resolve visited them
	info    dtd                 // configuration for the DTD being processed
	version string              // value captured by parseDTD from a #FIXED attribute; compared with the CLDR version in main
}
  179. func makeBuilder(w io.Writer, d dtd) builder {
  180. return builder{
  181. w: w,
  182. index: make(map[string]*element),
  183. elem: []*element{},
  184. info: d,
  185. }
  186. }
  187. // parseDTD parses a DTD file.
  188. func (b *builder) parseDTD(r io.Reader) {
  189. for d := xml.NewDecoder(r); ; {
  190. t, err := d.Token()
  191. if t == nil {
  192. break
  193. }
  194. failOnError(err)
  195. dir, ok := t.(xml.Directive)
  196. if !ok {
  197. continue
  198. }
  199. m := reHead.FindSubmatch(dir)
  200. dir = dir[len(m[0]):]
  201. ename := string(m[2])
  202. el, elementFound := b.index[ename]
  203. switch string(m[1]) {
  204. case "ELEMENT":
  205. if elementFound {
  206. log.Fatal("parseDTD: duplicate entry for element %q", ename)
  207. }
  208. m := reElem.FindSubmatch(dir)
  209. if m == nil {
  210. log.Fatalf("parseDTD: invalid element %q", string(dir))
  211. }
  212. if len(m[0]) != len(dir) {
  213. log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
  214. }
  215. s := string(m[1])
  216. el = &element{
  217. name: ename,
  218. category: s,
  219. }
  220. b.index[ename] = el
  221. case "ATTLIST":
  222. if !elementFound {
  223. log.Fatalf("parseDTD: unknown element %q", ename)
  224. }
  225. s := string(dir)
  226. m := reAttr.FindStringSubmatch(s)
  227. if m == nil {
  228. log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
  229. }
  230. if m[4] == "FIXED" {
  231. b.version = m[5]
  232. } else {
  233. switch m[1] {
  234. case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
  235. case "type", "choice":
  236. default:
  237. el.attr = append(el.attr, &attribute{
  238. name: m[1],
  239. key: s,
  240. list: reToken.FindAllString(m[3], -1),
  241. })
  242. el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
  243. }
  244. }
  245. }
  246. }
  247. }
  248. var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
  249. // resolve takes a parsed element and converts it into structured data
  250. // that can be used to generate the XML code.
  251. func (b *builder) resolve(e *element) {
  252. if e.resolved {
  253. return
  254. }
  255. b.elem = append(b.elem, e)
  256. e.resolved = true
  257. s := e.category
  258. found := make(map[string]bool)
  259. sequenceStart := []int{}
  260. for len(s) > 0 {
  261. m := reCat.FindStringSubmatch(s)
  262. if m == nil {
  263. log.Fatalf("%s: invalid category string %q", e.name, s)
  264. }
  265. repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
  266. switch m[1] {
  267. case "":
  268. case "(":
  269. sequenceStart = append(sequenceStart, len(e.sub))
  270. case ")":
  271. if len(sequenceStart) == 0 {
  272. log.Fatalf("%s: unmatched closing parenthesis", e.name)
  273. }
  274. for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
  275. e.sub[i].repeat = e.sub[i].repeat || repeat
  276. }
  277. sequenceStart = sequenceStart[:len(sequenceStart)-1]
  278. default:
  279. if in(b.info.skipElem, m[1]) {
  280. } else if sub, ok := b.index[m[1]]; ok {
  281. if !found[sub.name] {
  282. e.sub = append(e.sub, struct {
  283. e *element
  284. repeat bool
  285. }{sub, repeat})
  286. found[sub.name] = true
  287. b.resolve(sub)
  288. }
  289. } else if m[1] == "#PCDATA" || m[1] == "ANY" {
  290. } else if m[1] != "EMPTY" {
  291. log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
  292. }
  293. }
  294. s = s[len(m[0]):]
  295. }
  296. }
  297. // return true if s is contained in set.
  298. func in(set []string, s string) bool {
  299. for _, v := range set {
  300. if v == s {
  301. return true
  302. }
  303. }
  304. return false
  305. }
  306. var repl = strings.NewReplacer("-", " ", "_", " ")
  307. // title puts the first character or each character following '_' in title case and
  308. // removes all occurrences of '_'.
  309. func title(s string) string {
  310. return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
  311. }
  312. // writeElem generates Go code for a single element, recursively.
  313. func (b *builder) writeElem(tab int, e *element) {
  314. p := func(f string, x ...interface{}) {
  315. f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
  316. fmt.Fprintf(b.w, f, x...)
  317. }
  318. if len(e.sub) == 0 && len(e.attr) == 0 {
  319. p("Common")
  320. return
  321. }
  322. p("struct {")
  323. tab++
  324. p("\nCommon")
  325. for _, attr := range e.attr {
  326. if !in(b.info.skipAttr, attr.name) {
  327. p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
  328. }
  329. }
  330. for _, sub := range e.sub {
  331. if in(b.info.predefined, sub.e.name) {
  332. p("\n%sElem", sub.e.name)
  333. continue
  334. }
  335. if in(b.info.skipElem, sub.e.name) {
  336. continue
  337. }
  338. p("\n%s ", title(sub.e.name))
  339. if sub.repeat {
  340. p("[]")
  341. }
  342. p("*")
  343. if in(b.info.top, sub.e.name) {
  344. p(title(sub.e.name))
  345. } else {
  346. b.writeElem(tab, sub.e)
  347. }
  348. p(" `xml:\"%s\"`", sub.e.name)
  349. }
  350. tab--
  351. p("\n}")
  352. }
  353. // write generates the Go XML code.
  354. func (b *builder) write() {
  355. for i, name := range b.info.top {
  356. e := b.index[name]
  357. if e != nil {
  358. fmt.Fprintf(b.w, comments[name])
  359. name := title(e.name)
  360. if i == 0 {
  361. name = b.info.root
  362. }
  363. fmt.Fprintf(b.w, "type %s ", name)
  364. b.writeElem(0, e)
  365. fmt.Fprint(b.w, "\n")
  366. }
  367. }
  368. }