123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400 |
- // Copyright 2013 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // +build ignore
- // This tool generates types for the various XML formats of CLDR.
- package main
- import (
- "archive/zip"
- "bytes"
- "encoding/xml"
- "flag"
- "fmt"
- "io"
- "io/ioutil"
- "log"
- "os"
- "regexp"
- "strings"
- "golang.org/x/text/internal/gen"
- )
- var outputFile = flag.String("output", "xml.go", "output file name")
- func main() {
- flag.Parse()
- r := gen.OpenCLDRCoreZip()
- buffer, err := ioutil.ReadAll(r)
- if err != nil {
- log.Fatal("Could not read zip file")
- }
- r.Close()
- z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
- if err != nil {
- log.Fatalf("Could not read zip archive: %v", err)
- }
- var buf bytes.Buffer
- version := gen.CLDRVersion()
- for _, dtd := range files {
- for _, f := range z.File {
- if strings.HasSuffix(f.Name, dtd.file+".dtd") {
- r, err := f.Open()
- failOnError(err)
- b := makeBuilder(&buf, dtd)
- b.parseDTD(r)
- b.resolve(b.index[dtd.top[0]])
- b.write()
- if b.version != "" && version != b.version {
- println(f.Name)
- log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
- }
- break
- }
- }
- }
- fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
- fmt.Fprintf(&buf, "const Version = %q\n", version)
- gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
- }
// failOnError does nothing when err is nil. Otherwise it prints the error to
// standard error, prefixed with the file and line of the caller, and
// terminates the program with exit status 1.
func failOnError(err error) {
	if err == nil {
		return
	}
	// Call depth 2 attributes the message to the caller of failOnError.
	logger := log.New(os.Stderr, "", log.Lshortfile)
	logger.Output(2, err.Error())
	os.Exit(1)
}
// dtd holds the configuration that steers code generation for one DTD file.
type dtd struct {
	// file is the base file name; ".dtd" is appended when looking it up.
	file string

	// root is the Go name given to the type generated for the first entry
	// of top.
	root string

	// top lists the XML elements for which a separate named Go type is
	// created. The first entry is the root element of the DTD.
	top []string

	// skipElem lists hard-coded or deprecated elements to leave out.
	skipElem []string

	// skipAttr lists attributes to exclude from the generated structs.
	skipAttr []string

	// predefined lists elements for which a hard-coded type of the form
	// <name>Elem exists.
	predefined []string

	// forceRepeat lists elements to generate as slices despite the DTD.
	forceRepeat []string
}
- var files = []dtd{
- {
- file: "ldmlBCP47",
- root: "LDMLBCP47",
- top: []string{"ldmlBCP47"},
- skipElem: []string{
- "cldrVersion", // deprecated, not used
- },
- },
- {
- file: "ldmlSupplemental",
- root: "SupplementalData",
- top: []string{"supplementalData"},
- skipElem: []string{
- "cldrVersion", // deprecated, not used
- },
- forceRepeat: []string{
- "plurals", // data defined in plurals.xml and ordinals.xml
- },
- },
- {
- file: "ldml",
- root: "LDML",
- top: []string{
- "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
- },
- skipElem: []string{
- "cp", // not used anywhere
- "special", // not used anywhere
- "fallback", // deprecated, not used
- "alias", // in Common
- "default", // in Common
- },
- skipAttr: []string{
- "hiraganaQuarternary", // typo in DTD, correct version included as well
- },
- predefined: []string{"rules"},
- },
- }
// comments maps the XML name of an element to the Go doc comment that is
// written immediately before the type generated for it. Every value begins
// and ends with a newline, so the comment is separated from the preceding
// output and sits directly above the type declaration.
var comments = map[string]string{
	"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,

	"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,

	"ldml": `
// LDML is the top-level type for locale-specific data.
`,

	"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,

	"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,

	"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,

	"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,

	"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
- type element struct {
- name string // XML element name
- category string // elements contained by this element
- signature string // category + attrKey*
- attr []*attribute // attributes supported by this element.
- sub []struct { // parsed and evaluated sub elements of this element.
- e *element
- repeat bool // true if the element needs to be a slice
- }
- resolved bool // prevent multiple resolutions of this element.
- }
// attribute describes one attribute declared for an element in an ATTLIST.
type attribute struct {
	// name is the XML attribute name.
	name string

	// key is the declaration text the attribute was parsed from.
	key string

	// list holds the tokens of an enumerated attribute type.
	list []string

	// tag is the Go tag.
	tag string
}
// Regular expressions used to pick apart DTD directives.
var (
	// reHead matches the start of a directive: group 1 is the directive
	// keyword and group 2 the element name it applies to.
	reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)

	// reAttr matches an attribute declaration. Group 1 is the attribute
	// name, group 2 a type keyword, group 3 the contents of a parenthesized
	// enumeration, group 4 the upper-case keyword following '#', group 5 a
	// quoted value following that keyword and group 6 a quoted value
	// including its quotes.
	//
	// NOTE(review): group 5 is `[\.\d+]`, a single character out of '.',
	// digit and '+'. `[\.\d]+` was probably intended; left unchanged here
	// because it would alter which version strings are captured — confirm.
	reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)

	// reElem matches a complete content model: EMPTY, ANY, or a
	// parenthesized group with an optional occurrence indicator.
	reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)

	// reToken matches one token (a run of word characters and hyphens) of
	// an enumerated attribute list such as "a | b-c".
	reToken = regexp.MustCompile(`[\w\-]+`)
)
- // builder is used to read in the DTD files from CLDR and generate Go code
- // to be used with the encoding/xml package.
- type builder struct {
- w io.Writer
- index map[string]*element
- elem []*element
- info dtd
- version string
- }
- func makeBuilder(w io.Writer, d dtd) builder {
- return builder{
- w: w,
- index: make(map[string]*element),
- elem: []*element{},
- info: d,
- }
- }
- // parseDTD parses a DTD file.
- func (b *builder) parseDTD(r io.Reader) {
- for d := xml.NewDecoder(r); ; {
- t, err := d.Token()
- if t == nil {
- break
- }
- failOnError(err)
- dir, ok := t.(xml.Directive)
- if !ok {
- continue
- }
- m := reHead.FindSubmatch(dir)
- dir = dir[len(m[0]):]
- ename := string(m[2])
- el, elementFound := b.index[ename]
- switch string(m[1]) {
- case "ELEMENT":
- if elementFound {
- log.Fatal("parseDTD: duplicate entry for element %q", ename)
- }
- m := reElem.FindSubmatch(dir)
- if m == nil {
- log.Fatalf("parseDTD: invalid element %q", string(dir))
- }
- if len(m[0]) != len(dir) {
- log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
- }
- s := string(m[1])
- el = &element{
- name: ename,
- category: s,
- }
- b.index[ename] = el
- case "ATTLIST":
- if !elementFound {
- log.Fatalf("parseDTD: unknown element %q", ename)
- }
- s := string(dir)
- m := reAttr.FindStringSubmatch(s)
- if m == nil {
- log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
- }
- if m[4] == "FIXED" {
- b.version = m[5]
- } else {
- switch m[1] {
- case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
- case "type", "choice":
- default:
- el.attr = append(el.attr, &attribute{
- name: m[1],
- key: s,
- list: reToken.FindAllString(m[3], -1),
- })
- el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
- }
- }
- }
- }
- }
- var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
- // resolve takes a parsed element and converts it into structured data
- // that can be used to generate the XML code.
- func (b *builder) resolve(e *element) {
- if e.resolved {
- return
- }
- b.elem = append(b.elem, e)
- e.resolved = true
- s := e.category
- found := make(map[string]bool)
- sequenceStart := []int{}
- for len(s) > 0 {
- m := reCat.FindStringSubmatch(s)
- if m == nil {
- log.Fatalf("%s: invalid category string %q", e.name, s)
- }
- repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
- switch m[1] {
- case "":
- case "(":
- sequenceStart = append(sequenceStart, len(e.sub))
- case ")":
- if len(sequenceStart) == 0 {
- log.Fatalf("%s: unmatched closing parenthesis", e.name)
- }
- for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
- e.sub[i].repeat = e.sub[i].repeat || repeat
- }
- sequenceStart = sequenceStart[:len(sequenceStart)-1]
- default:
- if in(b.info.skipElem, m[1]) {
- } else if sub, ok := b.index[m[1]]; ok {
- if !found[sub.name] {
- e.sub = append(e.sub, struct {
- e *element
- repeat bool
- }{sub, repeat})
- found[sub.name] = true
- b.resolve(sub)
- }
- } else if m[1] == "#PCDATA" || m[1] == "ANY" {
- } else if m[1] != "EMPTY" {
- log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
- }
- }
- s = s[len(m[0]):]
- }
- }
// in reports whether s is an element of set.
func in(set []string, s string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == s {
			return true
		}
	}
	return false
}
// repl turns the word separators '-' and '_' into spaces.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title puts the first character, and each character following '-' or '_',
// in title case and removes all occurrences of '-', '_' and ' '.
func title(s string) string {
	spaced := repl.Replace(s)
	titled := strings.Title(spaced)
	return strings.Replace(titled, " ", "", -1)
}
- // writeElem generates Go code for a single element, recursively.
- func (b *builder) writeElem(tab int, e *element) {
- p := func(f string, x ...interface{}) {
- f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
- fmt.Fprintf(b.w, f, x...)
- }
- if len(e.sub) == 0 && len(e.attr) == 0 {
- p("Common")
- return
- }
- p("struct {")
- tab++
- p("\nCommon")
- for _, attr := range e.attr {
- if !in(b.info.skipAttr, attr.name) {
- p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
- }
- }
- for _, sub := range e.sub {
- if in(b.info.predefined, sub.e.name) {
- p("\n%sElem", sub.e.name)
- continue
- }
- if in(b.info.skipElem, sub.e.name) {
- continue
- }
- p("\n%s ", title(sub.e.name))
- if sub.repeat {
- p("[]")
- }
- p("*")
- if in(b.info.top, sub.e.name) {
- p(title(sub.e.name))
- } else {
- b.writeElem(tab, sub.e)
- }
- p(" `xml:\"%s\"`", sub.e.name)
- }
- tab--
- p("\n}")
- }
- // write generates the Go XML code.
- func (b *builder) write() {
- for i, name := range b.info.top {
- e := b.index[name]
- if e != nil {
- fmt.Fprintf(b.w, comments[name])
- name := title(e.name)
- if i == 0 {
- name = b.info.root
- }
- fmt.Fprintf(b.w, "type %s ", name)
- b.writeElem(0, e)
- fmt.Fprint(b.w, "\n")
- }
- }
- }
|