decode.go 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package cldr
  5. import (
  6. "archive/zip"
  7. "bytes"
  8. "encoding/xml"
  9. "fmt"
  10. "io"
  11. "io/ioutil"
  12. "log"
  13. "os"
  14. "path/filepath"
  15. "regexp"
  16. )
  17. // A Decoder loads an archive of CLDR data.
  18. type Decoder struct {
  19. dirFilter []string
  20. sectionFilter []string
  21. loader Loader
  22. cldr *CLDR
  23. curLocale string
  24. }
  25. // SetSectionFilter takes a list top-level LDML element names to which
  26. // evaluation of LDML should be limited. It automatically calls SetDirFilter.
  27. func (d *Decoder) SetSectionFilter(filter ...string) {
  28. d.sectionFilter = filter
  29. // TODO: automatically set dir filter
  30. }
  31. // SetDirFilter limits the loading of LDML XML files of the specied directories.
  32. // Note that sections may be split across directories differently for different CLDR versions.
  33. // For more robust code, use SetSectionFilter.
  34. func (d *Decoder) SetDirFilter(dir ...string) {
  35. d.dirFilter = dir
  36. }
  37. // A Loader provides access to the files of a CLDR archive.
  38. type Loader interface {
  39. Len() int
  40. Path(i int) string
  41. Reader(i int) (io.ReadCloser, error)
  42. }
  43. var fileRe = regexp.MustCompile(`.*[/\\](.*)[/\\](.*)\.xml`)
  44. // Decode loads and decodes the files represented by l.
  45. func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
  46. d.cldr = makeCLDR()
  47. for i := 0; i < l.Len(); i++ {
  48. fname := l.Path(i)
  49. if m := fileRe.FindStringSubmatch(fname); m != nil {
  50. if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
  51. continue
  52. }
  53. var r io.ReadCloser
  54. if r, err = l.Reader(i); err == nil {
  55. err = d.decode(m[1], m[2], r)
  56. r.Close()
  57. }
  58. if err != nil {
  59. return nil, err
  60. }
  61. }
  62. }
  63. d.cldr.finalize(d.sectionFilter)
  64. return d.cldr, nil
  65. }
  66. func (d *Decoder) decode(dir, id string, r io.Reader) error {
  67. var v interface{}
  68. var l *LDML
  69. cldr := d.cldr
  70. switch {
  71. case dir == "supplemental":
  72. v = cldr.supp
  73. case dir == "transforms":
  74. return nil
  75. case dir == "bcp47":
  76. v = cldr.bcp47
  77. case dir == "validity":
  78. return nil
  79. default:
  80. ok := false
  81. if v, ok = cldr.locale[id]; !ok {
  82. l = &LDML{}
  83. v, cldr.locale[id] = l, l
  84. }
  85. }
  86. x := xml.NewDecoder(r)
  87. if err := x.Decode(v); err != nil {
  88. log.Printf("%s/%s: %v", dir, id, err)
  89. return err
  90. }
  91. if l != nil {
  92. if l.Identity == nil {
  93. return fmt.Errorf("%s/%s: missing identity element", dir, id)
  94. }
  95. // TODO: verify when CLDR bug https://unicode.org/cldr/trac/ticket/8970
  96. // is resolved.
  97. // path := strings.Split(id, "_")
  98. // if lang := l.Identity.Language.Type; lang != path[0] {
  99. // return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
  100. // }
  101. }
  102. return nil
  103. }
  104. type pathLoader []string
  105. func makePathLoader(path string) (pl pathLoader, err error) {
  106. err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error {
  107. pl = append(pl, path)
  108. return err
  109. })
  110. return pl, err
  111. }
  112. func (pl pathLoader) Len() int {
  113. return len(pl)
  114. }
  115. func (pl pathLoader) Path(i int) string {
  116. return pl[i]
  117. }
  118. func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
  119. return os.Open(pl[i])
  120. }
  121. // DecodePath loads CLDR data from the given path.
  122. func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
  123. loader, err := makePathLoader(path)
  124. if err != nil {
  125. return nil, err
  126. }
  127. return d.Decode(loader)
  128. }
  129. type zipLoader struct {
  130. r *zip.Reader
  131. }
  132. func (zl zipLoader) Len() int {
  133. return len(zl.r.File)
  134. }
  135. func (zl zipLoader) Path(i int) string {
  136. return zl.r.File[i].Name
  137. }
  138. func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
  139. return zl.r.File[i].Open()
  140. }
  141. // DecodeZip loads CLDR data from the zip archive for which r is the source.
  142. func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
  143. buffer, err := ioutil.ReadAll(r)
  144. if err != nil {
  145. return nil, err
  146. }
  147. archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
  148. if err != nil {
  149. return nil, err
  150. }
  151. return d.Decode(zipLoader{archive})
  152. }