lookup.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. // Copyright 2014 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package display
  5. // This file contains common lookup code that is shared between the various
  6. // implementations of Namer and Dictionaries.
  7. import (
  8. "fmt"
  9. "sort"
  10. "strings"
  11. "golang.org/x/text/language"
  12. )
  13. type namer interface {
  14. // name gets the string for the given index. It should walk the
  15. // inheritance chain if a value is not present in the base index.
  16. name(idx int) string
  17. }
  18. func nameLanguage(n namer, x interface{}) string {
  19. t, _ := language.All.Compose(x)
  20. for {
  21. i, _, _ := langTagSet.index(t.Raw())
  22. if s := n.name(i); s != "" {
  23. return s
  24. }
  25. if t = t.Parent(); t == language.Und {
  26. return ""
  27. }
  28. }
  29. }
  30. func nameScript(n namer, x interface{}) string {
  31. t, _ := language.DeprecatedScript.Compose(x)
  32. _, s, _ := t.Raw()
  33. return n.name(scriptIndex.index(s.String()))
  34. }
  35. func nameRegion(n namer, x interface{}) string {
  36. t, _ := language.DeprecatedRegion.Compose(x)
  37. _, _, r := t.Raw()
  38. return n.name(regionIndex.index(r.String()))
  39. }
  40. func nameTag(langN, scrN, regN namer, x interface{}) string {
  41. t, ok := x.(language.Tag)
  42. if !ok {
  43. return ""
  44. }
  45. const form = language.All &^ language.SuppressScript
  46. if c, err := form.Canonicalize(t); err == nil {
  47. t = c
  48. }
  49. _, sRaw, rRaw := t.Raw()
  50. i, scr, reg := langTagSet.index(t.Raw())
  51. for i != -1 {
  52. if str := langN.name(i); str != "" {
  53. if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
  54. ss, sr := "", ""
  55. if hasS {
  56. ss = scrN.name(scriptIndex.index(scr.String()))
  57. }
  58. if hasR {
  59. sr = regN.name(regionIndex.index(reg.String()))
  60. }
  61. // TODO: use patterns in CLDR or at least confirm they are the
  62. // same for all languages.
  63. if ss != "" && sr != "" {
  64. return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
  65. }
  66. if ss != "" || sr != "" {
  67. return fmt.Sprintf("%s (%s%s)", str, ss, sr)
  68. }
  69. }
  70. return str
  71. }
  72. scr, reg = sRaw, rRaw
  73. if t = t.Parent(); t == language.Und {
  74. return ""
  75. }
  76. i, _, _ = langTagSet.index(t.Raw())
  77. }
  78. return ""
  79. }
  80. // header contains the data and indexes for a single namer.
  81. // data contains a series of strings concatenated into one. index contains the
  82. // offsets for a string in data. For example, consider a header that defines
  83. // strings for the languages de, el, en, fi, and nl:
  84. //
  85. // header{
  86. // data: "GermanGreekEnglishDutch",
  87. // index: []uint16{ 0, 6, 11, 18, 18, 23 },
  88. // }
  89. //
  90. // For a language with index i, the string is defined by
  91. // data[index[i]:index[i+1]]. So the number of elements in index is always one
  92. // greater than the number of languages for which header defines a value.
  93. // A string for a language may be empty, which means the name is undefined. In
  94. // the above example, the name for fi (Finnish) is undefined.
  95. type header struct {
  96. data string
  97. index []uint16
  98. }
  99. // name looks up the name for a tag in the dictionary, given its index.
  100. func (h *header) name(i int) string {
  101. if 0 <= i && i < len(h.index)-1 {
  102. return h.data[h.index[i]:h.index[i+1]]
  103. }
  104. return ""
  105. }
  106. // tagSet is used to find the index of a language in a set of tags.
  107. type tagSet struct {
  108. single tagIndex
  109. long []string
  110. }
  111. var (
  112. langTagSet = tagSet{
  113. single: langIndex,
  114. long: langTagsLong,
  115. }
  116. // selfTagSet is used for indexing the language strings in their own
  117. // language.
  118. selfTagSet = tagSet{
  119. single: selfIndex,
  120. long: selfTagsLong,
  121. }
  122. zzzz = language.MustParseScript("Zzzz")
  123. zz = language.MustParseRegion("ZZ")
  124. )
  125. // index returns the index of the tag for the given base, script and region or
  126. // its parent if the tag is not available. If the match is for a parent entry,
  127. // the excess script and region are returned.
  128. func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
  129. lang := base.String()
  130. index := -1
  131. if (scr != language.Script{} || reg != language.Region{}) {
  132. if scr == zzzz {
  133. scr = language.Script{}
  134. }
  135. if reg == zz {
  136. reg = language.Region{}
  137. }
  138. i := sort.SearchStrings(ts.long, lang)
  139. // All entries have either a script or a region and not both.
  140. scrStr, regStr := scr.String(), reg.String()
  141. for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
  142. if s := ts.long[i][len(lang)+1:]; s == scrStr {
  143. scr = language.Script{}
  144. index = i + ts.single.len()
  145. break
  146. } else if s == regStr {
  147. reg = language.Region{}
  148. index = i + ts.single.len()
  149. break
  150. }
  151. }
  152. }
  153. if index == -1 {
  154. index = ts.single.index(lang)
  155. }
  156. return index, scr, reg
  157. }
  158. func (ts *tagSet) Tags() []language.Tag {
  159. tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
  160. ts.single.keys(func(s string) {
  161. tags = append(tags, language.Raw.MustParse(s))
  162. })
  163. for _, s := range ts.long {
  164. tags = append(tags, language.Raw.MustParse(s))
  165. }
  166. return tags
  167. }
  168. func supportedScripts() []language.Script {
  169. scr := make([]language.Script, 0, scriptIndex.len())
  170. scriptIndex.keys(func(s string) {
  171. scr = append(scr, language.MustParseScript(s))
  172. })
  173. return scr
  174. }
  175. func supportedRegions() []language.Region {
  176. reg := make([]language.Region, 0, regionIndex.len())
  177. regionIndex.keys(func(s string) {
  178. reg = append(reg, language.MustParseRegion(s))
  179. })
  180. return reg
  181. }
  182. // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
  183. // for each length, which can be used in combination with binary search to get
  184. // the index associated with a tag.
  185. // For example, a tagIndex{
  186. // "arenesfrruzh", // 6 2-byte tags.
  187. // "barwae", // 2 3-byte tags.
  188. // "",
  189. // }
  190. // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
  191. // "wae" had an index of 7.
  192. type tagIndex [3]string
  193. func (t *tagIndex) index(s string) int {
  194. sz := len(s)
  195. if sz < 2 || 4 < sz {
  196. return -1
  197. }
  198. a := t[sz-2]
  199. index := sort.Search(len(a)/sz, func(i int) bool {
  200. p := i * sz
  201. return a[p:p+sz] >= s
  202. })
  203. p := index * sz
  204. if end := p + sz; end > len(a) || a[p:end] != s {
  205. return -1
  206. }
  207. // Add the number of tags for smaller sizes.
  208. for i := 0; i < sz-2; i++ {
  209. index += len(t[i]) / (i + 2)
  210. }
  211. return index
  212. }
  213. // len returns the number of tags that are contained in the tagIndex.
  214. func (t *tagIndex) len() (n int) {
  215. for i, s := range t {
  216. n += len(s) / (i + 2)
  217. }
  218. return n
  219. }
  220. // keys calls f for each tag.
  221. func (t *tagIndex) keys(f func(key string)) {
  222. for i, s := range *t {
  223. for ; s != ""; s = s[i+2:] {
  224. f(s[:i+2])
  225. }
  226. }
  227. }