language.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:generate go run gen.go gen_index.go -output tables.go
  5. //go:generate go run gen_parents.go
  6. package compact
  7. // TODO: Remove above NOTE after:
  8. // - verifying that tables are dropped correctly (most notably matcher tables).
  9. import (
  10. "strings"
  11. "golang.org/x/text/internal/language"
  12. )
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
	// NOTE: exported tags will become part of the public API.
	// language is the compact ID computed by Make from the tag with any
	// "rg" override removed. LanguageID returns it.
	language ID
	// locale is the compact ID used for regional settings. It differs from
	// language only when Make found a usable "rg" override. RegionalID
	// returns it.
	locale ID
	// full is set by Make when the two IDs do not represent the tag exactly;
	// it is nil for tags that are fully described by language and locale.
	full fullTag // always a language.Tag for now.
}
// _und is the ID value that IsRoot compares against to recognize the root
// language "und" for tags without a full representation.
const _und = 0
// fullTag is the set of methods Tag calls on the complete tag stored in its
// full field: Tag.IsRoot and Tag.Parent delegate to these when full is set.
type fullTag interface {
	// IsRoot reports whether the complete tag is the root tag.
	IsRoot() bool
	// Parent returns the parent of the complete tag.
	Parent() language.Tag
}
  27. // Make a compact Tag from a fully specified internal language Tag.
  28. func Make(t language.Tag) (tag Tag) {
  29. if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
  30. if r, err := language.ParseRegion(region[:2]); err == nil {
  31. tFull := t
  32. t, _ = t.SetTypeForKey("rg", "")
  33. // TODO: should we not consider "va" for the language tag?
  34. var exact1, exact2 bool
  35. tag.language, exact1 = FromTag(t)
  36. t.RegionID = r
  37. tag.locale, exact2 = FromTag(t)
  38. if !exact1 || !exact2 {
  39. tag.full = tFull
  40. }
  41. return tag
  42. }
  43. }
  44. lang, ok := FromTag(t)
  45. tag.language = lang
  46. tag.locale = lang
  47. if !ok {
  48. tag.full = t
  49. }
  50. return tag
  51. }
  52. // Tag returns an internal language Tag version of this tag.
  53. func (t Tag) Tag() language.Tag {
  54. if t.full != nil {
  55. return t.full.(language.Tag)
  56. }
  57. tag := t.language.Tag()
  58. if t.language != t.locale {
  59. loc := t.locale.Tag()
  60. tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
  61. }
  62. return tag
  63. }
  64. // IsCompact reports whether this tag is fully defined in terms of ID.
  65. func (t *Tag) IsCompact() bool {
  66. return t.full == nil
  67. }
  68. // MayHaveVariants reports whether a tag may have variants. If it returns false
  69. // it is guaranteed the tag does not have variants.
  70. func (t Tag) MayHaveVariants() bool {
  71. return t.full != nil || int(t.language) >= len(coreTags)
  72. }
  73. // MayHaveExtensions reports whether a tag may have extensions. If it returns
  74. // false it is guaranteed the tag does not have them.
  75. func (t Tag) MayHaveExtensions() bool {
  76. return t.full != nil ||
  77. int(t.language) >= len(coreTags) ||
  78. t.language != t.locale
  79. }
  80. // IsRoot returns true if t is equal to language "und".
  81. func (t Tag) IsRoot() bool {
  82. if t.full != nil {
  83. return t.full.IsRoot()
  84. }
  85. return t.language == _und
  86. }
  87. // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
  88. // specific language are substituted with fields from the parent language.
  89. // The parent for a language may change for newer versions of CLDR.
  90. func (t Tag) Parent() Tag {
  91. if t.full != nil {
  92. return Make(t.full.Parent())
  93. }
  94. if t.language != t.locale {
  95. // Simulate stripping -u-rg-xxxxxx
  96. return Tag{language: t.language, locale: t.language}
  97. }
  98. // TODO: use parent lookup table once cycle from internal package is
  99. // removed. Probably by internalizing the table and declaring this fast
  100. // enough.
  101. // lang := compactID(internal.Parent(uint16(t.language)))
  102. lang, _ := FromTag(t.language.Tag().Parent())
  103. return Tag{language: lang, locale: lang}
  104. }
  105. // returns token t and the rest of the string.
  106. func nextToken(s string) (t, tail string) {
  107. p := strings.Index(s[1:], "-")
  108. if p == -1 {
  109. return s[1:], ""
  110. }
  111. p++
  112. return s[1:p], s[p:]
  113. }
  114. // LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
  115. // for which data exists in the text repository.The index will change over time
  116. // and should not be stored in persistent storage. If t does not match a compact
  117. // index, exact will be false and the compact index will be returned for the
  118. // first match after repeatedly taking the Parent of t.
  119. func LanguageID(t Tag) (id ID, exact bool) {
  120. return t.language, t.full == nil
  121. }
  122. // RegionalID returns the ID for the regional variant of this tag. This index is
  123. // used to indicate region-specific overrides, such as default currency, default
  124. // calendar and week data, default time cycle, and default measurement system
  125. // and unit preferences.
  126. //
  127. // For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
  128. // settings for currency, number formatting, etc. The CompactIndex for this tag
  129. // will be that for en-GB, while the RegionalID will be the one corresponding to
  130. // en-US.
  131. func RegionalID(t Tag) (id ID, exact bool) {
  132. return t.locale, t.full == nil
  133. }
  134. // LanguageTag returns t stripped of regional variant indicators.
  135. //
  136. // At the moment this means it is stripped of a regional and variant subtag "rg"
  137. // and "va" in the "u" extension.
  138. func (t Tag) LanguageTag() Tag {
  139. if t.full == nil {
  140. return Tag{language: t.language, locale: t.language}
  141. }
  142. tt := t.Tag()
  143. tt.SetTypeForKey("rg", "")
  144. tt.SetTypeForKey("va", "")
  145. return Make(tt)
  146. }
// RegionalTag returns the regional variant of the tag.
//
// At the moment this means that the region is set from the regional subtag
// "rg" in the "u" extension.
//
// For tags without a retained complete tag the result uses the locale ID for
// both language and locale. For all other tags t is currently returned
// unchanged.
func (t Tag) RegionalTag() Tag {
	rt := Tag{language: t.locale, locale: t.locale}
	if t.full == nil {
		return rt
	}
	// NOTE(review): the builder below is populated from the locale tag plus
	// the variants and extensions of t, but its result is never used: the
	// function returns t as is. Presumably something like Make(b.Make()) was
	// intended — confirm the desired behavior before changing it.
	b := language.Builder{}
	tag := t.Tag()
	// tag, _ = tag.SetTypeForKey("rg", "")
	b.SetTag(t.locale.Tag())
	if v := tag.Variants(); v != "" {
		for _, v := range strings.Split(v, "-") {
			b.AddVariant(v)
		}
	}
	for _, e := range tag.Extensions() {
		b.AddExt(e)
	}
	return t
}
// FromTag reports closest matching ID for an internal language Tag.
//
// The lookup proceeds in stages: tags with variants (or a "va" type in the
// "u" extension) are first looked up in specialTags, then the tag is looked
// up with getCoreIndex, then its maximized form, and finally each of its
// parents in turn. exact starts out true and is cleared as soon as part of t
// is dropped or a fallback stage is needed. If nothing matches, ID 0 is
// returned with exact set to false.
func FromTag(t language.Tag) (id ID, exact bool) {
	// TODO: perhaps give more frequent tags a lower index.
	// TODO: we could make the indexes stable. This will excluded some
	// possibilities for optimization, so don't do this quite yet.
	exact = true
	// b, s and r are the language, script and region of t; they are used
	// below to rebuild t without its other parts.
	b, s, r := t.Raw()
	// NOTE(review): HasString presumably reports that t carries more than
	// language, script and region — confirm against internal/language.
	if t.HasString() {
		if t.IsPrivateUse() {
			// We have no entries for user-defined tags.
			return 0, false
		}
		// hasExtra records that t, as reduced below, still carries a variant
		// or a "va" type and must therefore be looked up in specialTags.
		hasExtra := false
		if t.HasVariants() {
			if t.HasExtensions() {
				// Rebuild t from its core components and variants only;
				// dropping the extensions makes the match inexact.
				build := language.Builder{}
				build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
				build.AddVariant(t.Variants())
				exact = false
				t = build.Make()
			}
			hasExtra = true
		} else if _, ok := t.Extension('u'); ok {
			// TODO: va may mean something else. Consider not considering it.
			// Strip all but the 'va' entry.
			old := t
			variant := t.TypeForKey("va")
			t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
			if variant != "" {
				t, _ = t.SetTypeForKey("va", variant)
				hasExtra = true
			}
			// The match is exact only if stripping changed nothing.
			exact = old == t
		} else {
			// No variants and no "u" extension: the match is reported as
			// inexact.
			exact = false
		}
		if hasExtra {
			// We have some variants.
			// IDs for special tags follow those of the core tags.
			for i, s := range specialTags {
				if s == t {
					return ID(i + len(coreTags)), exact
				}
			}
			exact = false
		}
	}
	if x, ok := getCoreIndex(t); ok {
		return x, exact
	}
	// Everything below is a fallback, so the result is no longer exact.
	exact = false
	if r != 0 && s == 0 {
		// Deal with cases where an extra script is inserted for the region.
		// The maximized tag is local to this block; the parent walk below
		// continues from the unmaximized t.
		t, _ := t.Maximize()
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	// Walk up the parent chain until a core index is found or root is hit.
	for t = t.Parent(); t != root; t = t.Parent() {
		// No variants specified: just compare core components.
		// The key has the form lllssrrr, where l, s, and r are nibbles for
		// respectively the langID, scriptID, and regionID.
		if x, ok := getCoreIndex(t); ok {
			return x, exact
		}
	}
	// Nothing matched; exact is false at this point.
	return 0, exact
}
// root is the zero language.Tag. FromTag uses it as the stop condition when
// walking up the parent chain of a tag.
var root = language.Tag{}