match.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package language
  5. import "errors"
  6. type scriptRegionFlags uint8
  7. const (
  8. isList = 1 << iota
  9. scriptInFrom
  10. regionInFrom
  11. )
  12. func (t *Tag) setUndefinedLang(id Language) {
  13. if t.LangID == 0 {
  14. t.LangID = id
  15. }
  16. }
  17. func (t *Tag) setUndefinedScript(id Script) {
  18. if t.ScriptID == 0 {
  19. t.ScriptID = id
  20. }
  21. }
  22. func (t *Tag) setUndefinedRegion(id Region) {
  23. if t.RegionID == 0 || t.RegionID.Contains(id) {
  24. t.RegionID = id
  25. }
  26. }
  27. // ErrMissingLikelyTagsData indicates no information was available
  28. // to compute likely values of missing tags.
  29. var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
  30. // addLikelySubtags sets subtags to their most likely value, given the locale.
  31. // In most cases this means setting fields for unknown values, but in some
  32. // cases it may alter a value. It returns an ErrMissingLikelyTagsData error
  33. // if the given locale cannot be expanded.
  34. func (t Tag) addLikelySubtags() (Tag, error) {
  35. id, err := addTags(t)
  36. if err != nil {
  37. return t, err
  38. } else if id.equalTags(t) {
  39. return t, nil
  40. }
  41. id.RemakeString()
  42. return id, nil
  43. }
  44. // specializeRegion attempts to specialize a group region.
  45. func specializeRegion(t *Tag) bool {
  46. if i := regionInclusion[t.RegionID]; i < nRegionGroups {
  47. x := likelyRegionGroup[i]
  48. if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
  49. t.RegionID = Region(x.region)
  50. }
  51. return true
  52. }
  53. return false
  54. }
  55. // Maximize returns a new tag with missing tags filled in.
  56. func (t Tag) Maximize() (Tag, error) {
  57. return addTags(t)
  58. }
  59. func addTags(t Tag) (Tag, error) {
  60. // We leave private use identifiers alone.
  61. if t.IsPrivateUse() {
  62. return t, nil
  63. }
  64. if t.ScriptID != 0 && t.RegionID != 0 {
  65. if t.LangID != 0 {
  66. // already fully specified
  67. specializeRegion(&t)
  68. return t, nil
  69. }
  70. // Search matches for und-script-region. Note that for these cases
  71. // region will never be a group so there is no need to check for this.
  72. list := likelyRegion[t.RegionID : t.RegionID+1]
  73. if x := list[0]; x.flags&isList != 0 {
  74. list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
  75. }
  76. for _, x := range list {
  77. // Deviating from the spec. See match_test.go for details.
  78. if Script(x.script) == t.ScriptID {
  79. t.setUndefinedLang(Language(x.lang))
  80. return t, nil
  81. }
  82. }
  83. }
  84. if t.LangID != 0 {
  85. // Search matches for lang-script and lang-region, where lang != und.
  86. if t.LangID < langNoIndexOffset {
  87. x := likelyLang[t.LangID]
  88. if x.flags&isList != 0 {
  89. list := likelyLangList[x.region : x.region+uint16(x.script)]
  90. if t.ScriptID != 0 {
  91. for _, x := range list {
  92. if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
  93. t.setUndefinedRegion(Region(x.region))
  94. return t, nil
  95. }
  96. }
  97. } else if t.RegionID != 0 {
  98. count := 0
  99. goodScript := true
  100. tt := t
  101. for _, x := range list {
  102. // We visit all entries for which the script was not
  103. // defined, including the ones where the region was not
  104. // defined. This allows for proper disambiguation within
  105. // regions.
  106. if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
  107. tt.RegionID = Region(x.region)
  108. tt.setUndefinedScript(Script(x.script))
  109. goodScript = goodScript && tt.ScriptID == Script(x.script)
  110. count++
  111. }
  112. }
  113. if count == 1 {
  114. return tt, nil
  115. }
  116. // Even if we fail to find a unique Region, we might have
  117. // an unambiguous script.
  118. if goodScript {
  119. t.ScriptID = tt.ScriptID
  120. }
  121. }
  122. }
  123. }
  124. } else {
  125. // Search matches for und-script.
  126. if t.ScriptID != 0 {
  127. x := likelyScript[t.ScriptID]
  128. if x.region != 0 {
  129. t.setUndefinedRegion(Region(x.region))
  130. t.setUndefinedLang(Language(x.lang))
  131. return t, nil
  132. }
  133. }
  134. // Search matches for und-region. If und-script-region exists, it would
  135. // have been found earlier.
  136. if t.RegionID != 0 {
  137. if i := regionInclusion[t.RegionID]; i < nRegionGroups {
  138. x := likelyRegionGroup[i]
  139. if x.region != 0 {
  140. t.setUndefinedLang(Language(x.lang))
  141. t.setUndefinedScript(Script(x.script))
  142. t.RegionID = Region(x.region)
  143. }
  144. } else {
  145. x := likelyRegion[t.RegionID]
  146. if x.flags&isList != 0 {
  147. x = likelyRegionList[x.lang]
  148. }
  149. if x.script != 0 && x.flags != scriptInFrom {
  150. t.setUndefinedLang(Language(x.lang))
  151. t.setUndefinedScript(Script(x.script))
  152. return t, nil
  153. }
  154. }
  155. }
  156. }
  157. // Search matches for lang.
  158. if t.LangID < langNoIndexOffset {
  159. x := likelyLang[t.LangID]
  160. if x.flags&isList != 0 {
  161. x = likelyLangList[x.region]
  162. }
  163. if x.region != 0 {
  164. t.setUndefinedScript(Script(x.script))
  165. t.setUndefinedRegion(Region(x.region))
  166. }
  167. specializeRegion(&t)
  168. if t.LangID == 0 {
  169. t.LangID = _en // default language
  170. }
  171. return t, nil
  172. }
  173. return t, ErrMissingLikelyTagsData
  174. }
  175. func (t *Tag) setTagsFrom(id Tag) {
  176. t.LangID = id.LangID
  177. t.ScriptID = id.ScriptID
  178. t.RegionID = id.RegionID
  179. }
  180. // minimize removes the region or script subtags from t such that
  181. // t.addLikelySubtags() == t.minimize().addLikelySubtags().
  182. func (t Tag) minimize() (Tag, error) {
  183. t, err := minimizeTags(t)
  184. if err != nil {
  185. return t, err
  186. }
  187. t.RemakeString()
  188. return t, nil
  189. }
  190. // minimizeTags mimics the behavior of the ICU 51 C implementation.
  191. func minimizeTags(t Tag) (Tag, error) {
  192. if t.equalTags(Und) {
  193. return t, nil
  194. }
  195. max, err := addTags(t)
  196. if err != nil {
  197. return t, err
  198. }
  199. for _, id := range [...]Tag{
  200. {LangID: t.LangID},
  201. {LangID: t.LangID, RegionID: t.RegionID},
  202. {LangID: t.LangID, ScriptID: t.ScriptID},
  203. } {
  204. if x, err := addTags(id); err == nil && max.equalTags(x) {
  205. t.setTagsFrom(id)
  206. break
  207. }
  208. }
  209. return t, nil
  210. }