lookup_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  // Copyright 2013 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.
  4. package language
  5. import (
  6. "testing"
  7. "golang.org/x/text/internal/tag"
  8. )
  // b returns the bytes of s as a []byte. It keeps the test tables in this
  // file terse, since the lookup helpers under test take []byte arguments.
  func b(s string) []byte {
  	raw := []byte(s)
  	return raw
  }
  12. func TestLangID(t *testing.T) {
  13. tests := []struct {
  14. id, bcp47, iso3, norm string
  15. err error
  16. }{
  17. {id: "", bcp47: "und", iso3: "und", err: ErrSyntax},
  18. {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
  19. {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
  20. {id: " ", bcp47: "und", iso3: "und", err: ErrSyntax},
  21. {id: "xxx", bcp47: "und", iso3: "und", err: NewValueError([]byte("xxx"))},
  22. {id: "und", bcp47: "und", iso3: "und"},
  23. {id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
  24. {id: "jrb", bcp47: "jrb", iso3: "jrb"},
  25. {id: "es", bcp47: "es", iso3: "spa"},
  26. {id: "spa", bcp47: "es", iso3: "spa"},
  27. {id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
  28. {id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
  29. {id: "ar", bcp47: "ar", iso3: "ara"},
  30. {id: "kw", bcp47: "kw", iso3: "cor"},
  31. {id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
  32. {id: "ar", bcp47: "ar", iso3: "ara"},
  33. {id: "kur", bcp47: "ku", iso3: "kur"},
  34. {id: "nl", bcp47: "nl", iso3: "nld"},
  35. {id: "NL", bcp47: "nl", iso3: "nld"},
  36. {id: "gsw", bcp47: "gsw", iso3: "gsw"},
  37. {id: "gSW", bcp47: "gsw", iso3: "gsw"},
  38. {id: "und", bcp47: "und", iso3: "und"},
  39. {id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
  40. {id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
  41. {id: "no", bcp47: "no", iso3: "nor", norm: "no"},
  42. {id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
  43. {id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
  44. }
  45. for i, tt := range tests {
  46. want, err := getLangID(b(tt.id))
  47. if err != tt.err {
  48. t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
  49. }
  50. if err != nil {
  51. continue
  52. }
  53. if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
  54. t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
  55. }
  56. if len(tt.iso3) == 3 {
  57. if id, _ := getLangISO3(b(tt.iso3)); want != id {
  58. t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
  59. }
  60. if id, _ := getLangID(b(tt.iso3)); want != id {
  61. t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
  62. }
  63. }
  64. norm := want
  65. if tt.norm != "" {
  66. norm, _ = getLangID(b(tt.norm))
  67. }
  68. id, _ := normLang(want)
  69. if id != norm {
  70. t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
  71. }
  72. if id := want.String(); tt.bcp47 != id {
  73. t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
  74. }
  75. if id := want.ISO3(); tt.iso3[:3] != id {
  76. t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
  77. }
  78. }
  79. }
  80. func TestGrandfathered(t *testing.T) {
  81. for _, tt := range []struct{ in, out string }{
  82. {"art-lojban", "jbo"},
  83. {"i-ami", "ami"},
  84. {"i-bnn", "bnn"},
  85. {"i-hak", "hak"},
  86. {"i-klingon", "tlh"},
  87. {"i-lux", "lb"},
  88. {"i-navajo", "nv"},
  89. {"i-pwn", "pwn"},
  90. {"i-tao", "tao"},
  91. {"i-tay", "tay"},
  92. {"i-tsu", "tsu"},
  93. {"no-bok", "nb"},
  94. {"no-nyn", "nn"},
  95. {"sgn-BE-FR", "sfb"},
  96. {"sgn-BE-NL", "vgt"},
  97. {"sgn-CH-DE", "sgg"},
  98. {"sgn-ch-de", "sgg"},
  99. {"zh-guoyu", "cmn"},
  100. {"zh-hakka", "hak"},
  101. {"zh-min-nan", "nan"},
  102. {"zh-xiang", "hsn"},
  103. // Grandfathered tags with no modern replacement will be converted as follows:
  104. {"cel-gaulish", "xtg-x-cel-gaulish"},
  105. {"en-GB-oed", "en-GB-oxendict"},
  106. {"en-gb-oed", "en-GB-oxendict"},
  107. {"i-default", "en-x-i-default"},
  108. {"i-enochian", "und-x-i-enochian"},
  109. {"i-mingo", "see-x-i-mingo"},
  110. {"zh-min", "nan-x-zh-min"},
  111. {"root", "und"},
  112. {"en_US_POSIX", "en-US-u-va-posix"},
  113. {"en_us_posix", "en-US-u-va-posix"},
  114. {"en-us-posix", "en-US-u-va-posix"},
  115. } {
  116. got := Make(tt.in)
  117. want := MustParse(tt.out)
  118. if got != want {
  119. t.Errorf("%s: got %q; want %q", tt.in, got, want)
  120. }
  121. }
  122. }
  123. func TestRegionID(t *testing.T) {
  124. tests := []struct {
  125. in, out string
  126. }{
  127. {"_ ", ""},
  128. {"_000", ""},
  129. {"419", "419"},
  130. {"AA", "AA"},
  131. {"ATF", "TF"},
  132. {"HV", "HV"},
  133. {"CT", "CT"},
  134. {"DY", "DY"},
  135. {"IC", "IC"},
  136. {"FQ", "FQ"},
  137. {"JT", "JT"},
  138. {"ZZ", "ZZ"},
  139. {"EU", "EU"},
  140. {"QO", "QO"},
  141. {"FX", "FX"},
  142. }
  143. for i, tt := range tests {
  144. if tt.in[0] == '_' {
  145. id := tt.in[1:]
  146. if _, err := getRegionID(b(id)); err == nil {
  147. t.Errorf("%d:err(%s): found nil; want error", i, id)
  148. }
  149. continue
  150. }
  151. want, _ := getRegionID(b(tt.in))
  152. if s := want.String(); s != tt.out {
  153. t.Errorf("%d:%s: found %q; want %q", i, tt.in, s, tt.out)
  154. }
  155. if len(tt.in) == 2 {
  156. want, _ := getRegionISO2(b(tt.in))
  157. if s := want.String(); s != tt.out {
  158. t.Errorf("%d:getISO2(%s): found %q; want %q", i, tt.in, s, tt.out)
  159. }
  160. }
  161. }
  162. }
  163. func TestRegionType(t *testing.T) {
  164. for _, tt := range []struct {
  165. r string
  166. t byte
  167. }{
  168. {"NL", bcp47Region | ccTLD},
  169. {"EU", bcp47Region | ccTLD}, // exceptionally reserved
  170. {"AN", bcp47Region | ccTLD}, // transitionally reserved
  171. {"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
  172. {"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47
  173. {"XA", iso3166UserAssigned | bcp47Region},
  174. {"ZZ", iso3166UserAssigned | bcp47Region},
  175. {"AA", iso3166UserAssigned | bcp47Region},
  176. {"QO", iso3166UserAssigned | bcp47Region},
  177. {"QM", iso3166UserAssigned | bcp47Region},
  178. {"XK", iso3166UserAssigned | bcp47Region},
  179. {"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
  180. } {
  181. r := MustParseRegion(tt.r)
  182. if tp := r.typ(); tp != tt.t {
  183. t.Errorf("Type(%s): got %x; want %x", tt.r, tp, tt.t)
  184. }
  185. }
  186. }
  187. func TestRegionISO3(t *testing.T) {
  188. tests := []struct {
  189. from, iso3, to string
  190. }{
  191. {" ", "ZZZ", "ZZ"},
  192. {"000", "ZZZ", "ZZ"},
  193. {"AA", "AAA", ""},
  194. {"CT", "CTE", ""},
  195. {"DY", "DHY", ""},
  196. {"EU", "QUU", ""},
  197. {"HV", "HVO", ""},
  198. {"IC", "ZZZ", "ZZ"},
  199. {"JT", "JTN", ""},
  200. {"PZ", "PCZ", ""},
  201. {"QU", "QUU", "EU"},
  202. {"QO", "QOO", ""},
  203. {"YD", "YMD", ""},
  204. {"FQ", "ATF", "TF"},
  205. {"TF", "ATF", ""},
  206. {"FX", "FXX", ""},
  207. {"ZZ", "ZZZ", ""},
  208. {"419", "ZZZ", "ZZ"},
  209. }
  210. for _, tt := range tests {
  211. r, _ := getRegionID(b(tt.from))
  212. if s := r.ISO3(); s != tt.iso3 {
  213. t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
  214. }
  215. if tt.iso3 == "" {
  216. continue
  217. }
  218. want := tt.to
  219. if tt.to == "" {
  220. want = tt.from
  221. }
  222. r, _ = getRegionID(b(want))
  223. if id, _ := getRegionISO3(b(tt.iso3)); id != r {
  224. t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
  225. }
  226. }
  227. }
  228. func TestRegionM49(t *testing.T) {
  229. fromTests := []struct {
  230. m49 int
  231. id string
  232. }{
  233. {0, ""},
  234. {-1, ""},
  235. {1000, ""},
  236. {10000, ""},
  237. {001, "001"},
  238. {104, "MM"},
  239. {180, "CD"},
  240. {230, "ET"},
  241. {231, "ET"},
  242. {249, "FX"},
  243. {250, "FR"},
  244. {276, "DE"},
  245. {278, "DD"},
  246. {280, "DE"},
  247. {419, "419"},
  248. {626, "TL"},
  249. {736, "SD"},
  250. {840, "US"},
  251. {854, "BF"},
  252. {891, "CS"},
  253. {899, ""},
  254. {958, "AA"},
  255. {966, "QT"},
  256. {967, "EU"},
  257. {999, "ZZ"},
  258. }
  259. for _, tt := range fromTests {
  260. id, err := getRegionM49(tt.m49)
  261. if want, have := err != nil, tt.id == ""; want != have {
  262. t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
  263. continue
  264. }
  265. r, _ := getRegionID(b(tt.id))
  266. if r != id {
  267. t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
  268. }
  269. }
  270. toTests := []struct {
  271. m49 int
  272. id string
  273. }{
  274. {0, "000"},
  275. {0, "IC"}, // Some codes don't have an ID
  276. {001, "001"},
  277. {104, "MM"},
  278. {104, "BU"},
  279. {180, "CD"},
  280. {180, "ZR"},
  281. {231, "ET"},
  282. {250, "FR"},
  283. {249, "FX"},
  284. {276, "DE"},
  285. {278, "DD"},
  286. {419, "419"},
  287. {626, "TL"},
  288. {626, "TP"},
  289. {729, "SD"},
  290. {826, "GB"},
  291. {840, "US"},
  292. {854, "BF"},
  293. {891, "YU"},
  294. {891, "CS"},
  295. {958, "AA"},
  296. {966, "QT"},
  297. {967, "EU"},
  298. {967, "QU"},
  299. {999, "ZZ"},
  300. // For codes that don't have an M49 code use the replacement value,
  301. // if available.
  302. {854, "HV"}, // maps to Burkino Faso
  303. }
  304. for _, tt := range toTests {
  305. r, _ := getRegionID(b(tt.id))
  306. if r.M49() != tt.m49 {
  307. t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
  308. }
  309. }
  310. }
  311. func TestRegionDeprecation(t *testing.T) {
  312. tests := []struct{ in, out string }{
  313. {"BU", "MM"},
  314. {"BUR", "MM"},
  315. {"CT", "KI"},
  316. {"DD", "DE"},
  317. {"DDR", "DE"},
  318. {"DY", "BJ"},
  319. {"FX", "FR"},
  320. {"HV", "BF"},
  321. {"JT", "UM"},
  322. {"MI", "UM"},
  323. {"NH", "VU"},
  324. {"NQ", "AQ"},
  325. {"PU", "UM"},
  326. {"PZ", "PA"},
  327. {"QU", "EU"},
  328. {"RH", "ZW"},
  329. {"TP", "TL"},
  330. {"UK", "GB"},
  331. {"VD", "VN"},
  332. {"WK", "UM"},
  333. {"YD", "YE"},
  334. {"NL", "NL"},
  335. }
  336. for _, tt := range tests {
  337. rIn, _ := getRegionID([]byte(tt.in))
  338. rOut, _ := getRegionISO2([]byte(tt.out))
  339. r := normRegion(rIn)
  340. if rOut == rIn && r != 0 {
  341. t.Errorf("%s: was %q; want %q", tt.in, r, tt.in)
  342. }
  343. if rOut != rIn && r != rOut {
  344. t.Errorf("%s: was %q; want %q", tt.in, r, tt.out)
  345. }
  346. }
  347. }
  348. func TestGetScriptID(t *testing.T) {
  349. idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
  350. tests := []struct {
  351. in string
  352. out Script
  353. }{
  354. {" ", 0},
  355. {" ", 0},
  356. {" ", 0},
  357. {"", 0},
  358. {"Aaaa", 0},
  359. {"Bbbb", 1},
  360. {"Dddd", 2},
  361. {"dddd", 2},
  362. {"dDDD", 2},
  363. {"Eeee", 3},
  364. {"Zzzz", 4},
  365. }
  366. for i, tt := range tests {
  367. if id, err := getScriptID(idx, b(tt.in)); id != tt.out {
  368. t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
  369. } else if id == 0 && err == nil {
  370. t.Errorf("%d:%s: no error; expected one", i, tt.in)
  371. }
  372. }
  373. }
  374. func TestIsPrivateUse(t *testing.T) {
  375. type test struct {
  376. s string
  377. private bool
  378. }
  379. tests := []test{
  380. {"en", false},
  381. {"und", false},
  382. {"pzn", false},
  383. {"qaa", true},
  384. {"qtz", true},
  385. {"qua", false},
  386. }
  387. for i, tt := range tests {
  388. x, _ := getLangID([]byte(tt.s))
  389. if b := x.IsPrivateUse(); b != tt.private {
  390. t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
  391. }
  392. }
  393. tests = []test{
  394. {"001", false},
  395. {"419", false},
  396. {"899", false},
  397. {"900", false},
  398. {"957", false},
  399. {"958", true},
  400. {"AA", true},
  401. {"AC", false},
  402. {"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
  403. {"QU", true}, // Canonicalizes to EU, User-assigned in ISO.
  404. {"QO", true}, // CLDR grouping, User-assigned in ISO.
  405. {"QA", false},
  406. {"QM", true},
  407. {"QZ", true},
  408. {"XA", true},
  409. {"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
  410. {"XZ", true},
  411. {"ZW", false},
  412. {"ZZ", true},
  413. }
  414. for i, tt := range tests {
  415. x, _ := getRegionID([]byte(tt.s))
  416. if b := x.IsPrivateUse(); b != tt.private {
  417. t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
  418. }
  419. }
  420. tests = []test{
  421. {"Latn", false},
  422. {"Laaa", false}, // invalid
  423. {"Qaaa", true},
  424. {"Qabx", true},
  425. {"Qaby", false},
  426. {"Zyyy", false},
  427. {"Zzzz", false},
  428. }
  429. for i, tt := range tests {
  430. x, _ := getScriptID(script, []byte(tt.s))
  431. if b := x.IsPrivateUse(); b != tt.private {
  432. t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
  433. }
  434. }
  435. }