language_test.go 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package language
  5. import (
  6. "reflect"
  7. "testing"
  8. )
  9. func TestTagSize(t *testing.T) {
  10. id := Tag{}
  11. typ := reflect.TypeOf(id)
  12. if typ.Size() > 24 {
  13. t.Errorf("size of Tag was %d; want 24", typ.Size())
  14. }
  15. }
  16. func TestIsRoot(t *testing.T) {
  17. loc := Tag{}
  18. if !loc.IsRoot() {
  19. t.Errorf("unspecified should be root.")
  20. }
  21. for i, tt := range parseTests() {
  22. loc, _ := Parse(tt.in)
  23. undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
  24. if loc.IsRoot() != undef {
  25. t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
  26. }
  27. }
  28. }
  29. func TestEquality(t *testing.T) {
  30. for i, tt := range parseTests() {
  31. s := tt.in
  32. tag := Make(s)
  33. t1 := Make(tag.String())
  34. if tag != t1 {
  35. t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
  36. }
  37. t2, _ := Compose(tag)
  38. if tag != t2 {
  39. t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
  40. }
  41. }
  42. }
  43. func TestString(t *testing.T) {
  44. tests := []string{
  45. "no-u-rg-dkzzzz",
  46. }
  47. for i, s := range tests {
  48. tag := Make(s)
  49. if tag.String() != s {
  50. t.Errorf("%d:%s: got %s: want %s (%#v)", i, s, tag.String(), s, tag)
  51. }
  52. }
  53. }
  54. func TestMarshal(t *testing.T) {
  55. testCases := []string{
  56. // TODO: these values will change with each CLDR update. This issue
  57. // will be solved if we decide to fix the indexes.
  58. "und",
  59. "ca-ES-valencia",
  60. "ca-ES-valencia-u-va-posix",
  61. "ca-ES-valencia-u-co-phonebk",
  62. "ca-ES-valencia-u-co-phonebk-va-posix",
  63. "x-klingon",
  64. "en-US",
  65. "en-US-u-va-posix",
  66. "en",
  67. "en-u-co-phonebk",
  68. "en-001",
  69. "sh",
  70. "en-GB-u-rg-uszzzz",
  71. "en-GB-u-rg-uszzzz-va-posix",
  72. "en-GB-u-co-phonebk-rg-uszzzz",
  73. // Invalid tags should also roundtrip.
  74. "en-GB-u-co-phonebk-rg-uszz",
  75. }
  76. for _, tc := range testCases {
  77. var tag Tag
  78. err := tag.UnmarshalText([]byte(tc))
  79. if err != nil {
  80. t.Errorf("UnmarshalText(%q): unexpected error: %v", tc, err)
  81. }
  82. b, err := tag.MarshalText()
  83. if err != nil {
  84. t.Errorf("MarshalText(%q): unexpected error: %v", tc, err)
  85. }
  86. if got := string(b); got != tc {
  87. t.Errorf("%s: got %q; want %q", tc, got, tc)
  88. }
  89. }
  90. }
  91. func TestBase(t *testing.T) {
  92. tests := []struct {
  93. loc, lang string
  94. conf Confidence
  95. }{
  96. {"und", "en", Low},
  97. {"x-abc", "und", No},
  98. {"en", "en", Exact},
  99. {"und-Cyrl", "ru", High},
  100. // If a region is not included, the official language should be English.
  101. {"und-US", "en", High},
  102. // TODO: not-explicitly listed scripts should probably be und, No
  103. // Modify addTags to return info on how the match was derived.
  104. // {"und-Aghb", "und", No},
  105. }
  106. for i, tt := range tests {
  107. loc, _ := Parse(tt.loc)
  108. lang, conf := loc.Base()
  109. if lang.String() != tt.lang {
  110. t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
  111. }
  112. if conf != tt.conf {
  113. t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
  114. }
  115. }
  116. }
  117. func TestParseBase(t *testing.T) {
  118. tests := []struct {
  119. in string
  120. out string
  121. ok bool
  122. }{
  123. {"en", "en", true},
  124. {"EN", "en", true},
  125. {"nld", "nl", true},
  126. {"dut", "dut", true}, // bibliographic
  127. {"aaj", "und", false}, // unknown
  128. {"qaa", "qaa", true},
  129. {"a", "und", false},
  130. {"", "und", false},
  131. {"aaaa", "und", false},
  132. }
  133. for i, tt := range tests {
  134. x, err := ParseBase(tt.in)
  135. if x.String() != tt.out || err == nil != tt.ok {
  136. t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
  137. }
  138. if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
  139. t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
  140. }
  141. }
  142. }
  143. func TestScript(t *testing.T) {
  144. tests := []struct {
  145. loc, scr string
  146. conf Confidence
  147. }{
  148. {"und", "Latn", Low},
  149. {"en-Latn", "Latn", Exact},
  150. {"en", "Latn", High},
  151. {"sr", "Cyrl", Low},
  152. {"kk", "Cyrl", High},
  153. {"kk-CN", "Arab", Low},
  154. {"cmn", "Hans", Low},
  155. {"ru", "Cyrl", High},
  156. {"ru-RU", "Cyrl", High},
  157. {"yue", "Hant", Low},
  158. {"x-abc", "Zzzz", Low},
  159. {"und-zyyy", "Zyyy", Exact},
  160. }
  161. for i, tt := range tests {
  162. loc, _ := Parse(tt.loc)
  163. sc, conf := loc.Script()
  164. if sc.String() != tt.scr {
  165. t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
  166. }
  167. if conf != tt.conf {
  168. t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
  169. }
  170. }
  171. }
  172. func TestParseScript(t *testing.T) {
  173. tests := []struct {
  174. in string
  175. out string
  176. ok bool
  177. }{
  178. {"Latn", "Latn", true},
  179. {"zzzz", "Zzzz", true},
  180. {"zyyy", "Zyyy", true},
  181. {"Latm", "Zzzz", false},
  182. {"Zzz", "Zzzz", false},
  183. {"", "Zzzz", false},
  184. {"Zzzxx", "Zzzz", false},
  185. }
  186. for i, tt := range tests {
  187. x, err := ParseScript(tt.in)
  188. if x.String() != tt.out || err == nil != tt.ok {
  189. t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
  190. }
  191. if err == nil {
  192. if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
  193. t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
  194. }
  195. }
  196. }
  197. }
  198. func TestRegion(t *testing.T) {
  199. tests := []struct {
  200. loc, reg string
  201. conf Confidence
  202. }{
  203. {"und", "US", Low},
  204. {"en", "US", Low},
  205. {"zh-Hant", "TW", Low},
  206. {"en-US", "US", Exact},
  207. {"cmn", "CN", Low},
  208. {"ru", "RU", Low},
  209. {"yue", "HK", Low},
  210. {"x-abc", "ZZ", Low},
  211. }
  212. for i, tt := range tests {
  213. loc, _ := Raw.Parse(tt.loc)
  214. reg, conf := loc.Region()
  215. if reg.String() != tt.reg {
  216. t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
  217. }
  218. if conf != tt.conf {
  219. t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
  220. }
  221. }
  222. }
  223. func TestEncodeM49(t *testing.T) {
  224. tests := []struct {
  225. m49 int
  226. code string
  227. ok bool
  228. }{
  229. {1, "001", true},
  230. {840, "US", true},
  231. {899, "ZZ", false},
  232. }
  233. for i, tt := range tests {
  234. if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
  235. t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
  236. }
  237. }
  238. for i := 1; i <= 1000; i++ {
  239. if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
  240. t.Errorf("%d has no error, but maps to undefined region", i)
  241. }
  242. }
  243. }
  244. func TestParseRegion(t *testing.T) {
  245. tests := []struct {
  246. in string
  247. out string
  248. ok bool
  249. }{
  250. {"001", "001", true},
  251. {"840", "US", true},
  252. {"899", "ZZ", false},
  253. {"USA", "US", true},
  254. {"US", "US", true},
  255. {"BC", "ZZ", false},
  256. {"C", "ZZ", false},
  257. {"CCCC", "ZZ", false},
  258. {"01", "ZZ", false},
  259. }
  260. for i, tt := range tests {
  261. r, err := ParseRegion(tt.in)
  262. if r.String() != tt.out || err == nil != tt.ok {
  263. t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
  264. }
  265. if err == nil {
  266. if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
  267. t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
  268. }
  269. }
  270. }
  271. }
  272. func TestIsCountry(t *testing.T) {
  273. tests := []struct {
  274. reg string
  275. country bool
  276. }{
  277. {"US", true},
  278. {"001", false},
  279. {"958", false},
  280. {"419", false},
  281. {"203", true},
  282. {"020", true},
  283. {"900", false},
  284. {"999", false},
  285. {"QO", false},
  286. {"EU", false},
  287. {"AA", false},
  288. {"XK", true},
  289. }
  290. for i, tt := range tests {
  291. r, _ := ParseRegion(tt.reg)
  292. if r.IsCountry() != tt.country {
  293. t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
  294. }
  295. }
  296. }
  297. func TestIsGroup(t *testing.T) {
  298. tests := []struct {
  299. reg string
  300. group bool
  301. }{
  302. {"US", false},
  303. {"001", true},
  304. {"958", false},
  305. {"419", true},
  306. {"203", false},
  307. {"020", false},
  308. {"900", false},
  309. {"999", false},
  310. {"QO", true},
  311. {"EU", true},
  312. {"AA", false},
  313. {"XK", false},
  314. }
  315. for i, tt := range tests {
  316. r, _ := ParseRegion(tt.reg)
  317. if r.IsGroup() != tt.group {
  318. t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
  319. }
  320. }
  321. }
  322. func TestContains(t *testing.T) {
  323. tests := []struct {
  324. enclosing, contained string
  325. contains bool
  326. }{
  327. // A region contains itself.
  328. {"US", "US", true},
  329. {"001", "001", true},
  330. // Direct containment.
  331. {"001", "002", true},
  332. {"039", "XK", true},
  333. {"150", "XK", true},
  334. {"EU", "AT", true},
  335. {"QO", "AQ", true},
  336. // Indirect containemnt.
  337. {"001", "US", true},
  338. {"001", "419", true},
  339. {"001", "013", true},
  340. // No containment.
  341. {"US", "001", false},
  342. {"155", "EU", false},
  343. }
  344. for i, tt := range tests {
  345. r := MustParseRegion(tt.enclosing)
  346. con := MustParseRegion(tt.contained)
  347. if got := r.Contains(con); got != tt.contains {
  348. t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
  349. }
  350. }
  351. }
  352. func TestRegionCanonicalize(t *testing.T) {
  353. for i, tt := range []struct{ in, out string }{
  354. {"UK", "GB"},
  355. {"TP", "TL"},
  356. {"QU", "EU"},
  357. {"SU", "SU"},
  358. {"VD", "VN"},
  359. {"DD", "DE"},
  360. } {
  361. r := MustParseRegion(tt.in)
  362. want := MustParseRegion(tt.out)
  363. if got := r.Canonicalize(); got != want {
  364. t.Errorf("%d: got %v; want %v", i, got, want)
  365. }
  366. }
  367. }
  368. func TestRegionTLD(t *testing.T) {
  369. for _, tt := range []struct {
  370. in, out string
  371. ok bool
  372. }{
  373. {"EH", "EH", true},
  374. {"FR", "FR", true},
  375. {"TL", "TL", true},
  376. // In ccTLD before in ISO.
  377. {"GG", "GG", true},
  378. // Non-standard assignment of ccTLD to ISO code.
  379. {"GB", "UK", true},
  380. // Exceptionally reserved in ISO and valid ccTLD.
  381. {"UK", "UK", true},
  382. {"AC", "AC", true},
  383. {"EU", "EU", true},
  384. {"SU", "SU", true},
  385. // Exceptionally reserved in ISO and invalid ccTLD.
  386. {"CP", "ZZ", false},
  387. {"DG", "ZZ", false},
  388. {"EA", "ZZ", false},
  389. {"FX", "ZZ", false},
  390. {"IC", "ZZ", false},
  391. {"TA", "ZZ", false},
  392. // Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
  393. // it is still being phased out.
  394. {"AN", "AN", true},
  395. {"TP", "TP", true},
  396. // Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
  397. // Defined in package language as it has a mapping in CLDR.
  398. {"BU", "ZZ", false},
  399. {"CS", "ZZ", false},
  400. {"NT", "ZZ", false},
  401. {"YU", "ZZ", false},
  402. {"ZR", "ZZ", false},
  403. // Not defined in package: SF.
  404. // Indeterminately reserved in ISO.
  405. // Defined in package language as it has a legacy mapping in CLDR.
  406. {"DY", "ZZ", false},
  407. {"RH", "ZZ", false},
  408. {"VD", "ZZ", false},
  409. // Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
  410. // RN, RP, WG, WL, WV, and YV.
  411. // Not assigned in ISO, but legacy definitions in CLDR.
  412. {"DD", "ZZ", false},
  413. {"YD", "ZZ", false},
  414. // Normal mappings but somewhat special status in ccTLD.
  415. {"BL", "BL", true},
  416. {"MF", "MF", true},
  417. {"BV", "BV", true},
  418. {"SJ", "SJ", true},
  419. // Have values when normalized, but not as is.
  420. {"QU", "ZZ", false},
  421. // ISO Private Use.
  422. {"AA", "ZZ", false},
  423. {"QM", "ZZ", false},
  424. {"QO", "ZZ", false},
  425. {"XA", "ZZ", false},
  426. {"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
  427. } {
  428. if tt.in == "" {
  429. continue
  430. }
  431. r := MustParseRegion(tt.in)
  432. var want Region
  433. if tt.out != "ZZ" {
  434. want = MustParseRegion(tt.out)
  435. }
  436. tld, err := r.TLD()
  437. if got := err == nil; got != tt.ok {
  438. t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
  439. }
  440. if tld != want {
  441. t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
  442. }
  443. }
  444. }
  445. func TestCanonicalize(t *testing.T) {
  446. // TODO: do a full test using CLDR data in a separate regression test.
  447. tests := []struct {
  448. in, out string
  449. option CanonType
  450. }{
  451. {"en-Latn", "en", SuppressScript},
  452. {"sr-Cyrl", "sr-Cyrl", SuppressScript},
  453. {"sh", "sr-Latn", Legacy},
  454. {"sh-HR", "sr-Latn-HR", Legacy},
  455. {"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
  456. {"tl", "fil", Legacy},
  457. {"no", "no", Legacy},
  458. {"no", "nb", Legacy | CLDR},
  459. {"cmn", "cmn", Legacy},
  460. {"cmn", "zh", Macro},
  461. {"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
  462. {"yue", "yue", Macro},
  463. {"nb", "no", Macro},
  464. {"nb", "nb", Macro | CLDR},
  465. {"no", "no", Macro},
  466. {"no", "no", Macro | CLDR},
  467. {"iw", "he", DeprecatedBase},
  468. {"iw", "he", Deprecated | CLDR},
  469. {"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
  470. {"alb", "sq", Legacy}, // bibliographic
  471. {"dut", "nl", Legacy}, // bibliographic
  472. // As of CLDR 25, mo is no longer considered a legacy mapping.
  473. {"mo", "mo", Legacy | CLDR},
  474. {"und-AN", "und-AN", Deprecated},
  475. {"und-YD", "und-YE", DeprecatedRegion},
  476. {"und-YD", "und-YD", DeprecatedBase},
  477. {"und-Qaai", "und-Zinh", DeprecatedScript},
  478. {"und-Qaai", "und-Qaai", DeprecatedBase},
  479. {"drh", "mn", All}, // drh -> khk -> mn
  480. {"en-GB-u-rg-uszzzz", "en-GB-u-rg-uszzzz", Raw},
  481. {"en-GB-u-rg-USZZZZ", "en-GB-u-rg-uszzzz", Raw},
  482. // TODO: use different exact values for language and regional tag?
  483. {"en-GB-u-rg-uszzzz-va-posix", "en-GB-u-rg-uszzzz-va-posix", Raw},
  484. {"en-GB-u-rg-uszzzz-co-phonebk", "en-GB-u-co-phonebk-rg-uszzzz", Raw},
  485. // Invalid region specifications are left as is.
  486. {"en-GB-u-rg-usz", "en-GB-u-rg-usz", Raw},
  487. {"en-GB-u-rg-usz-va-posix", "en-GB-u-rg-usz-va-posix", Raw},
  488. {"en-GB-u-rg-usz-co-phonebk", "en-GB-u-co-phonebk-rg-usz", Raw},
  489. // CVE-2020-28851
  490. // invalid key-value pair of -u- extension.
  491. {"ES-u-000-00", "es-u-000-00", Raw},
  492. {"ES-u-000-00-v-00", "es-u-000-00-v-00", Raw},
  493. // reordered and unknown extension.
  494. {"ES-v-00-u-000-00", "es-u-000-00-v-00", Raw},
  495. }
  496. for i, tt := range tests {
  497. in, _ := Raw.Parse(tt.in)
  498. in, _ = tt.option.Canonicalize(in)
  499. if in.String() != tt.out {
  500. t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
  501. }
  502. }
  503. // Test idempotence.
  504. for _, base := range Supported.BaseLanguages() {
  505. tag, _ := Raw.Compose(base)
  506. got, _ := All.Canonicalize(tag)
  507. want, _ := All.Canonicalize(got)
  508. if got != want {
  509. t.Errorf("idem(%s): got %s; want %s", tag, got, want)
  510. }
  511. }
  512. }
  513. func TestTypeForKey(t *testing.T) {
  514. tests := []struct{ key, in, out string }{
  515. {"co", "en", ""},
  516. {"co", "en-u-abc", ""},
  517. {"co", "en-u-co-phonebk", "phonebk"},
  518. {"co", "en-u-co-phonebk-cu-aud", "phonebk"},
  519. {"co", "x-foo-u-co-phonebk", ""},
  520. {"va", "en-US-u-va-posix", "posix"},
  521. {"rg", "en-u-rg-gbzzzz", "gbzzzz"},
  522. {"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
  523. {"kc", "cmn-u-co-stroke", ""},
  524. {"rg", "cmn-u-rg", ""},
  525. {"rg", "cmn-u-rg-co-stroke", ""},
  526. {"co", "cmn-u-rg-co-stroke", "stroke"},
  527. {"co", "cmn-u-co-rg-gbzzzz", ""},
  528. {"rg", "cmn-u-co-rg-gbzzzz", "gbzzzz"},
  529. {"rg", "cmn-u-rg-gbzzzz-nlzzzz", "gbzzzz"},
  530. }
  531. for _, tt := range tests {
  532. if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
  533. t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
  534. }
  535. }
  536. }
  537. func TestParent(t *testing.T) {
  538. tests := []struct{ in, out string }{
  539. // Strip variants and extensions first
  540. {"de-u-co-phonebk", "de"},
  541. {"de-1994", "de"},
  542. {"de-Latn-1994", "de"}, // remove superfluous script.
  543. // Ensure the canonical Tag for an entry is in the chain for base-script
  544. // pairs.
  545. {"zh-Hans", "zh"},
  546. // Skip the script if it is the maximized version. CLDR files for the
  547. // skipped tag are always empty.
  548. {"zh-Hans-TW", "zh"},
  549. {"zh-Hans-CN", "zh"},
  550. // Insert the script if the maximized script is not the same as the
  551. // maximized script of the base language.
  552. {"zh-TW", "zh-Hant"},
  553. {"zh-HK", "zh-Hant"},
  554. {"zh-Hant-TW", "zh-Hant"},
  555. {"zh-Hant-HK", "zh-Hant"},
  556. // Non-default script skips to und.
  557. // CLDR
  558. {"az-Cyrl", "und"},
  559. {"bs-Cyrl", "und"},
  560. {"en-Dsrt", "und"},
  561. {"ha-Arab", "und"},
  562. {"mn-Mong", "und"},
  563. {"pa-Arab", "und"},
  564. {"shi-Latn", "und"},
  565. {"sr-Latn", "und"},
  566. {"uz-Arab", "und"},
  567. {"uz-Cyrl", "und"},
  568. {"vai-Latn", "und"},
  569. {"zh-Hant", "und"},
  570. // extra
  571. {"nl-Cyrl", "und"},
  572. // World english inherits from en-001.
  573. {"en-150", "en-001"},
  574. {"en-AU", "en-001"},
  575. {"en-BE", "en-001"},
  576. {"en-GG", "en-001"},
  577. {"en-GI", "en-001"},
  578. {"en-HK", "en-001"},
  579. {"en-IE", "en-001"},
  580. {"en-IM", "en-001"},
  581. {"en-IN", "en-001"},
  582. {"en-JE", "en-001"},
  583. {"en-MT", "en-001"},
  584. {"en-NZ", "en-001"},
  585. {"en-PK", "en-001"},
  586. {"en-SG", "en-001"},
  587. // Spanish in Latin-American countries have es-419 as parent.
  588. {"es-AR", "es-419"},
  589. {"es-BO", "es-419"},
  590. {"es-CL", "es-419"},
  591. {"es-CO", "es-419"},
  592. {"es-CR", "es-419"},
  593. {"es-CU", "es-419"},
  594. {"es-DO", "es-419"},
  595. {"es-EC", "es-419"},
  596. {"es-GT", "es-419"},
  597. {"es-HN", "es-419"},
  598. {"es-MX", "es-419"},
  599. {"es-NI", "es-419"},
  600. {"es-PA", "es-419"},
  601. {"es-PE", "es-419"},
  602. {"es-PR", "es-419"},
  603. {"es-PY", "es-419"},
  604. {"es-SV", "es-419"},
  605. {"es-US", "es-419"},
  606. {"es-UY", "es-419"},
  607. {"es-VE", "es-419"},
  608. // exceptions (according to CLDR)
  609. {"es-CW", "es"},
  610. // Inherit from pt-PT, instead of pt for these countries.
  611. {"pt-AO", "pt-PT"},
  612. {"pt-CV", "pt-PT"},
  613. {"pt-GW", "pt-PT"},
  614. {"pt-MO", "pt-PT"},
  615. {"pt-MZ", "pt-PT"},
  616. {"pt-ST", "pt-PT"},
  617. {"pt-TL", "pt-PT"},
  618. {"en-GB-u-co-phonebk-rg-uszzzz", "en-GB"},
  619. {"en-GB-u-rg-uszzzz", "en-GB"},
  620. {"en-US-u-va-posix", "en-US"},
  621. // Difference between language and regional tag.
  622. {"ca-ES-valencia", "ca-ES"},
  623. {"ca-ES-valencia-u-rg-ptzzzz", "ca-ES"},
  624. {"en-US-u-va-variant", "en-US"},
  625. {"en-u-va-variant", "en"},
  626. {"en-u-rg-gbzzzz", "en"},
  627. {"en-US-u-rg-gbzzzz", "en-US"},
  628. {"nl-US-u-rg-gbzzzz", "nl-US"},
  629. }
  630. for _, tt := range tests {
  631. tag := Raw.MustParse(tt.in)
  632. if p := Raw.MustParse(tt.out); p != tag.Parent() {
  633. t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
  634. }
  635. }
  636. }
  // Input corpora for the Parse benchmarks, grouped by the kind of work the
  // comments below expect parsing to involve.

  // benchBasic holds tags without error that don't need to be changed.
  var benchBasic = []string{
  	"en",
  	"en-Latn",
  	"en-GB",
  	"za",
  	"zh-Hant",
  	"zh",
  	"zh-HK",
  	"ar-MK",
  	"en-CA",
  	"fr-CA",
  	"fr-CH",
  	"fr",
  	"lv",
  	"he-IT",
  	"tlh",
  	"ja",
  	"ja-Jpan",
  	"ja-Jpan-JP",
  	"de-1996",
  	"de-CH",
  	"sr",
  	"sr-Latn",
  }

  // benchExt holds tags with extensions; no changes required.
  var benchExt = []string{
  	"x-a-b-c-d",
  	"x-aa-bbbb-cccccccc-d",
  	"en-x_cc-b-bbb-a-aaa",
  	"en-c_cc-b-bbb-a-aaa-x-x",
  	"en-u-co-phonebk",
  	"en-Cyrl-u-co-phonebk",
  	"en-US-u-co-phonebk-cu-xau",
  	"en-nedix-u-co-phonebk",
  	"en-t-t0-abcd",
  	"en-t-nl-latn",
  	"en-t-t0-abcd-x-a",
  }

  // benchSimpleChange holds tags that need a change, but no memory allocation.
  var benchSimpleChange = []string{
  	"EN",
  	"i-klingon",
  	"en-latn",
  	"zh-cmn-Hans-CN",
  	"iw-NL",
  }

  // benchChangeAlloc holds tags that need a change and a memory allocation.
  var benchChangeAlloc = []string{
  	"en-c_cc-b-bbb-a-aaa",
  	"en-u-cu-xua-co-phonebk",
  	"en-u-cu-xua-co-phonebk-a-cd",
  	"en-u-def-abc-cu-xua-co-phonebk",
  	"en-t-en-Cyrl-NL-1994",
  	"en-t-en-Cyrl-NL-1994-t0-abc-def",
  }

  // benchErr holds tags that result in errors.
  var benchErr = []string{
  	// IllFormed
  	"x_A.-B-C_D",
  	"en-u-cu-co-phonebk",
  	"en-u-cu-xau-co",
  	"en-t-nl-abcd",
  	// Invalid
  	"xx",
  	"nl-Uuuu",
  	"nl-QB",
  }

  // benchChange is benchSimpleChange followed by benchChangeAlloc.
  var benchChange = append(benchSimpleChange, benchChangeAlloc...)

  // benchAll is every corpus in order: basic, extensions, changes, errors.
  var benchAll = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
  709. func doParse(b *testing.B, tag []string) {
  710. for i := 0; i < b.N; i++ {
  711. // Use the modulo instead of looping over all tags so that we get a somewhat
  712. // meaningful ns/op.
  713. Parse(tag[i%len(tag)])
  714. }
  715. }
  716. func BenchmarkParse(b *testing.B) {
  717. doParse(b, benchAll)
  718. }
  719. func BenchmarkParseBasic(b *testing.B) {
  720. doParse(b, benchBasic)
  721. }
  722. func BenchmarkParseError(b *testing.B) {
  723. doParse(b, benchErr)
  724. }
  725. func BenchmarkParseSimpleChange(b *testing.B) {
  726. doParse(b, benchSimpleChange)
  727. }
  728. func BenchmarkParseChangeAlloc(b *testing.B) {
  729. doParse(b, benchChangeAlloc)
  730. }