match_test.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package language
  5. import (
  6. "flag"
  7. "testing"
  8. )
  9. var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
  10. func TestAddLikelySubtags(t *testing.T) {
  11. tests := []struct{ in, out string }{
  12. {"aa", "aa-Latn-ET"},
  13. {"aa-Latn", "aa-Latn-ET"},
  14. {"aa-Arab", "aa-Arab-ET"},
  15. {"aa-Arab-ER", "aa-Arab-ER"},
  16. {"kk", "kk-Cyrl-KZ"},
  17. {"kk-CN", "kk-Arab-CN"},
  18. {"cmn", "cmn"},
  19. {"zh-AU", "zh-Hant-AU"},
  20. {"zh-VN", "zh-Hant-VN"},
  21. {"zh-SG", "zh-Hans-SG"},
  22. {"zh-Hant", "zh-Hant-TW"},
  23. {"zh-Hani", "zh-Hani-CN"},
  24. {"und-Hani", "zh-Hani-CN"},
  25. {"und", "en-Latn-US"},
  26. {"und-GB", "en-Latn-GB"},
  27. {"und-CW", "pap-Latn-CW"},
  28. {"und-YT", "fr-Latn-YT"},
  29. {"und-Arab", "ar-Arab-EG"},
  30. {"und-AM", "hy-Armn-AM"},
  31. {"und-TW", "zh-Hant-TW"},
  32. {"und-002", "en-Latn-NG"},
  33. {"und-Latn-002", "en-Latn-NG"},
  34. {"en-Latn-002", "en-Latn-NG"},
  35. {"en-002", "en-Latn-NG"},
  36. {"en-001", "en-Latn-US"},
  37. {"und-003", "en-Latn-US"},
  38. {"und-GB", "en-Latn-GB"},
  39. {"Latn-001", "en-Latn-US"},
  40. {"en-001", "en-Latn-US"},
  41. {"es-419", "es-Latn-419"},
  42. {"he-145", "he-Hebr-IL"},
  43. {"ky-145", "ky-Latn-TR"},
  44. {"kk", "kk-Cyrl-KZ"},
  45. // Don't specialize duplicate and ambiguous matches.
  46. {"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
  47. {"ku-145", "ku-Latn-TR"}, // Matches IQ, TR, and LB, but kk -> TR.
  48. {"und-Arab-CC", "ms-Arab-CC"},
  49. {"und-Arab-GB", "ks-Arab-GB"},
  50. {"und-Hans-CC", "zh-Hans-CC"},
  51. {"und-CC", "en-Latn-CC"},
  52. {"sr", "sr-Cyrl-RS"},
  53. {"sr-151", "sr-Latn-151"}, // Matches RO and RU.
  54. // We would like addLikelySubtags to generate the same results if the input
  55. // only changes by adding tags that would otherwise have been added
  56. // by the expansion.
  57. // In other words:
  58. // und-AA -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
  59. // und-AA -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
  60. // und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
  61. // und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
  62. // xx -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
  63. // xx -> xx-Scrp-AA implies xx-AA -> xx-Scrp-AA
  64. //
  65. // The algorithm specified in
  66. // https://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
  67. // Section C.10, does not handle the first case. For example,
  68. // the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but not
  69. // there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ
  70. // would expand to en-Latn-BJ, violating the aforementioned principle.
  71. // We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
  72. // if a rule of the form und-AA -> xx-Scrp-AA is defined.
  73. // Note that as of version 23, CLDR has some explicitly specified
  74. // entries that do not conform to these rules. The implementation
  75. // will not correct these explicit inconsistencies. A later versions of CLDR
  76. // is supposed to fix this.
  77. {"und-Latn-BJ", "fr-Latn-BJ"},
  78. {"und-Bugi-ID", "bug-Bugi-ID"},
  79. // regions, scripts and languages without definitions
  80. {"und-Arab-AA", "ar-Arab-AA"},
  81. {"und-Afak-RE", "fr-Afak-RE"},
  82. {"und-Arab-GB", "ks-Arab-GB"},
  83. {"abp-Arab-GB", "abp-Arab-GB"},
  84. // script has preference over region
  85. {"und-Arab-NL", "ar-Arab-NL"},
  86. {"zza", "zza-Latn-TR"},
  87. // preserve variants and extensions
  88. {"de-1901", "de-Latn-DE-1901"},
  89. {"de-x-abc", "de-Latn-DE-x-abc"},
  90. {"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
  91. {"x-abc", "x-abc"}, // TODO: is this the desired behavior?
  92. }
  93. for i, tt := range tests {
  94. in, _ := Parse(tt.in)
  95. out, _ := Parse(tt.out)
  96. in, _ = in.addLikelySubtags()
  97. if in.String() != out.String() {
  98. t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
  99. }
  100. }
  101. }
  102. func TestMinimize(t *testing.T) {
  103. tests := []struct{ in, out string }{
  104. {"aa", "aa"},
  105. {"aa-Latn", "aa"},
  106. {"aa-Latn-ET", "aa"},
  107. {"aa-ET", "aa"},
  108. {"aa-Arab", "aa-Arab"},
  109. {"aa-Arab-ER", "aa-Arab-ER"},
  110. {"aa-Arab-ET", "aa-Arab"},
  111. {"und", "und"},
  112. {"und-Latn", "und"},
  113. {"und-Latn-US", "und"},
  114. {"en-Latn-US", "en"},
  115. {"cmn", "cmn"},
  116. {"cmn-Hans", "cmn-Hans"},
  117. {"cmn-Hant", "cmn-Hant"},
  118. {"zh-AU", "zh-AU"},
  119. {"zh-VN", "zh-VN"},
  120. {"zh-SG", "zh-SG"},
  121. {"zh-Hant", "zh-Hant"},
  122. {"zh-Hant-TW", "zh-TW"},
  123. {"zh-Hans", "zh"},
  124. {"zh-Hani", "zh-Hani"},
  125. {"und-Hans", "und-Hans"},
  126. {"und-Hani", "und-Hani"},
  127. {"und-CW", "und-CW"},
  128. {"und-YT", "und-YT"},
  129. {"und-Arab", "und-Arab"},
  130. {"und-AM", "und-AM"},
  131. {"und-Arab-CC", "und-Arab-CC"},
  132. {"und-CC", "und-CC"},
  133. {"und-Latn-BJ", "und-BJ"},
  134. {"und-Bugi-ID", "und-Bugi"},
  135. {"bug-Bugi-ID", "bug-Bugi"},
  136. // regions, scripts and languages without definitions
  137. {"und-Arab-AA", "und-Arab-AA"},
  138. // preserve variants and extensions
  139. {"de-Latn-1901", "de-1901"},
  140. {"de-Latn-x-abc", "de-x-abc"},
  141. {"de-DE-1901-x-abc", "de-1901-x-abc"},
  142. {"x-abc", "x-abc"}, // TODO: is this the desired behavior?
  143. }
  144. for i, tt := range tests {
  145. in, _ := Parse(tt.in)
  146. out, _ := Parse(tt.out)
  147. min, _ := in.minimize()
  148. if min.String() != out.String() {
  149. t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
  150. }
  151. max, _ := min.addLikelySubtags()
  152. if x, _ := in.addLikelySubtags(); x.String() != max.String() {
  153. t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
  154. }
  155. }
  156. }