GoLocaleMatcherTest.txt 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. # basics
  2. fr, en-GB, en ; en-GB ; en-GB
  3. fr, en-GB, en ; en-US ; en
  4. fr, en-GB, en ; fr-FR ; fr
  5. fr, en-GB, en ; ja-JP ; fr
  6. # script fallbacks
  7. zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
  8. zh-CN, zh-TW, iw ; zh ; zh-CN
  9. zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
  10. zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
  11. zh-CN, zh-TW, iw ; he-IT ; iw ; iw-u-rg-itzzzz
  12. # language-specific script fallbacks 1
  13. en, sr, nl ; sr-Latn ; sr
  14. en, sr, nl ; sh ; sr # different script, but seems okay and is as CLDR suggests
  15. en, sr, nl ; hr ; en
  16. en, sr, nl ; bs ; en
  17. en, sr, nl ; nl-Cyrl ; sr
  18. # language-specific script fallbacks 2
  19. en, sh ; sr ; sh
  20. en, sh ; sr-Cyrl ; sh
  21. en, sh ; hr ; sh
  22. # don't match hr to sr-Latn
  23. en, sr-Latn ; hr ; en
  24. # both deprecated and not
  25. fil, tl, iw, he ; he-IT ; he
  26. fil, tl, iw, he ; he ; he
  27. fil, tl, iw, he ; iw ; iw
  28. fil, tl, iw, he ; fil-IT ; fil
  29. fil, tl, iw, he ; fil ; fil
  30. fil, tl, iw, he ; tl ; tl
  31. # nearby languages
  32. en, fil, ro, nn ; tl ; fil
  33. en, fil, ro, nn ; mo ; ro
  34. en, fil, ro, nn ; nb ; nn
  35. en, fil, ro, nn ; ja ; en
  36. # nearby languages: Nynorsk to Bokmål
  37. en, nb ; nn ; nb
  38. # nearby languages: Danish does not match nn
  39. en, nn ; da ; en
  40. # nearby languages: Danish matches no
  41. en, no ; da ; no
  42. # nearby languages: Danish matches nb
  43. en, nb ; da ; nb
  44. # prefer matching languages over language variants.
  45. nn, en-GB ; no, en-US ; en-GB
  46. nn, en-GB ; nb, en-US ; en-GB
  47. # deprecated version is closer than same language with other differences
  48. nl, he, en-GB ; iw, en-US ; he
  49. # macro equivalent is closer than same language with other differences
  50. nl, zh, en-GB, no ; cmn, en-US ; zh
  51. nl, zh, en-GB, no ; nb, en-US ; no
  52. # legacy equivalent is closer than same language with other differences
  53. nl, fil, en-GB ; tl, en-US ; fil
  54. # distinguish near equivalents
  55. en, ro, mo, ro-MD ; ro ; ro
  56. en, ro, mo, ro-MD ; mo ; mo
  57. en, ro, mo, ro-MD ; ro-MD ; ro-MD
  58. # maximization of legacy
  59. sr-Cyrl, sr-Latn, ro, ro-MD ; sh ; sr-Latn
  60. sr-Cyrl, sr-Latn, ro, ro-MD ; mo ; ro-MD
  61. # empty
  62. ; fr ; und
  63. ; en ; und
  64. # private use subtags
  65. fr, en-GB, x-bork, es-ES, es-419 ; x-piglatin ; fr
  66. fr, en-GB, x-bork, es-ES, es-419 ; x-bork ; x-bork
  67. # grandfathered codes
  68. fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
  69. fr, i-klingon, en-Latn-US ; i-klingon ; tlh
  70. # simple variant match
  71. fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB
  72. fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
  73. # best match for traditional Chinese
  74. fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
  75. fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
  76. fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
  77. fr, zh-Hans-CN, en-US ; zh-Hant-CN, en ; en-US
  78. fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
  79. # more specific script should win in case regions are identical
  80. af, af-Latn, af-Arab ; af ; af
  81. af, af-Latn, af-Arab ; af-ZA ; af
  82. af, af-Latn, af-Arab ; af-Latn-ZA ; af-Latn
  83. af, af-Latn, af-Arab ; af-Latn ; af-Latn
  84. # more specific region should win
  85. nl, nl-NL, nl-BE ; nl ; nl
  86. nl, nl-NL, nl-BE ; nl-Latn ; nl
  87. nl, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
  88. nl, nl-NL, nl-BE ; nl-NL ; nl-NL
  89. # region may replace matched if matched is enclosing
  90. es-419,es ; es-MX ; es-419 ; es-MX
  91. es-419,es ; es-SG ; es
  92. # more specific region wins over more specific script
  93. nl, nl-Latn, nl-NL, nl-BE ; nl ; nl
  94. nl, nl-Latn, nl-NL, nl-BE ; nl-Latn ; nl-Latn
  95. nl, nl-Latn, nl-NL, nl-BE ; nl-NL ; nl-NL
  96. nl, nl-Latn, nl-NL, nl-BE ; nl-Latn-NL ; nl-NL
  97. # region distance Portuguese
  98. pt, pt-PT ; pt-ES ; pt-PT
  99. # if no preferred locale specified, pick top language, not regional
  100. en, fr, fr-CA, fr-CH ; fr-US ; fr ; fr-u-rg-uszzzz
  101. # region distance German
  102. de-AT, de-DE, de-CH ; de ; de-DE
  103. # en-AU is closer to en-GB than to en (which is en-US)
  104. en, en-GB, es-ES, es-419 ; en-AU ; en-GB
  105. en, en-GB, es-ES, es-419 ; es-MX ; es-419 ; es-MX
  106. en, en-GB, es-ES, es-419 ; es-PT ; es-ES
  107. # undefined
  108. it, fr ; und ; it
  109. # und does not match en
  110. it, en ; und ; it
  111. # undefined in priority list
  112. it, und ; und ; und
  113. it, und ; en ; it
  114. # undefined language with an explicit region or script
  115. it, fr, zh ; und-FR ; fr
  116. it, fr, zh ; und-CN ; zh
  117. it, fr, zh ; und-Hans ; zh
  118. it, fr, zh ; und-Hant ; zh
  119. it, fr, zh ; und-Latn ; it
  120. # match on maximized tag
  121. fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja
  122. fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja
  123. # pick best maximized tag
  124. ja, ja-Jpan-US, ja-JP, en, ru ; ja-Jpan, ru ; ja
  125. ja, ja-Jpan-US, ja-JP, en, ru ; ja-JP, ru ; ja-JP
  126. ja, ja-Jpan-US, ja-JP, en, ru ; ja-US, ru ; ja-Jpan-US
  127. # termination: pick best maximized match
  128. ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan-JP, ru ; ja-JP
  129. ja, ja-Jpan, ja-JP, en, ru ; ja-Jpan, ru ; ja-Jpan
  130. # same language over exact, but distinguish when user is explicit
  131. fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
  132. en, de, fr, ja ; de-CH, fr ; de # TODO: ; de-u-rg-chzzzz
  133. en-GB, nl ; en, nl ; en-GB
  134. en-GB, nl ; en, nl, en-GB ; nl
  135. # parent relation preserved
  136. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-150 ; en-GB
  137. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-AU ; en-GB
  138. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-BE ; en-GB
  139. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GG ; en-GB
  140. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-GI ; en-GB
  141. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-HK ; en-GB
  142. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IE ; en-GB
  143. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IM ; en-GB
  144. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-IN ; en-GB
  145. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-JE ; en-GB
  146. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
  147. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-NZ ; en-GB
  148. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-PK ; en-GB
  149. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-SG ; en-GB
  150. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-DE ; en-GB
  151. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; en-MT ; en-GB
  152. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-AR ; es-419 ; es-AR
  153. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-BO ; es-419 ; es-BO
  154. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CL ; es-419 ; es-CL
  155. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CO ; es-419 ; es-CO
  156. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CR ; es-419 ; es-CR
  157. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-CU ; es-419 ; es-CU
  158. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-DO ; es-419 ; es-DO
  159. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-EC ; es-419 ; es-EC
  160. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-GT ; es-419 ; es-GT
  161. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-HN ; es-419 ; es-HN
  162. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-MX ; es-419 ; es-MX
  163. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-NI ; es-419 ; es-NI
  164. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PA ; es-419 ; es-PA
  165. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PE ; es-419 ; es-PE
  166. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PR ; es-419 ; es-PR
  167. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PT ; es
  168. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-PY ; es-419 ; es-PY
  169. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-SV ; es-419 ; es-SV
  170. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-US ; es-419
  171. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-UY ; es-419 ; es-UY
  172. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; es-VE ; es-419 ; es-VE
  173. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-AO ; pt-PT
  174. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-CV ; pt-PT
  175. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-GW ; pt-PT
  176. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MO ; pt-PT
  177. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-MZ ; pt-PT
  178. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-ST ; pt-PT
  179. en, en-US, en-GB, es, es-419, pt, pt-BR, pt-PT, zh, zh-Hant, zh-Hant-HK ; pt-TL ; pt-PT
  180. # preserve extensions
  181. en, de, sl-nedis ; de-FR-u-co-phonebk ; de ; de-u-co-phonebk-rg-frzzzz
  182. en, de, sl-nedis ; sl-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
  183. en, de, sl-nedis ; sl-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur
  184. en, de, sl-nedis ; sl-HR-nedis-u-cu-eur ; sl-nedis ; sl-nedis-u-cu-eur-rg-hrzzzz
  185. en, de, sl-nedis ; de-t-m0-iso-i0-pinyin ; de ; de-t-m0-iso-i0-pinyin
  186. und, nl ; nl-BE-fonipa ; nl ; nl-u-rg-bezzzz
  187. und, nl-CA ; nl-BE-fonipa ; nl-CA ; nl-CA-u-rg-bezzzz
  188. und, nl-fonupa ; nl-BE-fonipa ; nl-fonupa ; nl-fonupa-u-rg-bezzzz
  189. und, no ; nn-DK-fonipa ; no ; no-u-rg-dkzzzz
  190. und, en-GB-u-sd-usca ; en-US-fonipa-u-nu-Arab-ca-buddhist-sd-usdc-t-m0-iso-i0-pinyin ; en-GB-u-sd-usca ; en-GB-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-Arab-rg-uszzzz-sd-usca