match_test.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package language
  5. import (
  6. "bytes"
  7. "flag"
  8. "fmt"
  9. "os"
  10. "path"
  11. "path/filepath"
  12. "strings"
  13. "testing"
  14. "golang.org/x/text/internal/testtext"
  15. "golang.org/x/text/internal/ucd"
  16. )
// verbose is the -verbose command-line flag. Per its usage string, setting it
// to true prints the internal tables of matchers.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
  18. func TestCompliance(t *testing.T) {
  19. filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
  20. if info.IsDir() {
  21. return nil
  22. }
  23. r, err := os.Open(file)
  24. if err != nil {
  25. t.Fatal(err)
  26. }
  27. ucd.Parse(r, func(p *ucd.Parser) {
  28. name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
  29. if skip[name] {
  30. return
  31. }
  32. t.Run(info.Name()+"/"+name, func(t *testing.T) {
  33. supported := makeTagList(p.String(0))
  34. desired := makeTagList(p.String(1))
  35. gotCombined, index, conf := NewMatcher(supported).Match(desired...)
  36. gotMatch := supported[index]
  37. wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
  38. if gotMatch != wantMatch {
  39. t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
  40. }
  41. if tag := strings.TrimSpace(p.String(3)); tag != "" {
  42. wantCombined := Raw.MustParse(tag)
  43. if err == nil && gotCombined != wantCombined {
  44. t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
  45. }
  46. }
  47. })
  48. })
  49. return nil
  50. })
  51. }
// skip lists compliance test cases that TestCompliance does not run. Keys are
// the names TestCompliance builds for each record: the supported and desired
// fields joined by "/" with spaces removed. The comments next to the entries
// record the mismatch observed for that case.
var skip = map[string]bool{
	// TODO: bugs
	// Honor the wildcard match. This may only be useful to select non-exact
	// stuff.
	"mul,af/nl": true, // match: got "af"; want "mul"

	// TODO: include other extensions.
	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,

	// Inconsistencies with Mark Davis' implementation where it is not clear
	// which is better.

	// Inconsistencies in combined. I think the Go approach is more appropriate.
	// We could use -u-rg- as alternative.
	"und,fr/fr-BE-fonipa":              true, // combined: got "fr"; want "fr-BE-fonipa"
	"und,fr-CA/fr-BE-fonipa":           true, // combined: got "fr-CA"; want "fr-BE-fonipa"
	"und,fr-fonupa/fr-BE-fonipa":       true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
	"und,no/nn-BE-fonipa":              true, // combined: got "no"; want "no-BE-fonipa"
	"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"

	// The initial number is a threshold. As we don't use scoring, we will not
	// implement this.
	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
	// match: got "und"; want "fr-Cyrl-CA-fonupa"
	// combined: got "und"; want "fr-Cyrl-BE-fonipa"

	// Other interesting cases to test:
	// - Should same language or same script have the preference if there is
	//   usually no understanding of the other script?
	// - More specific region in desired may replace enclosing supported.
}
  79. func makeTagList(s string) (tags []Tag) {
  80. for _, s := range strings.Split(s, ",") {
  81. tags = append(tags, mk(strings.TrimSpace(s)))
  82. }
  83. return tags
  84. }
  85. func TestMatchStrings(t *testing.T) {
  86. testCases := []struct {
  87. supported string
  88. desired string // strings separted by |
  89. tag string
  90. index int
  91. }{{
  92. supported: "en",
  93. desired: "",
  94. tag: "en",
  95. index: 0,
  96. }, {
  97. supported: "en",
  98. desired: "nl",
  99. tag: "en",
  100. index: 0,
  101. }, {
  102. supported: "en,nl",
  103. desired: "nl",
  104. tag: "nl",
  105. index: 1,
  106. }, {
  107. supported: "en,nl",
  108. desired: "nl|en",
  109. tag: "nl",
  110. index: 1,
  111. }, {
  112. supported: "en-GB,nl",
  113. desired: "en ; q=0.1,nl",
  114. tag: "nl",
  115. index: 1,
  116. }, {
  117. supported: "en-GB,nl",
  118. desired: "en;q=0.005 | dk; q=0.1,nl ",
  119. tag: "en-GB",
  120. index: 0,
  121. }, {
  122. // do not match faulty tags with und
  123. supported: "en,und",
  124. desired: "|en",
  125. tag: "en",
  126. index: 0,
  127. }}
  128. for _, tc := range testCases {
  129. t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
  130. m := NewMatcher(makeTagList(tc.supported))
  131. tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
  132. if tag.String() != tc.tag || index != tc.index {
  133. t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
  134. }
  135. })
  136. }
  137. }
  138. func TestRegionGroups(t *testing.T) {
  139. testCases := []struct {
  140. a, b string
  141. distance uint8
  142. }{
  143. {"zh-TW", "zh-HK", 5},
  144. {"zh-MO", "zh-HK", 4},
  145. {"es-ES", "es-AR", 5},
  146. {"es-ES", "es", 4},
  147. {"es-419", "es-MX", 4},
  148. {"es-AR", "es-MX", 4},
  149. {"es-ES", "es-MX", 5},
  150. {"es-PT", "es-MX", 5},
  151. }
  152. for _, tc := range testCases {
  153. a := MustParse(tc.a)
  154. aScript, _ := a.Script()
  155. b := MustParse(tc.b)
  156. bScript, _ := b.Script()
  157. if aScript != bScript {
  158. t.Errorf("scripts differ: %q vs %q", aScript, bScript)
  159. continue
  160. }
  161. d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
  162. if d != tc.distance {
  163. t.Errorf("got %q; want %q", d, tc.distance)
  164. }
  165. }
  166. }
  167. func TestIsParadigmLocale(t *testing.T) {
  168. testCases := map[string]bool{
  169. "en-US": true,
  170. "en-GB": true,
  171. "en-VI": false,
  172. "es-GB": false,
  173. "es-ES": true,
  174. "es-419": true,
  175. }
  176. for str, want := range testCases {
  177. tt := Make(str)
  178. tag := tt.tag()
  179. got := isParadigmLocale(tag.LangID, tag.RegionID)
  180. if got != want {
  181. t.Errorf("isPL(%q) = %v; want %v", str, got, want)
  182. }
  183. }
  184. }
  185. // Implementation of String methods for various types for debugging purposes.
  186. func (m *matcher) String() string {
  187. w := &bytes.Buffer{}
  188. fmt.Fprintln(w, "Default:", m.default_)
  189. for tag, h := range m.index {
  190. fmt.Fprintf(w, " %s: %v\n", tag, h)
  191. }
  192. return w.String()
  193. }
  194. func (h *matchHeader) String() string {
  195. w := &bytes.Buffer{}
  196. fmt.Fprint(w, "haveTag: ")
  197. for _, h := range h.haveTags {
  198. fmt.Fprintf(w, "%v, ", h)
  199. }
  200. return w.String()
  201. }
  202. func (t haveTag) String() string {
  203. return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
  204. }
  205. func TestBestMatchAlloc(t *testing.T) {
  206. m := NewMatcher(makeTagList("en sr nl"))
  207. // Go allocates when creating a list of tags from a single tag!
  208. list := []Tag{English}
  209. avg := testtext.AllocsPerRun(1, func() {
  210. m.Match(list...)
  211. })
  212. if avg > 0 {
  213. t.Errorf("got %f; want 0", avg)
  214. }
  215. }
// benchHave is the list of tags from which the benchmarks in this file build
// their matcher (it is the first argument to newMatcher in each of them).
var benchHave = []Tag{
	mk("en"),
	mk("en-GB"),
	mk("za"),
	mk("zh-Hant"),
	mk("zh-Hans-CN"),
	mk("zh"),
	mk("zh-HK"),
	mk("ar-MK"),
	mk("en-CA"),
	mk("fr-CA"),
	mk("fr-US"),
	mk("fr-CH"),
	mk("fr"),
	mk("lt"),
	mk("lv"),
	mk("iw"),
	mk("iw-NL"),
	mk("he"),
	mk("he-IT"),
	mk("tlh"),
	mk("ja"),
	mk("ja-Jpan"),
	mk("ja-Jpan-JP"),
	mk("de"),
	mk("de-CH"),
	mk("de-AT"),
	mk("de-DE"),
	mk("sr"),
	mk("sr-Latn"),
	mk("sr-Cyrl"),
	mk("sr-ME"),
}
  249. var benchWant = [][]Tag{
  250. []Tag{
  251. mk("en"),
  252. },
  253. []Tag{
  254. mk("en-AU"),
  255. mk("de-HK"),
  256. mk("nl"),
  257. mk("fy"),
  258. mk("lv"),
  259. },
  260. []Tag{
  261. mk("en-AU"),
  262. mk("de-HK"),
  263. mk("nl"),
  264. mk("fy"),
  265. },
  266. []Tag{
  267. mk("ja-Hant"),
  268. mk("da-HK"),
  269. mk("nl"),
  270. mk("zh-TW"),
  271. },
  272. []Tag{
  273. mk("ja-Hant"),
  274. mk("da-HK"),
  275. mk("nl"),
  276. mk("hr"),
  277. },
  278. }
  279. func BenchmarkMatch(b *testing.B) {
  280. m := newMatcher(benchHave, nil)
  281. for i := 0; i < b.N; i++ {
  282. for _, want := range benchWant {
  283. m.getBest(want...)
  284. }
  285. }
  286. }
  287. func BenchmarkMatchExact(b *testing.B) {
  288. want := mk("en")
  289. m := newMatcher(benchHave, nil)
  290. for i := 0; i < b.N; i++ {
  291. m.getBest(want)
  292. }
  293. }
  294. func BenchmarkMatchAltLanguagePresent(b *testing.B) {
  295. want := mk("hr")
  296. m := newMatcher(benchHave, nil)
  297. for i := 0; i < b.N; i++ {
  298. m.getBest(want)
  299. }
  300. }
  301. func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
  302. want := mk("nn")
  303. m := newMatcher(benchHave, nil)
  304. for i := 0; i < b.N; i++ {
  305. m.getBest(want)
  306. }
  307. }
  308. func BenchmarkMatchAltScriptPresent(b *testing.B) {
  309. want := mk("zh-Hant-CN")
  310. m := newMatcher(benchHave, nil)
  311. for i := 0; i < b.N; i++ {
  312. m.getBest(want)
  313. }
  314. }
  315. func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
  316. want := mk("fr-Cyrl")
  317. m := newMatcher(benchHave, nil)
  318. for i := 0; i < b.N; i++ {
  319. m.getBest(want)
  320. }
  321. }
  322. func BenchmarkMatchLimitedExact(b *testing.B) {
  323. want := []Tag{mk("he-NL"), mk("iw-NL")}
  324. m := newMatcher(benchHave, nil)
  325. for i := 0; i < b.N; i++ {
  326. m.getBest(want...)
  327. }
  328. }