123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
- // Copyright 2013 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package language
- import (
- "bytes"
- "flag"
- "fmt"
- "os"
- "path"
- "path/filepath"
- "strings"
- "testing"
- "golang.org/x/text/internal/testtext"
- "golang.org/x/text/internal/ucd"
- )
// verbose is the -verbose command-line flag. Per its usage string, setting it
// to true requests printing of the internal tables of matchers.
var verbose = flag.Bool(
	"verbose",
	false,
	"set to true to print the internal tables of matchers",
)
- func TestCompliance(t *testing.T) {
- filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
- if info.IsDir() {
- return nil
- }
- r, err := os.Open(file)
- if err != nil {
- t.Fatal(err)
- }
- ucd.Parse(r, func(p *ucd.Parser) {
- name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
- if skip[name] {
- return
- }
- t.Run(info.Name()+"/"+name, func(t *testing.T) {
- supported := makeTagList(p.String(0))
- desired := makeTagList(p.String(1))
- gotCombined, index, conf := NewMatcher(supported).Match(desired...)
- gotMatch := supported[index]
- wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
- if gotMatch != wantMatch {
- t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
- }
- if tag := strings.TrimSpace(p.String(3)); tag != "" {
- wantCombined := Raw.MustParse(tag)
- if err == nil && gotCombined != wantCombined {
- t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
- }
- }
- })
- })
- return nil
- })
- }
// skip is the set of compliance test cases that are not run. Keys have the
// form "<supported>/<desired>" with all spaces removed. The comment on each
// entry records what the matcher currently produces versus what the test data
// expects.
var skip = map[string]bool{
	// TODO: bugs

	// Honor the wildcard match. This may only be useful to select non-exact
	// stuff.
	// match: got "af"; want "mul"
	"mul,af/nl": true,

	// TODO: include other extensions.
	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,

	// Inconsistencies with Mark Davis' implementation where it is not clear
	// which is better.

	// Inconsistencies in combined. I think the Go approach is more appropriate.
	// We could use -u-rg- as alternative.

	// combined: got "fr"; want "fr-BE-fonipa"
	"und,fr/fr-BE-fonipa": true,
	// combined: got "fr-CA"; want "fr-BE-fonipa"
	"und,fr-CA/fr-BE-fonipa": true,
	// combined: got "fr-fonupa"; want "fr-BE-fonipa"
	"und,fr-fonupa/fr-BE-fonipa": true,
	// combined: got "no"; want "no-BE-fonipa"
	"und,no/nn-BE-fonipa": true,
	// combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
	"50,und,fr-CA-fonupa/fr-BE-fonipa": true,

	// The initial number is a threshold. As we don't use scoring, we will not
	// implement this.
	// match: got "und"; want "fr-Cyrl-CA-fonupa"
	// combined: got "und"; want "fr-Cyrl-BE-fonipa"
	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,

	// Other interesting cases to test:
	// - Should same language or same script have the preference if there is
	//   usually no understanding of the other script?
	// - More specific region in desired may replace enclosing supported.
}
- func makeTagList(s string) (tags []Tag) {
- for _, s := range strings.Split(s, ",") {
- tags = append(tags, mk(strings.TrimSpace(s)))
- }
- return tags
- }
- func TestMatchStrings(t *testing.T) {
- testCases := []struct {
- supported string
- desired string // strings separted by |
- tag string
- index int
- }{{
- supported: "en",
- desired: "",
- tag: "en",
- index: 0,
- }, {
- supported: "en",
- desired: "nl",
- tag: "en",
- index: 0,
- }, {
- supported: "en,nl",
- desired: "nl",
- tag: "nl",
- index: 1,
- }, {
- supported: "en,nl",
- desired: "nl|en",
- tag: "nl",
- index: 1,
- }, {
- supported: "en-GB,nl",
- desired: "en ; q=0.1,nl",
- tag: "nl",
- index: 1,
- }, {
- supported: "en-GB,nl",
- desired: "en;q=0.005 | dk; q=0.1,nl ",
- tag: "en-GB",
- index: 0,
- }, {
- // do not match faulty tags with und
- supported: "en,und",
- desired: "|en",
- tag: "en",
- index: 0,
- }}
- for _, tc := range testCases {
- t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
- m := NewMatcher(makeTagList(tc.supported))
- tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
- if tag.String() != tc.tag || index != tc.index {
- t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
- }
- })
- }
- }
- func TestRegionGroups(t *testing.T) {
- testCases := []struct {
- a, b string
- distance uint8
- }{
- {"zh-TW", "zh-HK", 5},
- {"zh-MO", "zh-HK", 4},
- {"es-ES", "es-AR", 5},
- {"es-ES", "es", 4},
- {"es-419", "es-MX", 4},
- {"es-AR", "es-MX", 4},
- {"es-ES", "es-MX", 5},
- {"es-PT", "es-MX", 5},
- }
- for _, tc := range testCases {
- a := MustParse(tc.a)
- aScript, _ := a.Script()
- b := MustParse(tc.b)
- bScript, _ := b.Script()
- if aScript != bScript {
- t.Errorf("scripts differ: %q vs %q", aScript, bScript)
- continue
- }
- d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
- if d != tc.distance {
- t.Errorf("got %q; want %q", d, tc.distance)
- }
- }
- }
- func TestIsParadigmLocale(t *testing.T) {
- testCases := map[string]bool{
- "en-US": true,
- "en-GB": true,
- "en-VI": false,
- "es-GB": false,
- "es-ES": true,
- "es-419": true,
- }
- for str, want := range testCases {
- tt := Make(str)
- tag := tt.tag()
- got := isParadigmLocale(tag.LangID, tag.RegionID)
- if got != want {
- t.Errorf("isPL(%q) = %v; want %v", str, got, want)
- }
- }
- }
- // Implementation of String methods for various types for debugging purposes.
- func (m *matcher) String() string {
- w := &bytes.Buffer{}
- fmt.Fprintln(w, "Default:", m.default_)
- for tag, h := range m.index {
- fmt.Fprintf(w, " %s: %v\n", tag, h)
- }
- return w.String()
- }
- func (h *matchHeader) String() string {
- w := &bytes.Buffer{}
- fmt.Fprint(w, "haveTag: ")
- for _, h := range h.haveTags {
- fmt.Fprintf(w, "%v, ", h)
- }
- return w.String()
- }
- func (t haveTag) String() string {
- return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
- }
- func TestBestMatchAlloc(t *testing.T) {
- m := NewMatcher(makeTagList("en sr nl"))
- // Go allocates when creating a list of tags from a single tag!
- list := []Tag{English}
- avg := testtext.AllocsPerRun(1, func() {
- m.Match(list...)
- })
- if avg > 0 {
- t.Errorf("got %f; want 0", avg)
- }
- }
- var benchHave = []Tag{
- mk("en"),
- mk("en-GB"),
- mk("za"),
- mk("zh-Hant"),
- mk("zh-Hans-CN"),
- mk("zh"),
- mk("zh-HK"),
- mk("ar-MK"),
- mk("en-CA"),
- mk("fr-CA"),
- mk("fr-US"),
- mk("fr-CH"),
- mk("fr"),
- mk("lt"),
- mk("lv"),
- mk("iw"),
- mk("iw-NL"),
- mk("he"),
- mk("he-IT"),
- mk("tlh"),
- mk("ja"),
- mk("ja-Jpan"),
- mk("ja-Jpan-JP"),
- mk("de"),
- mk("de-CH"),
- mk("de-AT"),
- mk("de-DE"),
- mk("sr"),
- mk("sr-Latn"),
- mk("sr-Cyrl"),
- mk("sr-ME"),
- }
- var benchWant = [][]Tag{
- []Tag{
- mk("en"),
- },
- []Tag{
- mk("en-AU"),
- mk("de-HK"),
- mk("nl"),
- mk("fy"),
- mk("lv"),
- },
- []Tag{
- mk("en-AU"),
- mk("de-HK"),
- mk("nl"),
- mk("fy"),
- },
- []Tag{
- mk("ja-Hant"),
- mk("da-HK"),
- mk("nl"),
- mk("zh-TW"),
- },
- []Tag{
- mk("ja-Hant"),
- mk("da-HK"),
- mk("nl"),
- mk("hr"),
- },
- }
- func BenchmarkMatch(b *testing.B) {
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- for _, want := range benchWant {
- m.getBest(want...)
- }
- }
- }
- func BenchmarkMatchExact(b *testing.B) {
- want := mk("en")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
- }
- func BenchmarkMatchAltLanguagePresent(b *testing.B) {
- want := mk("hr")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
- }
- func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
- want := mk("nn")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
- }
- func BenchmarkMatchAltScriptPresent(b *testing.B) {
- want := mk("zh-Hant-CN")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
- }
- func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
- want := mk("fr-Cyrl")
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want)
- }
- }
- func BenchmarkMatchLimitedExact(b *testing.B) {
- want := []Tag{mk("he-NL"), mk("iw-NL")}
- m := newMatcher(benchHave, nil)
- for i := 0; i < b.N; i++ {
- m.getBest(want...)
- }
- }
|