// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
- package collate
- import (
- "bytes"
- "testing"
- "golang.org/x/text/internal/colltab"
- "golang.org/x/text/language"
- )
- type weightsTest struct {
- opt opts
- in, out ColElems
- }
- type opts struct {
- lev int
- alt alternateHandling
- top int
- backwards bool
- caseLevel bool
- }
- // ignore returns an initialized boolean array based on the given Level.
- // A negative value means using the default setting of quaternary.
- func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) {
- if level < 0 {
- level = colltab.Quaternary
- }
- for i := range ignore {
- ignore[i] = level < colltab.Level(i)
- }
- return ignore
- }
- func makeCE(w []int) colltab.Elem {
- ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
- if err != nil {
- panic(err)
- }
- return ce
- }
- func (o opts) collator() *Collator {
- c := &Collator{
- options: options{
- ignore: ignore(colltab.Level(o.lev - 1)),
- alternate: o.alt,
- backwards: o.backwards,
- caseLevel: o.caseLevel,
- variableTop: uint32(o.top),
- },
- }
- return c
- }
// maxQ is the quaternary weight that the Shifted and ShiftTrimmed
// expectations in processTests use for elements that are not shifted.
const maxQ = 1<<21 - 1 // 0x1FFFFF
- func wpq(p, q int) Weights {
- return W(p, defaults.Secondary, defaults.Tertiary, q)
- }
- func wsq(s, q int) Weights {
- return W(0, s, defaults.Tertiary, q)
- }
- func wq(q int) Weights {
- return W(0, 0, 0, q)
- }
- var zero = W(0, 0, 0, 0)
- var processTests = []weightsTest{
- // Shifted
- { // simple sequence of non-variables
- opt: opts{alt: altShifted, top: 100},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
- },
- { // first is a variable
- opt: opts{alt: altShifted, top: 250},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
- },
- { // all but first are variable
- opt: opts{alt: altShifted, top: 999},
- in: ColElems{W(1000), W(200), W(300), W(400)},
- out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
- },
- { // first is a modifier
- opt: opts{alt: altShifted, top: 999},
- in: ColElems{W(0, 10), W(1000)},
- out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
- },
- { // primary ignorables
- opt: opts{alt: altShifted, top: 250},
- in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
- },
- { // secondary ignorables
- opt: opts{alt: altShifted, top: 250},
- in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
- },
- { // tertiary ignorables, no change
- opt: opts{alt: altShifted, top: 250},
- in: ColElems{W(200), zero, W(300), zero, W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
- },
- // ShiftTrimmed (same as Shifted)
- { // simple sequence of non-variables
- opt: opts{alt: altShiftTrimmed, top: 100},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
- },
- { // first is a variable
- opt: opts{alt: altShiftTrimmed, top: 250},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
- },
- { // all but first are variable
- opt: opts{alt: altShiftTrimmed, top: 999},
- in: ColElems{W(1000), W(200), W(300), W(400)},
- out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
- },
- { // first is a modifier
- opt: opts{alt: altShiftTrimmed, top: 999},
- in: ColElems{W(0, 10), W(1000)},
- out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
- },
- { // primary ignorables
- opt: opts{alt: altShiftTrimmed, top: 250},
- in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
- },
- { // secondary ignorables
- opt: opts{alt: altShiftTrimmed, top: 250},
- in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
- },
- { // tertiary ignorables, no change
- opt: opts{alt: altShiftTrimmed, top: 250},
- in: ColElems{W(200), zero, W(300), zero, W(400)},
- out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
- },
- // Blanked
- { // simple sequence of non-variables
- opt: opts{alt: altBlanked, top: 100},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{W(200), W(300), W(400)},
- },
- { // first is a variable
- opt: opts{alt: altBlanked, top: 250},
- in: ColElems{W(200), W(300), W(400)},
- out: ColElems{zero, W(300), W(400)},
- },
- { // all but first are variable
- opt: opts{alt: altBlanked, top: 999},
- in: ColElems{W(1000), W(200), W(300), W(400)},
- out: ColElems{W(1000), zero, zero, zero},
- },
- { // first is a modifier
- opt: opts{alt: altBlanked, top: 999},
- in: ColElems{W(0, 10), W(1000)},
- out: ColElems{W(0, 10), W(1000)},
- },
- { // primary ignorables
- opt: opts{alt: altBlanked, top: 250},
- in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
- out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
- },
- { // secondary ignorables
- opt: opts{alt: altBlanked, top: 250},
- in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
- out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
- },
- { // tertiary ignorables, no change
- opt: opts{alt: altBlanked, top: 250},
- in: ColElems{W(200), zero, W(300), zero, W(400)},
- out: ColElems{zero, zero, W(300), zero, W(400)},
- },
- // Non-ignorable: input is always equal to output.
- { // all but first are variable
- opt: opts{alt: altNonIgnorable, top: 999},
- in: ColElems{W(1000), W(200), W(300), W(400)},
- out: ColElems{W(1000), W(200), W(300), W(400)},
- },
- { // primary ignorables
- opt: opts{alt: altNonIgnorable, top: 250},
- in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
- out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
- },
- { // secondary ignorables
- opt: opts{alt: altNonIgnorable, top: 250},
- in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
- out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
- },
- { // tertiary ignorables, no change
- opt: opts{alt: altNonIgnorable, top: 250},
- in: ColElems{W(200), zero, W(300), zero, W(400)},
- out: ColElems{W(200), zero, W(300), zero, W(400)},
- },
- }
- func TestProcessWeights(t *testing.T) {
- for i, tt := range processTests {
- in := convertFromWeights(tt.in)
- out := convertFromWeights(tt.out)
- processWeights(tt.opt.alt, uint32(tt.opt.top), in)
- for j, w := range in {
- if w != out[j] {
- t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
- }
- }
- }
- }
- type keyFromElemTest struct {
- opt opts
- in ColElems
- out []byte
- }
- var defS = byte(defaults.Secondary)
- var defT = byte(defaults.Tertiary)
- const sep = 0 // separator byte
- var keyFromElemTests = []keyFromElemTest{
- { // simple primary and secondary weights.
- opts{alt: altShifted},
- ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
- },
- },
- { // same as first, but with zero element that need to be removed
- opts{alt: altShifted},
- ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
- },
- },
- { // same as first, with large primary values
- opts{alt: altShifted},
- ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
- []byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
- },
- },
- { // same as first, but with the secondary level backwards
- opts{alt: altShifted, backwards: true},
- ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
- },
- },
- { // same as first, ignoring quaternary level
- opts{alt: altShifted, lev: 3},
- ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- },
- },
- { // same as first, ignoring tertiary level
- opts{alt: altShifted, lev: 2},
- ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- },
- },
- { // same as first, ignoring secondary level
- opts{alt: altShifted, lev: 1},
- ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
- },
- { // simple primary and secondary weights.
- opts{alt: altShiftTrimmed, top: 0x250},
- ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
- []byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
- sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- sep, 0xFF, 0x2, 0, // quaternary
- },
- },
- { // as first, primary with case level enabled
- opts{alt: altShifted, lev: 1, caseLevel: true},
- ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
- []byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
- sep, sep, // secondary
- sep, sep, defT, defT, defT, defT, // tertiary
- },
- },
- }
- func TestKeyFromElems(t *testing.T) {
- buf := Buffer{}
- for i, tt := range keyFromElemTests {
- buf.Reset()
- in := convertFromWeights(tt.in)
- processWeights(tt.opt.alt, uint32(tt.opt.top), in)
- tt.opt.collator().keyFromElems(&buf, in)
- res := buf.key
- if len(res) != len(tt.out) {
- t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
- }
- n := len(res)
- if len(tt.out) < n {
- n = len(tt.out)
- }
- for j, c := range res[:n] {
- if c != tt.out[j] {
- t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
- }
- }
- }
- }
- func TestGetColElems(t *testing.T) {
- for i, tt := range appendNextTests {
- c, err := makeTable(tt.in)
- if err != nil {
- // error is reported in TestAppendNext
- continue
- }
- // Create one large test per table
- str := make([]byte, 0, 4000)
- out := ColElems{}
- for len(str) < 3000 {
- for _, chk := range tt.chk {
- str = append(str, chk.in[:chk.n]...)
- out = append(out, chk.out...)
- }
- }
- for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
- out := convertFromWeights(chk.out)
- ce := c.getColElems([]byte(chk.in)[:chk.n])
- if len(ce) != len(out) {
- t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
- continue
- }
- cnt := 0
- for k, w := range ce {
- w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
- if w != out[k] {
- t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
- cnt++
- }
- if cnt > 10 {
- break
- }
- }
- }
- }
- }
// keyTest pairs an input string with the key bytes expected for it.
type keyTest struct {
	in  string // text to build a key for
	out []byte // expected key
}
- var keyTests = []keyTest{
- {"abc",
- []byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
- },
- {"a\u0301",
- []byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
- },
- {"aaaaa",
- []byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
- 0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
- 2, 2, 2, 2, 2, 0,
- 255, 255, 255, 255, 255,
- },
- },
- // Issue 16391: incomplete rune at end of UTF-8 sequence.
- {"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
- {"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
- }
- func TestKey(t *testing.T) {
- c, _ := makeTable(appendNextTests[4].in)
- c.alternate = altShifted
- c.ignore = ignore(colltab.Quaternary)
- buf := Buffer{}
- keys1 := [][]byte{}
- keys2 := [][]byte{}
- for _, tt := range keyTests {
- keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
- keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
- }
- // Separate generation from testing to ensure buffers are not overwritten.
- for i, tt := range keyTests {
- if !bytes.Equal(keys1[i], tt.out) {
- t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
- }
- if !bytes.Equal(keys2[i], tt.out) {
- t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
- }
- }
- }
// compareTest holds two strings and the result expected from comparing the
// first with the second.
type compareTest struct {
	a   string
	b   string
	res int // comparison result
}
- var compareTests = []compareTest{
- {"a\u0301", "a", 1},
- {"a\u0301b", "ab", 1},
- {"a", "a\u0301", -1},
- {"ab", "a\u0301b", -1},
- {"bc", "a\u0301c", 1},
- {"ab", "aB", -1},
- {"a\u0301", "a\u0301", 0},
- {"a", "a", 0},
- // Only clip prefixes of whole runes.
- {"\u302E", "\u302F", 1},
- // Don't clip prefixes when last rune of prefix may be part of contraction.
- {"a\u035E", "a\u0301\u035F", -1},
- {"a\u0301\u035Fb", "a\u0301\u035F", -1},
- }
- func TestCompare(t *testing.T) {
- c, _ := makeTable(appendNextTests[4].in)
- for i, tt := range compareTests {
- if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res {
- t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
- }
- if res := c.CompareString(tt.a, tt.b); res != tt.res {
- t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
- }
- }
- }
- func TestNumeric(t *testing.T) {
- c := New(language.English, Loose, Numeric)
- for i, tt := range []struct {
- a, b string
- want int
- }{
- {"1", "2", -1},
- {"2", "12", -1},
- {"2", "12", -1}, // Fullwidth is sorted as usual.
- {"₂", "₁₂", 1}, // Subscript is not sorted as numbers.
- {"②", "①②", 1}, // Circled is not sorted as numbers.
- { // Imperial Aramaic, is not sorted as number.
- "\U00010859",
- "\U00010858\U00010859",
- 1,
- },
- {"12", "2", 1},
- {"A-1", "A-2", -1},
- {"A-2", "A-12", -1},
- {"A-12", "A-2", 1},
- {"A-0001", "A-1", 0},
- } {
- if got := c.CompareString(tt.a, tt.b); got != tt.want {
- t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
- }
- }
- }
|