123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- // Copyright 2012 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package build
- import "testing"
- // cjk returns an implicit collation element for a CJK rune.
- func cjk(r rune) []rawCE {
- // A CJK character C is represented in the DUCET as
- // [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
- // Where AAAA is the most significant 15 bits plus a base value.
- // Any base value will work for the test, so we pick the common value of FB40.
- const base = 0xFB40
- return []rawCE{
- {w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
- {w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
- }
- }
- func pCE(p int) []rawCE {
- return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
- }
- func pqCE(p, q int) []rawCE {
- return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
- }
- func ptCE(p, t int) []rawCE {
- return mkCE([]int{p, defaultSecondary, t, 0}, 0)
- }
- func ptcCE(p, t int, ccc uint8) []rawCE {
- return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
- }
- func sCE(s int) []rawCE {
- return mkCE([]int{0, s, defaultTertiary, 0}, 0)
- }
- func stCE(s, t int) []rawCE {
- return mkCE([]int{0, s, t, 0}, 0)
- }
- func scCE(s int, ccc uint8) []rawCE {
- return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
- }
- func mkCE(w []int, ccc uint8) []rawCE {
- return []rawCE{rawCE{w, ccc}}
- }
- // ducetElem is used to define test data that is used to generate a table.
- type ducetElem struct {
- str string
- ces []rawCE
- }
- func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
- b := NewBuilder()
- for _, e := range ducet {
- ces := [][]int{}
- for _, ce := range e.ces {
- ces = append(ces, ce.w)
- }
- if err := b.Add([]rune(e.str), ces, nil); err != nil {
- t.Errorf(err.Error())
- }
- }
- b.t = &table{}
- b.root.sort()
- return b
- }
- type convertTest struct {
- in, out []rawCE
- err bool
- }
- var convLargeTests = []convertTest{
- {pCE(0xFB39), pCE(0xFB39), false},
- {cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
- {pCE(0xFB40), pCE(0), true},
- {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
- {pCE(0xFFFE), pCE(illegalOffset), false},
- {pCE(0xFFFF), pCE(illegalOffset + 1), false},
- }
- func TestConvertLarge(t *testing.T) {
- for i, tt := range convLargeTests {
- e := new(entry)
- for _, ce := range tt.in {
- e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
- }
- elems, err := convertLargeWeights(e.elems)
- if tt.err {
- if err == nil {
- t.Errorf("%d: expected error; none found", i)
- }
- continue
- } else if err != nil {
- t.Errorf("%d: unexpected error: %v", i, err)
- }
- if !equalCEArrays(elems, tt.out) {
- t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
- }
- }
- }
- // Collation element table for simplify tests.
- var simplifyTest = []ducetElem{
- {"\u0300", sCE(30)}, // grave
- {"\u030C", sCE(40)}, // caron
- {"A", ptCE(100, 8)},
- {"D", ptCE(104, 8)},
- {"E", ptCE(105, 8)},
- {"I", ptCE(110, 8)},
- {"z", ptCE(130, 8)},
- {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
- {"\u05B7", sCE(80)},
- {"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
- {"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
- {"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
- {"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
- {"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
- // no removal: tertiary value of third element is not maxTertiary
- {"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
- }
- var genColTests = []ducetElem{
- {"\uFA70", pqCE(0x1FA70, 0xFA70)},
- {"A\u0300", append(ptCE(100, 8), sCE(30)...)},
- {"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
- {"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
- }
- func TestGenColElems(t *testing.T) {
- b := newBuilder(t, simplifyTest[:5])
- for i, tt := range genColTests {
- res := b.root.genColElems(tt.str)
- if !equalCEArrays(tt.ces, res) {
- t.Errorf("%d: result %X; want %X", i, res, tt.ces)
- }
- }
- }
- type strArray []string
- func (sa strArray) contains(s string) bool {
- for _, e := range sa {
- if e == s {
- return true
- }
- }
- return false
- }
- var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
- var simplifyMarked = strArray{"\u01C5"}
- func TestSimplify(t *testing.T) {
- b := newBuilder(t, simplifyTest)
- o := &b.root
- simplify(o)
- for i, tt := range simplifyTest {
- if simplifyRemoved.contains(tt.str) {
- continue
- }
- e := o.find(tt.str)
- if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
- t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
- break
- }
- }
- var i, k int
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- gold := simplifyMarked.contains(e.str)
- if gold {
- k++
- }
- if gold != e.decompose {
- t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
- }
- i++
- }
- if k != len(simplifyMarked) {
- t.Errorf(" an entry that should be marked as decompose was deleted")
- }
- }
- var expandTest = []ducetElem{
- {"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
- {"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
- {"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
- {"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
- {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
- {"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
- }
- func TestExpand(t *testing.T) {
- const (
- totalExpansions = 5
- totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
- )
- b := newBuilder(t, expandTest)
- o := &b.root
- b.processExpansions(o)
- e := o.front()
- for _, tt := range expandTest {
- exp := b.t.ExpandElem[e.expansionIndex:]
- if int(exp[0]) != len(tt.ces) {
- t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
- }
- exp = exp[1:]
- for j, w := range tt.ces {
- if ce, _ := makeCE(w); exp[j] != ce {
- t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
- }
- }
- e, _ = e.nextIndexed()
- }
- // Verify uniquing.
- if len(b.t.ExpandElem) != totalElements {
- t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
- }
- }
- var contractTest = []ducetElem{
- {"abc", pCE(102)},
- {"abd", pCE(103)},
- {"a", pCE(100)},
- {"ab", pCE(101)},
- {"ac", pCE(104)},
- {"bcd", pCE(202)},
- {"b", pCE(200)},
- {"bc", pCE(201)},
- {"bd", pCE(203)},
- // shares suffixes with a*
- {"Ab", pCE(301)},
- {"A", pCE(300)},
- {"Ac", pCE(304)},
- {"Abc", pCE(302)},
- {"Abd", pCE(303)},
- // starter to be ignored
- {"z", pCE(1000)},
- }
- func TestContract(t *testing.T) {
- const (
- totalElements = 5 + 5 + 4
- )
- b := newBuilder(t, contractTest)
- o := &b.root
- b.processContractions(o)
- indexMap := make(map[int]bool)
- handleMap := make(map[rune]*entry)
- for e := o.front(); e != nil; e, _ = e.nextIndexed() {
- if e.contractionHandle.n > 0 {
- handleMap[e.runes[0]] = e
- indexMap[e.contractionHandle.index] = true
- }
- }
- // Verify uniquing.
- if len(indexMap) != 2 {
- t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
- }
- for _, tt := range contractTest {
- e, ok := handleMap[[]rune(tt.str)[0]]
- if !ok {
- continue
- }
- str := tt.str[1:]
- offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
- if len(str) != n {
- t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
- }
- ce := b.t.ContractElem[offset+e.contractionIndex]
- if want, _ := makeCE(tt.ces[0]); want != ce {
- t.Errorf("%s: element %X; want %X", tt.str, ce, want)
- }
- }
- if len(b.t.ContractElem) != totalElements {
- t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)
- }
- }
|