builder_test.go 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package build
  5. import "testing"
  6. // cjk returns an implicit collation element for a CJK rune.
  7. func cjk(r rune) []rawCE {
  8. // A CJK character C is represented in the DUCET as
  9. // [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
  10. // Where AAAA is the most significant 15 bits plus a base value.
  11. // Any base value will work for the test, so we pick the common value of FB40.
  12. const base = 0xFB40
  13. return []rawCE{
  14. {w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
  15. {w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
  16. }
  17. }
  18. func pCE(p int) []rawCE {
  19. return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
  20. }
  21. func pqCE(p, q int) []rawCE {
  22. return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
  23. }
  24. func ptCE(p, t int) []rawCE {
  25. return mkCE([]int{p, defaultSecondary, t, 0}, 0)
  26. }
  27. func ptcCE(p, t int, ccc uint8) []rawCE {
  28. return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
  29. }
  30. func sCE(s int) []rawCE {
  31. return mkCE([]int{0, s, defaultTertiary, 0}, 0)
  32. }
  33. func stCE(s, t int) []rawCE {
  34. return mkCE([]int{0, s, t, 0}, 0)
  35. }
  36. func scCE(s int, ccc uint8) []rawCE {
  37. return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
  38. }
  39. func mkCE(w []int, ccc uint8) []rawCE {
  40. return []rawCE{rawCE{w, ccc}}
  41. }
  42. // ducetElem is used to define test data that is used to generate a table.
  43. type ducetElem struct {
  44. str string
  45. ces []rawCE
  46. }
  47. func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
  48. b := NewBuilder()
  49. for _, e := range ducet {
  50. ces := [][]int{}
  51. for _, ce := range e.ces {
  52. ces = append(ces, ce.w)
  53. }
  54. if err := b.Add([]rune(e.str), ces, nil); err != nil {
  55. t.Errorf(err.Error())
  56. }
  57. }
  58. b.t = &table{}
  59. b.root.sort()
  60. return b
  61. }
  62. type convertTest struct {
  63. in, out []rawCE
  64. err bool
  65. }
  66. var convLargeTests = []convertTest{
  67. {pCE(0xFB39), pCE(0xFB39), false},
  68. {cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
  69. {pCE(0xFB40), pCE(0), true},
  70. {append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
  71. {pCE(0xFFFE), pCE(illegalOffset), false},
  72. {pCE(0xFFFF), pCE(illegalOffset + 1), false},
  73. }
  74. func TestConvertLarge(t *testing.T) {
  75. for i, tt := range convLargeTests {
  76. e := new(entry)
  77. for _, ce := range tt.in {
  78. e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
  79. }
  80. elems, err := convertLargeWeights(e.elems)
  81. if tt.err {
  82. if err == nil {
  83. t.Errorf("%d: expected error; none found", i)
  84. }
  85. continue
  86. } else if err != nil {
  87. t.Errorf("%d: unexpected error: %v", i, err)
  88. }
  89. if !equalCEArrays(elems, tt.out) {
  90. t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
  91. }
  92. }
  93. }
  94. // Collation element table for simplify tests.
  95. var simplifyTest = []ducetElem{
  96. {"\u0300", sCE(30)}, // grave
  97. {"\u030C", sCE(40)}, // caron
  98. {"A", ptCE(100, 8)},
  99. {"D", ptCE(104, 8)},
  100. {"E", ptCE(105, 8)},
  101. {"I", ptCE(110, 8)},
  102. {"z", ptCE(130, 8)},
  103. {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
  104. {"\u05B7", sCE(80)},
  105. {"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
  106. {"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
  107. {"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
  108. {"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
  109. {"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
  110. // no removal: tertiary value of third element is not maxTertiary
  111. {"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
  112. }
  113. var genColTests = []ducetElem{
  114. {"\uFA70", pqCE(0x1FA70, 0xFA70)},
  115. {"A\u0300", append(ptCE(100, 8), sCE(30)...)},
  116. {"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
  117. {"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
  118. }
  119. func TestGenColElems(t *testing.T) {
  120. b := newBuilder(t, simplifyTest[:5])
  121. for i, tt := range genColTests {
  122. res := b.root.genColElems(tt.str)
  123. if !equalCEArrays(tt.ces, res) {
  124. t.Errorf("%d: result %X; want %X", i, res, tt.ces)
  125. }
  126. }
  127. }
  128. type strArray []string
  129. func (sa strArray) contains(s string) bool {
  130. for _, e := range sa {
  131. if e == s {
  132. return true
  133. }
  134. }
  135. return false
  136. }
  137. var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
  138. var simplifyMarked = strArray{"\u01C5"}
  139. func TestSimplify(t *testing.T) {
  140. b := newBuilder(t, simplifyTest)
  141. o := &b.root
  142. simplify(o)
  143. for i, tt := range simplifyTest {
  144. if simplifyRemoved.contains(tt.str) {
  145. continue
  146. }
  147. e := o.find(tt.str)
  148. if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
  149. t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
  150. break
  151. }
  152. }
  153. var i, k int
  154. for e := o.front(); e != nil; e, _ = e.nextIndexed() {
  155. gold := simplifyMarked.contains(e.str)
  156. if gold {
  157. k++
  158. }
  159. if gold != e.decompose {
  160. t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
  161. }
  162. i++
  163. }
  164. if k != len(simplifyMarked) {
  165. t.Errorf(" an entry that should be marked as decompose was deleted")
  166. }
  167. }
  168. var expandTest = []ducetElem{
  169. {"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
  170. {"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
  171. {"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
  172. {"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
  173. {"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
  174. {"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
  175. }
  176. func TestExpand(t *testing.T) {
  177. const (
  178. totalExpansions = 5
  179. totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
  180. )
  181. b := newBuilder(t, expandTest)
  182. o := &b.root
  183. b.processExpansions(o)
  184. e := o.front()
  185. for _, tt := range expandTest {
  186. exp := b.t.ExpandElem[e.expansionIndex:]
  187. if int(exp[0]) != len(tt.ces) {
  188. t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
  189. }
  190. exp = exp[1:]
  191. for j, w := range tt.ces {
  192. if ce, _ := makeCE(w); exp[j] != ce {
  193. t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
  194. }
  195. }
  196. e, _ = e.nextIndexed()
  197. }
  198. // Verify uniquing.
  199. if len(b.t.ExpandElem) != totalElements {
  200. t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
  201. }
  202. }
  203. var contractTest = []ducetElem{
  204. {"abc", pCE(102)},
  205. {"abd", pCE(103)},
  206. {"a", pCE(100)},
  207. {"ab", pCE(101)},
  208. {"ac", pCE(104)},
  209. {"bcd", pCE(202)},
  210. {"b", pCE(200)},
  211. {"bc", pCE(201)},
  212. {"bd", pCE(203)},
  213. // shares suffixes with a*
  214. {"Ab", pCE(301)},
  215. {"A", pCE(300)},
  216. {"Ac", pCE(304)},
  217. {"Abc", pCE(302)},
  218. {"Abd", pCE(303)},
  219. // starter to be ignored
  220. {"z", pCE(1000)},
  221. }
  222. func TestContract(t *testing.T) {
  223. const (
  224. totalElements = 5 + 5 + 4
  225. )
  226. b := newBuilder(t, contractTest)
  227. o := &b.root
  228. b.processContractions(o)
  229. indexMap := make(map[int]bool)
  230. handleMap := make(map[rune]*entry)
  231. for e := o.front(); e != nil; e, _ = e.nextIndexed() {
  232. if e.contractionHandle.n > 0 {
  233. handleMap[e.runes[0]] = e
  234. indexMap[e.contractionHandle.index] = true
  235. }
  236. }
  237. // Verify uniquing.
  238. if len(indexMap) != 2 {
  239. t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
  240. }
  241. for _, tt := range contractTest {
  242. e, ok := handleMap[[]rune(tt.str)[0]]
  243. if !ok {
  244. continue
  245. }
  246. str := tt.str[1:]
  247. offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
  248. if len(str) != n {
  249. t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
  250. }
  251. ce := b.t.ContractElem[offset+e.contractionIndex]
  252. if want, _ := makeCE(tt.ces[0]); want != ce {
  253. t.Errorf("%s: element %X; want %X", tt.str, ce, want)
  254. }
  255. }
  256. if len(b.t.ContractElem) != totalElements {
  257. t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)
  258. }
  259. }