table_test.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package collate
  5. import (
  6. "testing"
  7. "golang.org/x/text/collate/build"
  8. "golang.org/x/text/internal/colltab"
  9. "golang.org/x/text/unicode/norm"
  10. )
  11. type ColElems []Weights
  // input is a single entry of a test table: a string together with the
  // weights that are handed to the table builder for it.
  type input struct {
  	str string  // text of the entry; converted to runes when added to the builder
  	ces [][]int // one []int of weight values per element associated with str
  }
  16. type check struct {
  17. in string
  18. n int
  19. out ColElems
  20. }
  21. type tableTest struct {
  22. in []input
  23. chk []check
  24. }
  25. func w(ce ...int) Weights {
  26. return W(ce...)
  27. }
  28. var defaults = w(0)
  29. func pt(p, t int) []int {
  30. return []int{p, defaults.Secondary, t}
  31. }
  32. func makeTable(in []input) (*Collator, error) {
  33. b := build.NewBuilder()
  34. for _, r := range in {
  35. if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
  36. panic(e)
  37. }
  38. }
  39. t, err := b.Build()
  40. if err != nil {
  41. return nil, err
  42. }
  43. return NewFromTable(t), nil
  44. }
  45. // modSeq holds a sequence of modifiers in increasing order of CCC long enough
  46. // to cause a segment overflow if not handled correctly. The last rune in this
  47. // list has a CCC of 214.
  var modSeq = []rune{
  	0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8,
  	0x05B9, 0x05BB, 0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E,
  	0x064B, 0x064C, 0x064D, 0x064E, 0x064F, 0x0650, 0x0651, 0x0652,
  	0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48, 0x0EB8, 0x0EC8,
  	0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
  }
  54. var mods []input
  55. var modW = func() ColElems {
  56. ws := ColElems{}
  57. for _, r := range modSeq {
  58. rune := norm.NFC.PropertiesString(string(r))
  59. ws = append(ws, w(0, int(rune.CCC())))
  60. mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
  61. }
  62. return ws
  63. }()
  64. var appendNextTests = []tableTest{
  65. { // test getWeights
  66. []input{
  67. {"a", [][]int{{100}}},
  68. {"b", [][]int{{105}}},
  69. {"c", [][]int{{110}}},
  70. {"ß", [][]int{{120}}},
  71. },
  72. []check{
  73. {"a", 1, ColElems{w(100)}},
  74. {"b", 1, ColElems{w(105)}},
  75. {"c", 1, ColElems{w(110)}},
  76. {"d", 1, ColElems{w(0x50064)}},
  77. {"ab", 1, ColElems{w(100)}},
  78. {"bc", 1, ColElems{w(105)}},
  79. {"dd", 1, ColElems{w(0x50064)}},
  80. {"ß", 2, ColElems{w(120)}},
  81. },
  82. },
  83. { // test expansion
  84. []input{
  85. {"u", [][]int{{100}}},
  86. {"U", [][]int{{100}, {0, 25}}},
  87. {"w", [][]int{{100}, {100}}},
  88. {"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
  89. },
  90. []check{
  91. {"u", 1, ColElems{w(100)}},
  92. {"U", 1, ColElems{w(100), w(0, 25)}},
  93. {"w", 1, ColElems{w(100), w(100)}},
  94. {"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
  95. },
  96. },
  97. { // test decompose
  98. []input{
  99. {"D", [][]int{pt(104, 8)}},
  100. {"z", [][]int{pt(130, 8)}},
  101. {"\u030C", [][]int{{0, 40}}}, // Caron
  102. {"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
  103. },
  104. []check{
  105. {"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
  106. },
  107. },
  108. { // test basic contraction
  109. []input{
  110. {"a", [][]int{{100}}},
  111. {"ab", [][]int{{101}}},
  112. {"aab", [][]int{{101}, {101}}},
  113. {"abc", [][]int{{102}}},
  114. {"b", [][]int{{200}}},
  115. {"c", [][]int{{300}}},
  116. {"d", [][]int{{400}}},
  117. },
  118. []check{
  119. {"a", 1, ColElems{w(100)}},
  120. {"aa", 1, ColElems{w(100)}},
  121. {"aac", 1, ColElems{w(100)}},
  122. {"d", 1, ColElems{w(400)}},
  123. {"ab", 2, ColElems{w(101)}},
  124. {"abb", 2, ColElems{w(101)}},
  125. {"aab", 3, ColElems{w(101), w(101)}},
  126. {"aaba", 3, ColElems{w(101), w(101)}},
  127. {"abc", 3, ColElems{w(102)}},
  128. {"abcd", 3, ColElems{w(102)}},
  129. },
  130. },
  131. { // test discontinuous contraction
  132. append(mods, []input{
  133. // modifiers; secondary weight equals ccc
  134. {"\u0316", [][]int{{0, 220}}},
  135. {"\u0317", [][]int{{0, 220}, {0, 220}}},
  136. {"\u302D", [][]int{{0, 222}}},
  137. {"\u302E", [][]int{{0, 225}}}, // used as starter
  138. {"\u302F", [][]int{{0, 224}}}, // used as starter
  139. {"\u18A9", [][]int{{0, 228}}},
  140. {"\u0300", [][]int{{0, 230}}},
  141. {"\u0301", [][]int{{0, 230}}},
  142. {"\u0315", [][]int{{0, 232}}},
  143. {"\u031A", [][]int{{0, 232}}},
  144. {"\u035C", [][]int{{0, 233}}},
  145. {"\u035F", [][]int{{0, 233}}},
  146. {"\u035D", [][]int{{0, 234}}},
  147. {"\u035E", [][]int{{0, 234}}},
  148. {"\u0345", [][]int{{0, 240}}},
  149. // starters
  150. {"a", [][]int{{100}}},
  151. {"b", [][]int{{200}}},
  152. {"c", [][]int{{300}}},
  153. {"\u03B1", [][]int{{900}}},
  154. {"\x01", [][]int{{0, 0, 0, 0}}},
  155. // contractions
  156. {"a\u0300", [][]int{{101}}},
  157. {"a\u0301", [][]int{{102}}},
  158. {"a\u035E", [][]int{{110}}},
  159. {"a\u035Eb\u035E", [][]int{{115}}},
  160. {"ac\u035Eaca\u035E", [][]int{{116}}},
  161. {"a\u035Db\u035D", [][]int{{117}}},
  162. {"a\u0301\u035Db", [][]int{{120}}},
  163. {"a\u0301\u035F", [][]int{{121}}},
  164. {"a\u0301\u035Fb", [][]int{{119}}},
  165. {"\u03B1\u0345", [][]int{{901}, {902}}},
  166. {"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
  167. {"\u302F\u18A9", [][]int{{0, 130}}},
  168. }...),
  169. []check{
  170. {"a\x01\u0300", 1, ColElems{w(100)}},
  171. {"ab", 1, ColElems{w(100)}}, // closing segment
  172. {"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
  173. {"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
  174. {"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
  175. {"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
  176. {"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment
  177. {"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment
  178. {"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
  179. {"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
  180. // match blocked by modifier with same ccc
  181. {"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
  182. // multiple gaps
  183. {"a\u0301\u035Db", 6, ColElems{w(120)}},
  184. {"a\u0301\u035F", 5, ColElems{w(121)}},
  185. {"a\u0301\u035Fb", 6, ColElems{w(119)}},
  186. {"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
  187. {"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
  188. {"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
  189. {"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
  190. {"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
  191. {"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
  192. {"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
  193. // handling of segment overflow
  194. { // just fits within segment
  195. "a" + string(modSeq[:30]) + "\u0301",
  196. 3 + len(string(modSeq[:30])),
  197. append(ColElems{w(102)}, modW[:30]...),
  198. },
  199. {"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
  200. {"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
  201. { // just fits within segment with two interstitial runes
  202. "a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
  203. 7 + len(string(modSeq[:28])),
  204. append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
  205. },
  206. { // second half does not fit within segment
  207. "a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
  208. 3 + len(string(modSeq[:29])),
  209. append(ColElems{w(102)}, modW[:29]...),
  210. },
  211. // discontinuity can only occur in last normalization segment
  212. {"a\u035Eb\u035E", 6, ColElems{w(115)}},
  213. {"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
  214. {"a\u035Db\u035D", 6, ColElems{w(117)}},
  215. {"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
  216. {"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
  217. {"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
  218. {"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
  219. {"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
  220. {"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
  221. // expanding contraction
  222. {"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
  223. // Theoretical possibilities
  224. // contraction within a gap
  225. {"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
  226. // expansion within a gap
  227. {"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
  228. // repeating CCC blocks last modifier
  229. {"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
  230. // The trailing combining characters (with lower CCC) should block the first one.
  231. // TODO: make the following pass.
  232. // {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
  233. {"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
  234. // Last combiner should match after normalization.
  235. // TODO: make the following pass.
  236. // {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
  237. // The first combiner is blocking the second one as they have the same CCC.
  238. {"a\u035D\u035Eb", 1, ColElems{w(100)}},
  239. },
  240. },
  241. }
  242. func TestAppendNext(t *testing.T) {
  243. for i, tt := range appendNextTests {
  244. c, err := makeTable(tt.in)
  245. if err != nil {
  246. t.Errorf("%d: error creating table: %v", i, err)
  247. continue
  248. }
  249. for j, chk := range tt.chk {
  250. ws, n := c.t.AppendNext(nil, []byte(chk.in))
  251. if n != chk.n {
  252. t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
  253. }
  254. out := convertFromWeights(chk.out)
  255. if len(ws) != len(out) {
  256. t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
  257. continue
  258. }
  259. for k, w := range ws {
  260. w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
  261. if w != out[k] {
  262. t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
  263. }
  264. }
  265. }
  266. }
  267. }