merge_test.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package rangetable
  5. import (
  6. "testing"
  7. "unicode"
  8. )
  9. var (
  10. maxRuneTable = &unicode.RangeTable{
  11. R32: []unicode.Range32{
  12. {unicode.MaxRune, unicode.MaxRune, 1},
  13. },
  14. }
  15. overlap1 = &unicode.RangeTable{
  16. R16: []unicode.Range16{
  17. {0x100, 0xfffc, 4},
  18. },
  19. R32: []unicode.Range32{
  20. {0x100000, 0x10fffc, 4},
  21. },
  22. }
  23. overlap2 = &unicode.RangeTable{
  24. R16: []unicode.Range16{
  25. {0x101, 0xfffd, 4},
  26. },
  27. R32: []unicode.Range32{
  28. {0x100001, 0x10fffd, 3},
  29. },
  30. }
  31. // The following table should be compacted into two entries for R16 and R32.
  32. optimize = &unicode.RangeTable{
  33. R16: []unicode.Range16{
  34. {0x1, 0x1, 1},
  35. {0x2, 0x2, 1},
  36. {0x3, 0x3, 1},
  37. {0x5, 0x5, 1},
  38. {0x7, 0x7, 1},
  39. {0x9, 0x9, 1},
  40. {0xb, 0xf, 2},
  41. },
  42. R32: []unicode.Range32{
  43. {0x10001, 0x10001, 1},
  44. {0x10002, 0x10002, 1},
  45. {0x10003, 0x10003, 1},
  46. {0x10005, 0x10005, 1},
  47. {0x10007, 0x10007, 1},
  48. {0x10009, 0x10009, 1},
  49. {0x1000b, 0x1000f, 2},
  50. },
  51. }
  52. )
  53. func TestMerge(t *testing.T) {
  54. for i, tt := range [][]*unicode.RangeTable{
  55. {unicode.Cc, unicode.Cf},
  56. {unicode.L, unicode.Ll},
  57. {unicode.L, unicode.Ll, unicode.Lu},
  58. {unicode.Ll, unicode.Lu},
  59. {unicode.M},
  60. unicode.GraphicRanges,
  61. cased,
  62. // Merge R16 only and R32 only and vice versa.
  63. {unicode.Khmer, unicode.Khudawadi},
  64. {unicode.Imperial_Aramaic, unicode.Radical},
  65. // Merge with empty.
  66. {&unicode.RangeTable{}},
  67. {&unicode.RangeTable{}, &unicode.RangeTable{}},
  68. {&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
  69. {&unicode.RangeTable{}, unicode.Hiragana},
  70. {unicode.Inherited, &unicode.RangeTable{}},
  71. {&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},
  72. // Hypothetical tables.
  73. {maxRuneTable},
  74. {overlap1, overlap2},
  75. // Optimization
  76. {optimize},
  77. } {
  78. rt := Merge(tt...)
  79. for r := rune(0); r <= unicode.MaxRune; r++ {
  80. if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
  81. t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
  82. }
  83. }
  84. // Test optimization and correctness for R16.
  85. for k := 0; k < len(rt.R16)-1; k++ {
  86. if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
  87. t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
  88. }
  89. if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
  90. t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
  91. }
  92. if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
  93. t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
  94. i, k, rt.R16[k], rt.R16[k+1])
  95. }
  96. }
  97. // Test optimization and correctness for R32.
  98. for k := 0; k < len(rt.R32)-1; k++ {
  99. if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
  100. t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
  101. }
  102. if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
  103. t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
  104. }
  105. if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
  106. t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
  107. i, k, rt.R32[k], rt.R32[k+1])
  108. }
  109. }
  110. }
  111. }
  112. const runes = "Hello World in 2015!,\U0010fffd"
  113. func BenchmarkNotMerged(t *testing.B) {
  114. for i := 0; i < t.N; i++ {
  115. for _, r := range runes {
  116. unicode.In(r, unicode.GraphicRanges...)
  117. }
  118. }
  119. }
  120. func BenchmarkMerged(t *testing.B) {
  121. rt := Merge(unicode.GraphicRanges...)
  122. for i := 0; i < t.N; i++ {
  123. for _, r := range runes {
  124. unicode.Is(rt, r)
  125. }
  126. }
  127. }
  128. var cased = []*unicode.RangeTable{
  129. unicode.Lower,
  130. unicode.Upper,
  131. unicode.Title,
  132. unicode.Other_Lowercase,
  133. unicode.Other_Uppercase,
  134. }
  135. func BenchmarkNotMergedCased(t *testing.B) {
  136. for i := 0; i < t.N; i++ {
  137. for _, r := range runes {
  138. unicode.In(r, cased...)
  139. }
  140. }
  141. }
  142. func BenchmarkMergedCased(t *testing.B) {
  143. // This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
  144. // Unicode 7.0.0.
  145. rt := Merge(cased...)
  146. for i := 0; i < t.N; i++ {
  147. for _, r := range runes {
  148. unicode.Is(rt, r)
  149. }
  150. }
  151. }
  152. func BenchmarkInit(t *testing.B) {
  153. for i := 0; i < t.N; i++ {
  154. Merge(cased...)
  155. Merge(unicode.GraphicRanges...)
  156. }
  157. }
  158. func BenchmarkInit2(t *testing.B) {
  159. // Hypothetical near-worst-case performance.
  160. for i := 0; i < t.N; i++ {
  161. Merge(overlap1, overlap2)
  162. }
  163. }