trie_test.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. package stringx
  2. import (
  3. "testing"
  4. "github.com/stretchr/testify/assert"
  5. )
  6. func TestTrie(t *testing.T) {
  7. tests := []struct {
  8. input string
  9. output string
  10. keywords []string
  11. found bool
  12. }{
  13. {
  14. input: "日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演",
  15. output: "日本****兼电视、电影演员。*****女优是xx出道, ******们最精彩的表演是******表演",
  16. keywords: []string{
  17. "AV演员",
  18. "苍井空",
  19. "AV",
  20. "日本AV女优",
  21. "AV演员色情",
  22. },
  23. found: true,
  24. },
  25. {
  26. input: "完全和谐的文本完全和谐的文本",
  27. output: "完全和谐的文本完全和谐的文本",
  28. keywords: nil,
  29. found: false,
  30. },
  31. {
  32. input: "就一个字不对",
  33. output: "就*个字不对",
  34. keywords: []string{
  35. "一",
  36. },
  37. found: true,
  38. },
  39. {
  40. input: "就一对, AV",
  41. output: "就*对, **",
  42. keywords: []string{
  43. "一",
  44. "AV",
  45. },
  46. found: true,
  47. },
  48. {
  49. input: "就一不对, AV",
  50. output: "就**对, **",
  51. keywords: []string{
  52. "一",
  53. "一不",
  54. "AV",
  55. },
  56. found: true,
  57. },
  58. {
  59. input: "就对, AV",
  60. output: "就对, **",
  61. keywords: []string{
  62. "AV",
  63. },
  64. found: true,
  65. },
  66. {
  67. input: "就对, 一不",
  68. output: "就对, **",
  69. keywords: []string{
  70. "一",
  71. "一不",
  72. },
  73. found: true,
  74. },
  75. {
  76. input: "",
  77. output: "",
  78. keywords: nil,
  79. found: false,
  80. },
  81. }
  82. trie := NewTrie([]string{
  83. "", // no hurts for empty keywords
  84. "一",
  85. "一不",
  86. "AV",
  87. "AV演员",
  88. "苍井空",
  89. "AV演员色情",
  90. "日本AV女优",
  91. })
  92. for _, test := range tests {
  93. t.Run(test.input, func(t *testing.T) {
  94. output, keywords, ok := trie.Filter(test.input)
  95. assert.Equal(t, test.found, ok)
  96. assert.Equal(t, test.output, output)
  97. assert.ElementsMatch(t, test.keywords, keywords)
  98. keywords = trie.FindKeywords(test.input)
  99. assert.ElementsMatch(t, test.keywords, keywords)
  100. })
  101. }
  102. }
  103. func TestTrieSingleWord(t *testing.T) {
  104. trie := NewTrie([]string{
  105. "闹",
  106. }, WithMask('#'))
  107. output, keywords, ok := trie.Filter("今晚真热闹")
  108. assert.ElementsMatch(t, []string{"闹"}, keywords)
  109. assert.True(t, ok)
  110. assert.Equal(t, "今晚真热#", output)
  111. }
  112. func TestTrieOverlap(t *testing.T) {
  113. trie := NewTrie([]string{
  114. "一二三四五",
  115. "二三四五六七八",
  116. }, WithMask('#'))
  117. output, keywords, ok := trie.Filter("零一二三四五六七八九十")
  118. assert.ElementsMatch(t, []string{
  119. "一二三四五",
  120. "二三四五六七八",
  121. }, keywords)
  122. assert.True(t, ok)
  123. assert.Equal(t, "零########九十", output)
  124. }
  125. func TestTrieNested(t *testing.T) {
  126. trie := NewTrie([]string{
  127. "一二三",
  128. "一二三四五",
  129. "一二三四五六七八",
  130. }, WithMask('#'))
  131. output, keywords, ok := trie.Filter("零一二三四五六七八九十")
  132. assert.ElementsMatch(t, []string{
  133. "一二三",
  134. "一二三四五",
  135. "一二三四五六七八",
  136. }, keywords)
  137. assert.True(t, ok)
  138. assert.Equal(t, "零########九十", output)
  139. }
  140. func BenchmarkTrie(b *testing.B) {
  141. b.ReportAllocs()
  142. trie := NewTrie([]string{
  143. "A",
  144. "AV",
  145. "AV演员",
  146. "苍井空",
  147. "AV演员色情",
  148. "日本AV女优",
  149. })
  150. for i := 0; i < b.N; i++ {
  151. trie.Filter("日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演")
  152. }
  153. }