pattern.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package search
  5. import (
  6. "golang.org/x/text/internal/colltab"
  7. )
  8. // TODO: handle variable primary weights?
  9. func (p *Pattern) deleteEmptyElements() {
  10. k := 0
  11. for _, e := range p.ce {
  12. if !isIgnorable(p.m, e) {
  13. p.ce[k] = e
  14. k++
  15. }
  16. }
  17. p.ce = p.ce[:k]
  18. }
  19. func isIgnorable(m *Matcher, e colltab.Elem) bool {
  20. if e.Primary() > 0 {
  21. return false
  22. }
  23. if e.Secondary() > 0 {
  24. if !m.ignoreDiacritics {
  25. return false
  26. }
  27. // Primary value is 0 and ignoreDiacritics is true. In this case we
  28. // ignore the tertiary element, as it only pertains to the modifier.
  29. return true
  30. }
  31. // TODO: further distinguish once we have the new implementation.
  32. if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
  33. return false
  34. }
  35. // TODO: we ignore the Quaternary level for now.
  36. return true
  37. }
  38. // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
  39. func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
  40. for start := 0; it.Next(); it.Reset(start) {
  41. nextStart := it.End()
  42. if end := p.searchOnce(it); end != -1 {
  43. return start, end
  44. }
  45. start = nextStart
  46. }
  47. return -1, -1
  48. }
  49. func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
  50. if it.Next() {
  51. if end := p.searchOnce(it); end != -1 {
  52. return 0, end
  53. }
  54. }
  55. return -1, -1
  56. }
  57. // next advances to the next weight in a pattern. f must return one of the
  58. // weights of a collation element. next will advance to the first non-zero
  59. // weight and return this weight and true if it exists, or 0, false otherwise.
  60. func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
  61. for *i < len(p.ce) {
  62. v := f(p.ce[*i])
  63. *i++
  64. if v != 0 {
  65. // Skip successive ignorable values.
  66. for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
  67. }
  68. return v, true
  69. }
  70. }
  71. return 0, false
  72. }
  73. // TODO: remove this function once Elem is internal and Tertiary returns int.
  74. func tertiary(e colltab.Elem) int {
  75. return int(e.Tertiary())
  76. }
  77. // searchOnce tries to match the pattern s.p at the text position i. s.buf needs
  78. // to be filled with collation elements of the first segment, where n is the
  79. // number of source bytes consumed for this segment. It will return the end
  80. // position of the match or -1.
  81. func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
  82. var pLevel [4]int
  83. m := p.m
  84. for {
  85. k := 0
  86. for ; k < it.N; k++ {
  87. if v := it.Elems[k].Primary(); v > 0 {
  88. if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
  89. return -1
  90. }
  91. }
  92. if !m.ignoreDiacritics {
  93. if v := it.Elems[k].Secondary(); v > 0 {
  94. if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
  95. return -1
  96. }
  97. }
  98. } else if it.Elems[k].Primary() == 0 {
  99. // We ignore tertiary values of collation elements of the
  100. // secondary level.
  101. continue
  102. }
  103. // TODO: distinguish between case and width. This will be easier to
  104. // implement after we moved to the new collation implementation.
  105. if !m.ignoreWidth && !m.ignoreCase {
  106. if v := it.Elems[k].Tertiary(); v > 0 {
  107. if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
  108. return -1
  109. }
  110. }
  111. }
  112. // TODO: check quaternary weight
  113. }
  114. it.Discard() // Remove the current segment from the buffer.
  115. // Check for completion.
  116. switch {
  117. // If any of these cases match, we are not at the end.
  118. case pLevel[0] < len(p.ce):
  119. case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
  120. case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
  121. default:
  122. // At this point, both the segment and pattern has matched fully.
  123. // However, the segment may still be have trailing modifiers.
  124. // This can be verified by another call to next.
  125. end = it.End()
  126. if it.Next() && it.Elems[0].Primary() == 0 {
  127. if !m.ignoreDiacritics {
  128. return -1
  129. }
  130. end = it.End()
  131. }
  132. return end
  133. }
  134. // Fill the buffer with the next batch of collation elements.
  135. if !it.Next() {
  136. return -1
  137. }
  138. }
  139. }