merge.go 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package rangetable
  5. import (
  6. "unicode"
  7. )
  8. // atEnd is used to mark a completed iteration.
  9. const atEnd = unicode.MaxRune + 1
  10. // Merge returns a new RangeTable that is the union of the given tables.
  11. // It can also be used to compact user-created RangeTables. The entries in
  12. // R16 and R32 for any given RangeTable should be sorted and non-overlapping.
  13. //
  14. // A lookup in the resulting table can be several times faster than using In
  15. // directly on the ranges. Merge is an expensive operation, however, and only
  16. // makes sense if one intends to use the result for more than a couple of
  17. // hundred lookups.
  18. func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
  19. rt := &unicode.RangeTable{}
  20. if len(ranges) == 0 {
  21. return rt
  22. }
  23. iter := tablesIter(make([]tableIndex, len(ranges)))
  24. for i, t := range ranges {
  25. iter[i] = tableIndex{t, 0, atEnd}
  26. if len(t.R16) > 0 {
  27. iter[i].next = rune(t.R16[0].Lo)
  28. }
  29. }
  30. if r0 := iter.next16(); r0.Stride != 0 {
  31. for {
  32. r1 := iter.next16()
  33. if r1.Stride == 0 {
  34. rt.R16 = append(rt.R16, r0)
  35. break
  36. }
  37. stride := r1.Lo - r0.Hi
  38. if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
  39. // Fully merge the next range into the previous one.
  40. r0.Hi, r0.Stride = r1.Hi, stride
  41. continue
  42. } else if stride == r0.Stride {
  43. // Move the first element of r1 to r0. This may eliminate an
  44. // entry.
  45. r0.Hi = r1.Lo
  46. r0.Stride = stride
  47. r1.Lo = r1.Lo + r1.Stride
  48. if r1.Lo > r1.Hi {
  49. continue
  50. }
  51. }
  52. rt.R16 = append(rt.R16, r0)
  53. r0 = r1
  54. }
  55. }
  56. for i, t := range ranges {
  57. iter[i] = tableIndex{t, 0, atEnd}
  58. if len(t.R32) > 0 {
  59. iter[i].next = rune(t.R32[0].Lo)
  60. }
  61. }
  62. if r0 := iter.next32(); r0.Stride != 0 {
  63. for {
  64. r1 := iter.next32()
  65. if r1.Stride == 0 {
  66. rt.R32 = append(rt.R32, r0)
  67. break
  68. }
  69. stride := r1.Lo - r0.Hi
  70. if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
  71. // Fully merge the next range into the previous one.
  72. r0.Hi, r0.Stride = r1.Hi, stride
  73. continue
  74. } else if stride == r0.Stride {
  75. // Move the first element of r1 to r0. This may eliminate an
  76. // entry.
  77. r0.Hi = r1.Lo
  78. r1.Lo = r1.Lo + r1.Stride
  79. if r1.Lo > r1.Hi {
  80. continue
  81. }
  82. }
  83. rt.R32 = append(rt.R32, r0)
  84. r0 = r1
  85. }
  86. }
  87. for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
  88. rt.LatinOffset = i + 1
  89. }
  90. return rt
  91. }
  92. type tableIndex struct {
  93. t *unicode.RangeTable
  94. p uint32
  95. next rune
  96. }
  97. type tablesIter []tableIndex
  98. // sortIter does an insertion sort using the next field of tableIndex. Insertion
  99. // sort is a good sorting algorithm for this case.
  100. func sortIter(t []tableIndex) {
  101. for i := range t {
  102. for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
  103. t[j], t[j-1] = t[j-1], t[j]
  104. }
  105. }
  106. }
  107. // next16 finds the ranged to be added to the table. If ranges overlap between
  108. // multiple tables it clips the result to a non-overlapping range if the
  109. // elements are not fully subsumed. It returns a zero range if there are no more
  110. // ranges.
  111. func (ti tablesIter) next16() unicode.Range16 {
  112. sortIter(ti)
  113. t0 := ti[0]
  114. if t0.next == atEnd {
  115. return unicode.Range16{}
  116. }
  117. r0 := t0.t.R16[t0.p]
  118. r0.Lo = uint16(t0.next)
  119. // We restrict the Hi of the current range if it overlaps with another range.
  120. for i := range ti {
  121. tn := ti[i]
  122. // Since our tableIndices are sorted by next, we can break if the there
  123. // is no overlap. The first value of a next range can always be merged
  124. // into the current one, so we can break in case of equality as well.
  125. if rune(r0.Hi) <= tn.next {
  126. break
  127. }
  128. rn := tn.t.R16[tn.p]
  129. rn.Lo = uint16(tn.next)
  130. // Limit r0.Hi based on next ranges in list, but allow it to overlap
  131. // with ranges as long as it subsumes it.
  132. m := (rn.Lo - r0.Lo) % r0.Stride
  133. if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
  134. // Overlap, take the min of the two Hi values: for simplicity's sake
  135. // we only process one range at a time.
  136. if r0.Hi > rn.Hi {
  137. r0.Hi = rn.Hi
  138. }
  139. } else {
  140. // Not a compatible stride. Set to the last possible value before
  141. // rn.Lo, but ensure there is at least one value.
  142. if x := rn.Lo - m; r0.Lo <= x {
  143. r0.Hi = x
  144. }
  145. break
  146. }
  147. }
  148. // Update the next values for each table.
  149. for i := range ti {
  150. tn := &ti[i]
  151. if rune(r0.Hi) < tn.next {
  152. break
  153. }
  154. rn := tn.t.R16[tn.p]
  155. stride := rune(rn.Stride)
  156. tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
  157. if rune(rn.Hi) < tn.next {
  158. if tn.p++; int(tn.p) == len(tn.t.R16) {
  159. tn.next = atEnd
  160. } else {
  161. tn.next = rune(tn.t.R16[tn.p].Lo)
  162. }
  163. }
  164. }
  165. if r0.Lo == r0.Hi {
  166. r0.Stride = 1
  167. }
  168. return r0
  169. }
  170. // next32 finds the ranged to be added to the table. If ranges overlap between
  171. // multiple tables it clips the result to a non-overlapping range if the
  172. // elements are not fully subsumed. It returns a zero range if there are no more
  173. // ranges.
  174. func (ti tablesIter) next32() unicode.Range32 {
  175. sortIter(ti)
  176. t0 := ti[0]
  177. if t0.next == atEnd {
  178. return unicode.Range32{}
  179. }
  180. r0 := t0.t.R32[t0.p]
  181. r0.Lo = uint32(t0.next)
  182. // We restrict the Hi of the current range if it overlaps with another range.
  183. for i := range ti {
  184. tn := ti[i]
  185. // Since our tableIndices are sorted by next, we can break if the there
  186. // is no overlap. The first value of a next range can always be merged
  187. // into the current one, so we can break in case of equality as well.
  188. if rune(r0.Hi) <= tn.next {
  189. break
  190. }
  191. rn := tn.t.R32[tn.p]
  192. rn.Lo = uint32(tn.next)
  193. // Limit r0.Hi based on next ranges in list, but allow it to overlap
  194. // with ranges as long as it subsumes it.
  195. m := (rn.Lo - r0.Lo) % r0.Stride
  196. if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
  197. // Overlap, take the min of the two Hi values: for simplicity's sake
  198. // we only process one range at a time.
  199. if r0.Hi > rn.Hi {
  200. r0.Hi = rn.Hi
  201. }
  202. } else {
  203. // Not a compatible stride. Set to the last possible value before
  204. // rn.Lo, but ensure there is at least one value.
  205. if x := rn.Lo - m; r0.Lo <= x {
  206. r0.Hi = x
  207. }
  208. break
  209. }
  210. }
  211. // Update the next values for each table.
  212. for i := range ti {
  213. tn := &ti[i]
  214. if rune(r0.Hi) < tn.next {
  215. break
  216. }
  217. rn := tn.t.R32[tn.p]
  218. stride := rune(rn.Stride)
  219. tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
  220. if rune(rn.Hi) < tn.next {
  221. if tn.p++; int(tn.p) == len(tn.t.R32) {
  222. tn.next = atEnd
  223. } else {
  224. tn.next = rune(tn.t.R32[tn.p].Lo)
  225. }
  226. }
  227. }
  228. if r0.Lo == r0.Hi {
  229. r0.Stride = 1
  230. }
  231. return r0
  232. }