order.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package build
  5. import (
  6. "fmt"
  7. "log"
  8. "sort"
  9. "strings"
  10. "unicode"
  11. "golang.org/x/text/internal/colltab"
  12. "golang.org/x/text/unicode/norm"
  13. )
  14. type logicalAnchor int
  15. const (
  16. firstAnchor logicalAnchor = -1
  17. noAnchor = 0
  18. lastAnchor = 1
  19. )
  20. // entry is used to keep track of a single entry in the collation element table
  21. // during building. Examples of entries can be found in the Default Unicode
  22. // Collation Element Table.
  23. // See https://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
  24. type entry struct {
  25. str string // same as string(runes)
  26. runes []rune
  27. elems []rawCE // the collation elements
  28. extend string // weights of extend to be appended to elems
  29. before bool // weights relative to next instead of previous.
  30. lock bool // entry is used in extension and can no longer be moved.
  31. // prev, next, and level are used to keep track of tailorings.
  32. prev, next *entry
  33. level colltab.Level // next differs at this level
  34. skipRemove bool // do not unlink when removed
  35. decompose bool // can use NFKD decomposition to generate elems
  36. exclude bool // do not include in table
  37. implicit bool // derived, is not included in the list
  38. modified bool // entry was modified in tailoring
  39. logical logicalAnchor
  40. expansionIndex int // used to store index into expansion table
  41. contractionHandle ctHandle
  42. contractionIndex int // index into contraction elements
  43. }
  44. func (e *entry) String() string {
  45. return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
  46. e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
  47. }
  48. func (e *entry) skip() bool {
  49. return e.contraction()
  50. }
  51. func (e *entry) expansion() bool {
  52. return !e.decompose && len(e.elems) > 1
  53. }
  54. func (e *entry) contraction() bool {
  55. return len(e.runes) > 1
  56. }
  57. func (e *entry) contractionStarter() bool {
  58. return e.contractionHandle.n != 0
  59. }
  60. // nextIndexed gets the next entry that needs to be stored in the table.
  61. // It returns the entry and the collation level at which the next entry differs
  62. // from the current entry.
  63. // Entries that can be explicitly derived and logical reset positions are
  64. // examples of entries that will not be indexed.
  65. func (e *entry) nextIndexed() (*entry, colltab.Level) {
  66. level := e.level
  67. for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
  68. if e.level < level {
  69. level = e.level
  70. }
  71. }
  72. return e, level
  73. }
  74. // remove unlinks entry e from the sorted chain and clears the collation
  75. // elements. e may not be at the front or end of the list. This should always
  76. // be the case, as the front and end of the list are always logical anchors,
  77. // which may not be removed.
  78. func (e *entry) remove() {
  79. if e.logical != noAnchor {
  80. log.Fatalf("may not remove anchor %q", e.str)
  81. }
  82. // TODO: need to set e.prev.level to e.level if e.level is smaller?
  83. e.elems = nil
  84. if !e.skipRemove {
  85. if e.prev != nil {
  86. e.prev.next = e.next
  87. }
  88. if e.next != nil {
  89. e.next.prev = e.prev
  90. }
  91. }
  92. e.skipRemove = false
  93. }
  94. // insertAfter inserts n after e.
  95. func (e *entry) insertAfter(n *entry) {
  96. if e == n {
  97. panic("e == anchor")
  98. }
  99. if e == nil {
  100. panic("unexpected nil anchor")
  101. }
  102. n.remove()
  103. n.decompose = false // redo decomposition test
  104. n.next = e.next
  105. n.prev = e
  106. if e.next != nil {
  107. e.next.prev = n
  108. }
  109. e.next = n
  110. }
  111. // insertBefore inserts n before e.
  112. func (e *entry) insertBefore(n *entry) {
  113. if e == n {
  114. panic("e == anchor")
  115. }
  116. if e == nil {
  117. panic("unexpected nil anchor")
  118. }
  119. n.remove()
  120. n.decompose = false // redo decomposition test
  121. n.prev = e.prev
  122. n.next = e
  123. if e.prev != nil {
  124. e.prev.next = n
  125. }
  126. e.prev = n
  127. }
  128. func (e *entry) encodeBase() (ce uint32, err error) {
  129. switch {
  130. case e.expansion():
  131. ce, err = makeExpandIndex(e.expansionIndex)
  132. default:
  133. if e.decompose {
  134. log.Fatal("decompose should be handled elsewhere")
  135. }
  136. ce, err = makeCE(e.elems[0])
  137. }
  138. return
  139. }
  140. func (e *entry) encode() (ce uint32, err error) {
  141. if e.skip() {
  142. log.Fatal("cannot build colElem for entry that should be skipped")
  143. }
  144. switch {
  145. case e.decompose:
  146. t1 := e.elems[0].w[2]
  147. t2 := 0
  148. if len(e.elems) > 1 {
  149. t2 = e.elems[1].w[2]
  150. }
  151. ce, err = makeDecompose(t1, t2)
  152. case e.contractionStarter():
  153. ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
  154. default:
  155. if len(e.runes) > 1 {
  156. log.Fatal("colElem: contractions are handled in contraction trie")
  157. }
  158. ce, err = e.encodeBase()
  159. }
  160. return
  161. }
  162. // entryLess returns true if a sorts before b and false otherwise.
  163. func entryLess(a, b *entry) bool {
  164. if res, _ := compareWeights(a.elems, b.elems); res != 0 {
  165. return res == -1
  166. }
  167. if a.logical != noAnchor {
  168. return a.logical == firstAnchor
  169. }
  170. if b.logical != noAnchor {
  171. return b.logical == lastAnchor
  172. }
  173. return a.str < b.str
  174. }
  175. type sortedEntries []*entry
  176. func (s sortedEntries) Len() int {
  177. return len(s)
  178. }
  179. func (s sortedEntries) Swap(i, j int) {
  180. s[i], s[j] = s[j], s[i]
  181. }
  182. func (s sortedEntries) Less(i, j int) bool {
  183. return entryLess(s[i], s[j])
  184. }
  185. type ordering struct {
  186. id string
  187. entryMap map[string]*entry
  188. ordered []*entry
  189. handle *trieHandle
  190. }
  191. // insert inserts e into both entryMap and ordered.
  192. // Note that insert simply appends e to ordered. To reattain a sorted
  193. // order, o.sort() should be called.
  194. func (o *ordering) insert(e *entry) {
  195. if e.logical == noAnchor {
  196. o.entryMap[e.str] = e
  197. } else {
  198. // Use key format as used in UCA rules.
  199. o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
  200. // Also add index entry for XML format.
  201. o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
  202. }
  203. o.ordered = append(o.ordered, e)
  204. }
  205. // newEntry creates a new entry for the given info and inserts it into
  206. // the index.
  207. func (o *ordering) newEntry(s string, ces []rawCE) *entry {
  208. e := &entry{
  209. runes: []rune(s),
  210. elems: ces,
  211. str: s,
  212. }
  213. o.insert(e)
  214. return e
  215. }
  216. // find looks up and returns the entry for the given string.
  217. // It returns nil if str is not in the index and if an implicit value
  218. // cannot be derived, that is, if str represents more than one rune.
  219. func (o *ordering) find(str string) *entry {
  220. e := o.entryMap[str]
  221. if e == nil {
  222. r := []rune(str)
  223. if len(r) == 1 {
  224. const (
  225. firstHangul = 0xAC00
  226. lastHangul = 0xD7A3
  227. )
  228. if r[0] >= firstHangul && r[0] <= lastHangul {
  229. ce := []rawCE{}
  230. nfd := norm.NFD.String(str)
  231. for _, r := range nfd {
  232. ce = append(ce, o.find(string(r)).elems...)
  233. }
  234. e = o.newEntry(nfd, ce)
  235. } else {
  236. e = o.newEntry(string(r[0]), []rawCE{
  237. {w: []int{
  238. implicitPrimary(r[0]),
  239. defaultSecondary,
  240. defaultTertiary,
  241. int(r[0]),
  242. },
  243. },
  244. })
  245. e.modified = true
  246. }
  247. e.exclude = true // do not index implicits
  248. }
  249. }
  250. return e
  251. }
  252. // makeRootOrdering returns a newly initialized ordering value and populates
  253. // it with a set of logical reset points that can be used as anchors.
  254. // The anchors first_tertiary_ignorable and __END__ will always sort at
  255. // the beginning and end, respectively. This means that prev and next are non-nil
  256. // for any indexed entry.
  257. func makeRootOrdering() ordering {
  258. const max = unicode.MaxRune
  259. o := ordering{
  260. entryMap: make(map[string]*entry),
  261. }
  262. insert := func(typ logicalAnchor, s string, ce []int) {
  263. e := &entry{
  264. elems: []rawCE{{w: ce}},
  265. str: s,
  266. exclude: true,
  267. logical: typ,
  268. }
  269. o.insert(e)
  270. }
  271. insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
  272. insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
  273. insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
  274. insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
  275. insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
  276. return o
  277. }
  278. // patchForInsert eleminates entries from the list with more than one collation element.
  279. // The next and prev fields of the eliminated entries still point to appropriate
  280. // values in the newly created list.
  281. // It requires that sort has been called.
  282. func (o *ordering) patchForInsert() {
  283. for i := 0; i < len(o.ordered)-1; {
  284. e := o.ordered[i]
  285. lev := e.level
  286. n := e.next
  287. for ; n != nil && len(n.elems) > 1; n = n.next {
  288. if n.level < lev {
  289. lev = n.level
  290. }
  291. n.skipRemove = true
  292. }
  293. for ; o.ordered[i] != n; i++ {
  294. o.ordered[i].level = lev
  295. o.ordered[i].next = n
  296. o.ordered[i+1].prev = e
  297. }
  298. }
  299. }
  300. // clone copies all ordering of es into a new ordering value.
  301. func (o *ordering) clone() *ordering {
  302. o.sort()
  303. oo := ordering{
  304. entryMap: make(map[string]*entry),
  305. }
  306. for _, e := range o.ordered {
  307. ne := &entry{
  308. runes: e.runes,
  309. elems: e.elems,
  310. str: e.str,
  311. decompose: e.decompose,
  312. exclude: e.exclude,
  313. logical: e.logical,
  314. }
  315. oo.insert(ne)
  316. }
  317. oo.sort() // link all ordering.
  318. oo.patchForInsert()
  319. return &oo
  320. }
  321. // front returns the first entry to be indexed.
  322. // It assumes that sort() has been called.
  323. func (o *ordering) front() *entry {
  324. e := o.ordered[0]
  325. if e.prev != nil {
  326. log.Panicf("unexpected first entry: %v", e)
  327. }
  328. // The first entry is always a logical position, which should not be indexed.
  329. e, _ = e.nextIndexed()
  330. return e
  331. }
  332. // sort sorts all ordering based on their collation elements and initializes
  333. // the prev, next, and level fields accordingly.
  334. func (o *ordering) sort() {
  335. sort.Sort(sortedEntries(o.ordered))
  336. l := o.ordered
  337. for i := 1; i < len(l); i++ {
  338. k := i - 1
  339. l[k].next = l[i]
  340. _, l[k].level = compareWeights(l[k].elems, l[i].elems)
  341. l[i].prev = l[k]
  342. }
  343. }
  344. // genColElems generates a collation element array from the runes in str. This
  345. // assumes that all collation elements have already been added to the Builder.
  346. func (o *ordering) genColElems(str string) []rawCE {
  347. elems := []rawCE{}
  348. for _, r := range []rune(str) {
  349. for _, ce := range o.find(string(r)).elems {
  350. if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
  351. elems = append(elems, ce)
  352. }
  353. }
  354. }
  355. return elems
  356. }