123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393 |
- // Copyright 2012 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package build
- import (
- "fmt"
- "log"
- "sort"
- "strings"
- "unicode"
- "golang.org/x/text/internal/colltab"
- "golang.org/x/text/unicode/norm"
- )
- type logicalAnchor int
- const (
- firstAnchor logicalAnchor = -1
- noAnchor = 0
- lastAnchor = 1
- )
- // entry is used to keep track of a single entry in the collation element table
- // during building. Examples of entries can be found in the Default Unicode
- // Collation Element Table.
- // See https://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
- type entry struct {
- str string // same as string(runes)
- runes []rune
- elems []rawCE // the collation elements
- extend string // weights of extend to be appended to elems
- before bool // weights relative to next instead of previous.
- lock bool // entry is used in extension and can no longer be moved.
- // prev, next, and level are used to keep track of tailorings.
- prev, next *entry
- level colltab.Level // next differs at this level
- skipRemove bool // do not unlink when removed
- decompose bool // can use NFKD decomposition to generate elems
- exclude bool // do not include in table
- implicit bool // derived, is not included in the list
- modified bool // entry was modified in tailoring
- logical logicalAnchor
- expansionIndex int // used to store index into expansion table
- contractionHandle ctHandle
- contractionIndex int // index into contraction elements
- }
- func (e *entry) String() string {
- return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
- e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
- }
- func (e *entry) skip() bool {
- return e.contraction()
- }
- func (e *entry) expansion() bool {
- return !e.decompose && len(e.elems) > 1
- }
- func (e *entry) contraction() bool {
- return len(e.runes) > 1
- }
- func (e *entry) contractionStarter() bool {
- return e.contractionHandle.n != 0
- }
- // nextIndexed gets the next entry that needs to be stored in the table.
- // It returns the entry and the collation level at which the next entry differs
- // from the current entry.
- // Entries that can be explicitly derived and logical reset positions are
- // examples of entries that will not be indexed.
- func (e *entry) nextIndexed() (*entry, colltab.Level) {
- level := e.level
- for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
- if e.level < level {
- level = e.level
- }
- }
- return e, level
- }
- // remove unlinks entry e from the sorted chain and clears the collation
- // elements. e may not be at the front or end of the list. This should always
- // be the case, as the front and end of the list are always logical anchors,
- // which may not be removed.
- func (e *entry) remove() {
- if e.logical != noAnchor {
- log.Fatalf("may not remove anchor %q", e.str)
- }
- // TODO: need to set e.prev.level to e.level if e.level is smaller?
- e.elems = nil
- if !e.skipRemove {
- if e.prev != nil {
- e.prev.next = e.next
- }
- if e.next != nil {
- e.next.prev = e.prev
- }
- }
- e.skipRemove = false
- }
- // insertAfter inserts n after e.
- func (e *entry) insertAfter(n *entry) {
- if e == n {
- panic("e == anchor")
- }
- if e == nil {
- panic("unexpected nil anchor")
- }
- n.remove()
- n.decompose = false // redo decomposition test
- n.next = e.next
- n.prev = e
- if e.next != nil {
- e.next.prev = n
- }
- e.next = n
- }
- // insertBefore inserts n before e.
- func (e *entry) insertBefore(n *entry) {
- if e == n {
- panic("e == anchor")
- }
- if e == nil {
- panic("unexpected nil anchor")
- }
- n.remove()
- n.decompose = false // redo decomposition test
- n.prev = e.prev
- n.next = e
- if e.prev != nil {
- e.prev.next = n
- }
- e.prev = n
- }
- func (e *entry) encodeBase() (ce uint32, err error) {
- switch {
- case e.expansion():
- ce, err = makeExpandIndex(e.expansionIndex)
- default:
- if e.decompose {
- log.Fatal("decompose should be handled elsewhere")
- }
- ce, err = makeCE(e.elems[0])
- }
- return
- }
- func (e *entry) encode() (ce uint32, err error) {
- if e.skip() {
- log.Fatal("cannot build colElem for entry that should be skipped")
- }
- switch {
- case e.decompose:
- t1 := e.elems[0].w[2]
- t2 := 0
- if len(e.elems) > 1 {
- t2 = e.elems[1].w[2]
- }
- ce, err = makeDecompose(t1, t2)
- case e.contractionStarter():
- ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
- default:
- if len(e.runes) > 1 {
- log.Fatal("colElem: contractions are handled in contraction trie")
- }
- ce, err = e.encodeBase()
- }
- return
- }
- // entryLess returns true if a sorts before b and false otherwise.
- func entryLess(a, b *entry) bool {
- if res, _ := compareWeights(a.elems, b.elems); res != 0 {
- return res == -1
- }
- if a.logical != noAnchor {
- return a.logical == firstAnchor
- }
- if b.logical != noAnchor {
- return b.logical == lastAnchor
- }
- return a.str < b.str
- }
- type sortedEntries []*entry
- func (s sortedEntries) Len() int {
- return len(s)
- }
- func (s sortedEntries) Swap(i, j int) {
- s[i], s[j] = s[j], s[i]
- }
- func (s sortedEntries) Less(i, j int) bool {
- return entryLess(s[i], s[j])
- }
- type ordering struct {
- id string
- entryMap map[string]*entry
- ordered []*entry
- handle *trieHandle
- }
- // insert inserts e into both entryMap and ordered.
- // Note that insert simply appends e to ordered. To reattain a sorted
- // order, o.sort() should be called.
- func (o *ordering) insert(e *entry) {
- if e.logical == noAnchor {
- o.entryMap[e.str] = e
- } else {
- // Use key format as used in UCA rules.
- o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
- // Also add index entry for XML format.
- o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
- }
- o.ordered = append(o.ordered, e)
- }
- // newEntry creates a new entry for the given info and inserts it into
- // the index.
- func (o *ordering) newEntry(s string, ces []rawCE) *entry {
- e := &entry{
- runes: []rune(s),
- elems: ces,
- str: s,
- }
- o.insert(e)
- return e
- }
- // find looks up and returns the entry for the given string.
- // It returns nil if str is not in the index and if an implicit value
- // cannot be derived, that is, if str represents more than one rune.
- func (o *ordering) find(str string) *entry {
- e := o.entryMap[str]
- if e == nil {
- r := []rune(str)
- if len(r) == 1 {
- const (
- firstHangul = 0xAC00
- lastHangul = 0xD7A3
- )
- if r[0] >= firstHangul && r[0] <= lastHangul {
- ce := []rawCE{}
- nfd := norm.NFD.String(str)
- for _, r := range nfd {
- ce = append(ce, o.find(string(r)).elems...)
- }
- e = o.newEntry(nfd, ce)
- } else {
- e = o.newEntry(string(r[0]), []rawCE{
- {w: []int{
- implicitPrimary(r[0]),
- defaultSecondary,
- defaultTertiary,
- int(r[0]),
- },
- },
- })
- e.modified = true
- }
- e.exclude = true // do not index implicits
- }
- }
- return e
- }
- // makeRootOrdering returns a newly initialized ordering value and populates
- // it with a set of logical reset points that can be used as anchors.
- // The anchors first_tertiary_ignorable and __END__ will always sort at
- // the beginning and end, respectively. This means that prev and next are non-nil
- // for any indexed entry.
- func makeRootOrdering() ordering {
- const max = unicode.MaxRune
- o := ordering{
- entryMap: make(map[string]*entry),
- }
- insert := func(typ logicalAnchor, s string, ce []int) {
- e := &entry{
- elems: []rawCE{{w: ce}},
- str: s,
- exclude: true,
- logical: typ,
- }
- o.insert(e)
- }
- insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
- insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
- insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
- insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
- insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
- return o
- }
- // patchForInsert eleminates entries from the list with more than one collation element.
- // The next and prev fields of the eliminated entries still point to appropriate
- // values in the newly created list.
- // It requires that sort has been called.
- func (o *ordering) patchForInsert() {
- for i := 0; i < len(o.ordered)-1; {
- e := o.ordered[i]
- lev := e.level
- n := e.next
- for ; n != nil && len(n.elems) > 1; n = n.next {
- if n.level < lev {
- lev = n.level
- }
- n.skipRemove = true
- }
- for ; o.ordered[i] != n; i++ {
- o.ordered[i].level = lev
- o.ordered[i].next = n
- o.ordered[i+1].prev = e
- }
- }
- }
- // clone copies all ordering of es into a new ordering value.
- func (o *ordering) clone() *ordering {
- o.sort()
- oo := ordering{
- entryMap: make(map[string]*entry),
- }
- for _, e := range o.ordered {
- ne := &entry{
- runes: e.runes,
- elems: e.elems,
- str: e.str,
- decompose: e.decompose,
- exclude: e.exclude,
- logical: e.logical,
- }
- oo.insert(ne)
- }
- oo.sort() // link all ordering.
- oo.patchForInsert()
- return &oo
- }
- // front returns the first entry to be indexed.
- // It assumes that sort() has been called.
- func (o *ordering) front() *entry {
- e := o.ordered[0]
- if e.prev != nil {
- log.Panicf("unexpected first entry: %v", e)
- }
- // The first entry is always a logical position, which should not be indexed.
- e, _ = e.nextIndexed()
- return e
- }
- // sort sorts all ordering based on their collation elements and initializes
- // the prev, next, and level fields accordingly.
- func (o *ordering) sort() {
- sort.Sort(sortedEntries(o.ordered))
- l := o.ordered
- for i := 1; i < len(l); i++ {
- k := i - 1
- l[k].next = l[i]
- _, l[k].level = compareWeights(l[k].elems, l[i].elems)
- l[i].prev = l[k]
- }
- }
- // genColElems generates a collation element array from the runes in str. This
- // assumes that all collation elements have already been added to the Builder.
- func (o *ordering) genColElems(str string) []rawCE {
- elems := []rawCE{}
- for _, r := range []rune(str) {
- for _, ce := range o.find(string(r)).elems {
- if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
- elems = append(elems, ce)
- }
- }
- }
- return elems
- }
|