123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- // Copyright 2012 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // The trie in this file is used to associate the first full character in an
- // UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
- // sequence are used to lookup offsets in the index table to be used for the
- // next byte. The last byte is used to index into a table of collation elements.
- // For a full description, see go.text/collate/build/trie.go.
- package colltab
- const blockSize = 64
- type Trie struct {
- Index0 []uint16 // index for first byte (0xC0-0xFF)
- Values0 []uint32 // index for first byte (0x00-0x7F)
- Index []uint16
- Values []uint32
- }
- const (
- t1 = 0x00 // 0000 0000
- tx = 0x80 // 1000 0000
- t2 = 0xC0 // 1100 0000
- t3 = 0xE0 // 1110 0000
- t4 = 0xF0 // 1111 0000
- t5 = 0xF8 // 1111 1000
- t6 = 0xFC // 1111 1100
- te = 0xFE // 1111 1110
- )
- func (t *Trie) lookupValue(n uint16, b byte) Elem {
- return Elem(t.Values[int(n)<<6+int(b)])
- }
- // lookup returns the trie value for the first UTF-8 encoding in s and
- // the width in bytes of this encoding. The size will be 0 if s does not
- // hold enough bytes to complete the encoding. len(s) must be greater than 0.
- func (t *Trie) lookup(s []byte) (v Elem, sz int) {
- c0 := s[0]
- switch {
- case c0 < tx:
- return Elem(t.Values0[c0]), 1
- case c0 < t2:
- return 0, 1
- case c0 < t3:
- if len(s) < 2 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- return t.lookupValue(i, c1), 2
- case c0 < t4:
- if len(s) < 3 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- o := int(i)<<6 + int(c1)
- i = t.Index[o]
- c2 := s[2]
- if c2 < tx || t2 <= c2 {
- return 0, 2
- }
- return t.lookupValue(i, c2), 3
- case c0 < t5:
- if len(s) < 4 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- o := int(i)<<6 + int(c1)
- i = t.Index[o]
- c2 := s[2]
- if c2 < tx || t2 <= c2 {
- return 0, 2
- }
- o = int(i)<<6 + int(c2)
- i = t.Index[o]
- c3 := s[3]
- if c3 < tx || t2 <= c3 {
- return 0, 3
- }
- return t.lookupValue(i, c3), 4
- }
- // Illegal rune
- return 0, 1
- }
- // The body of lookupString is a verbatim copy of that of lookup.
- func (t *Trie) lookupString(s string) (v Elem, sz int) {
- c0 := s[0]
- switch {
- case c0 < tx:
- return Elem(t.Values0[c0]), 1
- case c0 < t2:
- return 0, 1
- case c0 < t3:
- if len(s) < 2 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- return t.lookupValue(i, c1), 2
- case c0 < t4:
- if len(s) < 3 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- o := int(i)<<6 + int(c1)
- i = t.Index[o]
- c2 := s[2]
- if c2 < tx || t2 <= c2 {
- return 0, 2
- }
- return t.lookupValue(i, c2), 3
- case c0 < t5:
- if len(s) < 4 {
- return 0, 0
- }
- i := t.Index0[c0]
- c1 := s[1]
- if c1 < tx || t2 <= c1 {
- return 0, 1
- }
- o := int(i)<<6 + int(c1)
- i = t.Index[o]
- c2 := s[2]
- if c2 < tx || t2 <= c2 {
- return 0, 2
- }
- o = int(i)<<6 + int(c2)
- i = t.Index[o]
- c3 := s[3]
- if c3 < tx || t2 <= c3 {
- return 0, 3
- }
- return t.lookupValue(i, c3), 4
- }
- // Illegal rune
- return 0, 1
- }
|