123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251 |
- // Copyright 2014 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package display
- // This file contains common lookup code that is shared between the various
- // implementations of Namer and Dictionaries.
- import (
- "fmt"
- "sort"
- "strings"
- "golang.org/x/text/language"
- )
// namer is the lookup contract shared by the name* helpers in this file: it
// maps a table index to a display string.
type namer interface {
	// name returns the string stored for index idx. Implementations are
	// expected to consult their inheritance chain when the base index has
	// no value of its own. The helpers in this file treat an empty result
	// as "no name available".
	name(idx int) string
}
- func nameLanguage(n namer, x interface{}) string {
- t, _ := language.All.Compose(x)
- for {
- i, _, _ := langTagSet.index(t.Raw())
- if s := n.name(i); s != "" {
- return s
- }
- if t = t.Parent(); t == language.Und {
- return ""
- }
- }
- }
- func nameScript(n namer, x interface{}) string {
- t, _ := language.DeprecatedScript.Compose(x)
- _, s, _ := t.Raw()
- return n.name(scriptIndex.index(s.String()))
- }
- func nameRegion(n namer, x interface{}) string {
- t, _ := language.DeprecatedRegion.Compose(x)
- _, _, r := t.Raw()
- return n.name(regionIndex.index(r.String()))
- }
- func nameTag(langN, scrN, regN namer, x interface{}) string {
- t, ok := x.(language.Tag)
- if !ok {
- return ""
- }
- const form = language.All &^ language.SuppressScript
- if c, err := form.Canonicalize(t); err == nil {
- t = c
- }
- _, sRaw, rRaw := t.Raw()
- i, scr, reg := langTagSet.index(t.Raw())
- for i != -1 {
- if str := langN.name(i); str != "" {
- if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
- ss, sr := "", ""
- if hasS {
- ss = scrN.name(scriptIndex.index(scr.String()))
- }
- if hasR {
- sr = regN.name(regionIndex.index(reg.String()))
- }
- // TODO: use patterns in CLDR or at least confirm they are the
- // same for all languages.
- if ss != "" && sr != "" {
- return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
- }
- if ss != "" || sr != "" {
- return fmt.Sprintf("%s (%s%s)", str, ss, sr)
- }
- }
- return str
- }
- scr, reg = sRaw, rRaw
- if t = t.Parent(); t == language.Und {
- return ""
- }
- i, _, _ = langTagSet.index(t.Raw())
- }
- return ""
- }
// header holds the strings of a single namer in packed form.
//
// All strings are concatenated into data, and index records where each one
// starts: entry i occupies data[index[i]:index[i+1]]. index therefore has one
// element more than the number of entries. An entry whose two offsets are
// equal is the empty string, which means the name is undefined.
//
// For example, a header covering the languages de, el, en, fi, and nl:
//
//	header{
//		data:  "GermanGreekEnglishDutch",
//		index: []uint16{0, 6, 11, 18, 18, 23},
//	}
//
// Here fi (Finnish) spans data[18:18] and so has no name.
type header struct {
	data  string
	index []uint16
}

// name returns the string stored for entry i. It returns "" when i is
// negative or when index has no end offset for entry i.
func (h *header) name(i int) string {
	if i < 0 || i >= len(h.index)-1 {
		return ""
	}
	start, end := h.index[i], h.index[i+1]
	return h.data[start:end]
}
- // tagSet is used to find the index of a language in a set of tags.
- type tagSet struct {
- single tagIndex
- long []string
- }
- var (
- langTagSet = tagSet{
- single: langIndex,
- long: langTagsLong,
- }
- // selfTagSet is used for indexing the language strings in their own
- // language.
- selfTagSet = tagSet{
- single: selfIndex,
- long: selfTagsLong,
- }
- zzzz = language.MustParseScript("Zzzz")
- zz = language.MustParseRegion("ZZ")
- )
- // index returns the index of the tag for the given base, script and region or
- // its parent if the tag is not available. If the match is for a parent entry,
- // the excess script and region are returned.
- func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
- lang := base.String()
- index := -1
- if (scr != language.Script{} || reg != language.Region{}) {
- if scr == zzzz {
- scr = language.Script{}
- }
- if reg == zz {
- reg = language.Region{}
- }
- i := sort.SearchStrings(ts.long, lang)
- // All entries have either a script or a region and not both.
- scrStr, regStr := scr.String(), reg.String()
- for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
- if s := ts.long[i][len(lang)+1:]; s == scrStr {
- scr = language.Script{}
- index = i + ts.single.len()
- break
- } else if s == regStr {
- reg = language.Region{}
- index = i + ts.single.len()
- break
- }
- }
- }
- if index == -1 {
- index = ts.single.index(lang)
- }
- return index, scr, reg
- }
- func (ts *tagSet) Tags() []language.Tag {
- tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
- ts.single.keys(func(s string) {
- tags = append(tags, language.Raw.MustParse(s))
- })
- for _, s := range ts.long {
- tags = append(tags, language.Raw.MustParse(s))
- }
- return tags
- }
- func supportedScripts() []language.Script {
- scr := make([]language.Script, 0, scriptIndex.len())
- scriptIndex.keys(func(s string) {
- scr = append(scr, language.MustParseScript(s))
- })
- return scr
- }
- func supportedRegions() []language.Region {
- reg := make([]language.Region, 0, regionIndex.len())
- regionIndex.keys(func(s string) {
- reg = append(reg, language.MustParseRegion(s))
- })
- return reg
- }
// tagIndex holds concatenated lists of subtags of length 2 to 4, one string
// for each length, which can be used in combination with binary search to get
// the index associated with a tag. Within each string the tags must be
// sorted, since lookups use binary search.
//
// For example,
//
//	tagIndex{
//		"arenesfrruzh", // 6 2-byte tags.
//		"barwae",       // 2 3-byte tags.
//		"",
//	}
//
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
// "wae" had an index of 7.
//
// Bytes at the end of a string that do not make up a complete tag are
// ignored by all methods.
type tagIndex [3]string

// index returns the index of tag s, or -1 if s is not 2 to 4 bytes long or
// is not present. Indices count across the lists: all 2-byte tags come
// first, then the 3-byte tags, then the 4-byte tags.
func (t *tagIndex) index(s string) int {
	sz := len(s)
	if sz < 2 || 4 < sz {
		return -1
	}
	a := t[sz-2]
	// Binary search over the complete sz-byte slots of a.
	index := sort.Search(len(a)/sz, func(i int) bool {
		p := i * sz
		return a[p:p+sz] >= s
	})
	p := index * sz
	if end := p + sz; end > len(a) || a[p:end] != s {
		return -1
	}
	// Add the number of tags for smaller sizes.
	for i := 0; i < sz-2; i++ {
		index += len(t[i]) / (i + 2)
	}
	return index
}

// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
	for i, s := range t {
		n += len(s) / (i + 2)
	}
	return n
}

// keys calls f for each tag, in index order. Like len and index, it only
// considers complete tags: a trailing fragment shorter than the tag size of
// its list is skipped rather than sliced past the end of the string.
func (t *tagIndex) keys(f func(key string)) {
	for i, s := range *t {
		for sz := i + 2; len(s) >= sz; s = s[sz:] {
			f(s[:sz])
		}
	}
}
|