123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228 |
- // Copyright 2013 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package language
- import (
- "errors"
- "strconv"
- "strings"
- "golang.org/x/text/internal/language"
- )
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
//
// It embeds error, so a ValueError can be returned and inspected wherever a
// plain error is expected.
type ValueError interface {
	error
	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
- // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
- // failed it returns an error and any part of the tag that could be parsed.
- // If parsing succeeded but an unknown value was found, it returns
- // ValueError. The Tag returned in this case is just stripped of the unknown
- // value. All other values are preserved. It accepts tags in the BCP 47 format
- // and extensions to this standard defined in
- // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
- // The resulting tag is canonicalized using the default canonicalization type.
- func Parse(s string) (t Tag, err error) {
- return Default.Parse(s)
- }
- // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
- // failed it returns an error and any part of the tag that could be parsed.
- // If parsing succeeded but an unknown value was found, it returns
- // ValueError. The Tag returned in this case is just stripped of the unknown
- // value. All other values are preserved. It accepts tags in the BCP 47 format
- // and extensions to this standard defined in
- // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
- // The resulting tag is canonicalized using the canonicalization type c.
- func (c CanonType) Parse(s string) (t Tag, err error) {
- tt, err := language.Parse(s)
- if err != nil {
- return makeTag(tt), err
- }
- tt, changed := canonicalize(c, tt)
- if changed {
- tt.RemakeString()
- }
- return makeTag(tt), err
- }
- // Compose creates a Tag from individual parts, which may be of type Tag, Base,
- // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
- // Base, Script or Region or slice of type Variant or Extension is passed more
- // than once, the latter will overwrite the former. Variants and Extensions are
- // accumulated, but if two extensions of the same type are passed, the latter
- // will replace the former. For -u extensions, though, the key-type pairs are
- // added, where later values overwrite older ones. A Tag overwrites all former
- // values and typically only makes sense as the first argument. The resulting
- // tag is returned after canonicalizing using the Default CanonType. If one or
- // more errors are encountered, one of the errors is returned.
- func Compose(part ...interface{}) (t Tag, err error) {
- return Default.Compose(part...)
- }
- // Compose creates a Tag from individual parts, which may be of type Tag, Base,
- // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
- // Base, Script or Region or slice of type Variant or Extension is passed more
- // than once, the latter will overwrite the former. Variants and Extensions are
- // accumulated, but if two extensions of the same type are passed, the latter
- // will replace the former. For -u extensions, though, the key-type pairs are
- // added, where later values overwrite older ones. A Tag overwrites all former
- // values and typically only makes sense as the first argument. The resulting
- // tag is returned after canonicalizing using CanonType c. If one or more errors
- // are encountered, one of the errors is returned.
- func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
- var b language.Builder
- if err = update(&b, part...); err != nil {
- return und, err
- }
- b.Tag, _ = canonicalize(c, b.Tag)
- return makeTag(b.Make()), err
- }
- var errInvalidArgument = errors.New("invalid Extension or Variant")
- func update(b *language.Builder, part ...interface{}) (err error) {
- for _, x := range part {
- switch v := x.(type) {
- case Tag:
- b.SetTag(v.tag())
- case Base:
- b.Tag.LangID = v.langID
- case Script:
- b.Tag.ScriptID = v.scriptID
- case Region:
- b.Tag.RegionID = v.regionID
- case Variant:
- if v.variant == "" {
- err = errInvalidArgument
- break
- }
- b.AddVariant(v.variant)
- case Extension:
- if v.s == "" {
- err = errInvalidArgument
- break
- }
- b.SetExt(v.s)
- case []Variant:
- b.ClearVariants()
- for _, v := range v {
- b.AddVariant(v.variant)
- }
- case []Extension:
- b.ClearExtensions()
- for _, e := range v {
- b.SetExt(e.s)
- }
- // TODO: support parsing of raw strings based on morphology or just extensions?
- case error:
- if v != nil {
- err = v
- }
- }
- }
- return
- }
- var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
- // ParseAcceptLanguage parses the contents of an Accept-Language header as
- // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
- // a list of corresponding quality weights. It is more permissive than RFC 2616
- // and may return non-nil slices even if the input is not valid.
- // The Tags will be sorted by highest weight first and then by first occurrence.
- // Tags with a weight of zero will be dropped. An error will be returned if the
- // input could not be parsed.
- func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
- var entry string
- for s != "" {
- if entry, s = split(s, ','); entry == "" {
- continue
- }
- entry, weight := split(entry, ';')
- // Scan the language.
- t, err := Parse(entry)
- if err != nil {
- id, ok := acceptFallback[entry]
- if !ok {
- return nil, nil, err
- }
- t = makeTag(language.Tag{LangID: id})
- }
- // Scan the optional weight.
- w := 1.0
- if weight != "" {
- weight = consume(weight, 'q')
- weight = consume(weight, '=')
- // consume returns the empty string when a token could not be
- // consumed, resulting in an error for ParseFloat.
- if w, err = strconv.ParseFloat(weight, 32); err != nil {
- return nil, nil, errInvalidWeight
- }
- // Drop tags with a quality weight of 0.
- if w <= 0 {
- continue
- }
- }
- tag = append(tag, t)
- q = append(q, float32(w))
- }
- sortStable(&tagSort{tag, q})
- return tag, q, nil
- }
// consume strips the single leading byte c from s and returns what follows,
// with surrounding white space trimmed. If s is empty or does not start with
// c, it returns the empty string.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
// split cuts s at the first occurrence of the byte c. head is the text before
// c and tail the text after it, both with surrounding white space trimmed. If
// c does not occur in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
// acceptFallback is a hack mapping to deal with a small number of cases that
// occur in Accept-Language (with reasonable frequency).
//
// ParseAcceptLanguage consults it only after Parse has returned an error for
// an entry; the lookup uses the white-space-trimmed entry exactly as written,
// so keys must match byte for byte.
var acceptFallback = map[string]language.Language{
	"english": _en,
	"deutsch": _de,
	"italian": _it,
	"french":  _fr,
	"*":       _mul, // defined in the spec to match all languages.
}
- type tagSort struct {
- tag []Tag
- q []float32
- }
- func (s *tagSort) Len() int {
- return len(s.q)
- }
- func (s *tagSort) Less(i, j int) bool {
- return s.q[i] > s.q[j]
- }
- func (s *tagSort) Swap(i, j int) {
- s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
- s.q[i], s.q[j] = s.q[j], s.q[i]
- }
|