123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- //go:generate stringer -type=Kind
- //go:generate go run gen.go gen_common.go gen_trieval.go
- // Package width provides functionality for handling different widths in text.
- //
- // Wide characters behave like ideographs; they tend to allow line breaks after
- // each character and remain upright in vertical text layout. Narrow characters
- // are kept together in words or runs that are rotated sideways in vertical text
- // layout.
- //
- // For more information, see https://unicode.org/reports/tr11/.
- package width // import "golang.org/x/text/width"
- import (
- "unicode/utf8"
- "golang.org/x/text/transform"
- )
- // TODO
- // 1) Reduce table size by compressing blocks.
- // 2) API proposition for computing display length
- // (approximation, fixed pitch only).
- // 3) Implement display length.
// Kind identifies the East Asian Width class of a rune, as defined in
// https://unicode.org/reports/tr11/.
type Kind int

// The East Asian Width classes. Values are assigned in declaration order,
// starting at zero with Neutral.
const (
	// Neutral characters do not occur in legacy East Asian character sets.
	Neutral Kind = iota

	// EastAsianAmbiguous characters can be either wide or narrow; resolving
	// their width requires additional information that is not contained in
	// the character code itself.
	EastAsianAmbiguous

	// EastAsianWide characters are wide in their usual form. They occur only
	// in the context of East Asian typography. These runes may have explicit
	// halfwidth counterparts.
	EastAsianWide

	// EastAsianNarrow characters are narrow in their usual form. They often
	// have fullwidth counterparts.
	//
	// Note: there exist Narrow runes that have neither a fullwidth nor a wide
	// counterpart, despite what the definition says (e.g. U+27E6).
	EastAsianNarrow

	// EastAsianFullwidth characters have a compatibility decomposition of
	// type wide that maps to a narrow counterpart.
	EastAsianFullwidth

	// EastAsianHalfwidth characters have a compatibility decomposition of
	// type narrow that maps to a wide or ambiguous counterpart, plus U+20A9 ₩
	// WON SIGN.
	//
	// Note: there exist runes that have a halfwidth counterpart but are
	// classified as Ambiguous rather than Wide (e.g. U+2190).
	EastAsianHalfwidth
)
- // TODO: the generated tries need to return size 1 for invalid runes for the
- // width to be computed correctly (each byte should render width 1)
- var trie = newWidthTrie(0)
- // Lookup reports the Properties of the first rune in b and the number of bytes
- // of its UTF-8 encoding.
- func Lookup(b []byte) (p Properties, size int) {
- v, sz := trie.lookup(b)
- return Properties{elem(v), b[sz-1]}, sz
- }
- // LookupString reports the Properties of the first rune in s and the number of
- // bytes of its UTF-8 encoding.
- func LookupString(s string) (p Properties, size int) {
- v, sz := trie.lookupString(s)
- return Properties{elem(v), s[sz-1]}, sz
- }
- // LookupRune reports the Properties of rune r.
- func LookupRune(r rune) Properties {
- var buf [4]byte
- n := utf8.EncodeRune(buf[:], r)
- v, _ := trie.lookup(buf[:n])
- last := byte(r)
- if r >= utf8.RuneSelf {
- last = 0x80 + byte(r&0x3f)
- }
- return Properties{elem(v), last}
- }
- // Properties provides access to width properties of a rune.
- type Properties struct {
- elem elem
- last byte
- }
- func (e elem) kind() Kind {
- return Kind(e >> typeShift)
- }
- // Kind returns the Kind of a rune as defined in Unicode TR #11.
- // See https://unicode.org/reports/tr11/ for more details.
- func (p Properties) Kind() Kind {
- return p.elem.kind()
- }
- // Folded returns the folded variant of a rune or 0 if the rune is canonical.
- func (p Properties) Folded() rune {
- if p.elem&tagNeedsFold != 0 {
- buf := inverseData[byte(p.elem)]
- buf[buf[0]] ^= p.last
- r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
- return r
- }
- return 0
- }
- // Narrow returns the narrow variant of a rune or 0 if the rune is already
- // narrow or doesn't have a narrow variant.
- func (p Properties) Narrow() rune {
- if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) {
- buf := inverseData[byte(p.elem)]
- buf[buf[0]] ^= p.last
- r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
- return r
- }
- return 0
- }
- // Wide returns the wide variant of a rune or 0 if the rune is already
- // wide or doesn't have a wide variant.
- func (p Properties) Wide() rune {
- if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) {
- buf := inverseData[byte(p.elem)]
- buf[buf[0]] ^= p.last
- r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]])
- return r
- }
- return 0
- }
// TODO for Properties:
// - Add Fullwidth/Halfwidth or Inverted methods for computing variant
//   mappings.
// - Add width information (including information on non-spacing runes).
- // Transformer implements the transform.Transformer interface.
- type Transformer struct {
- t transform.SpanningTransformer
- }
- // Reset implements the transform.Transformer interface.
- func (t Transformer) Reset() { t.t.Reset() }
- // Transform implements the transform.Transformer interface.
- func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- return t.t.Transform(dst, src, atEOF)
- }
- // Span implements the transform.SpanningTransformer interface.
- func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) {
- return t.t.Span(src, atEOF)
- }
- // Bytes returns a new byte slice with the result of applying t to b.
- func (t Transformer) Bytes(b []byte) []byte {
- b, _, _ = transform.Bytes(t, b)
- return b
- }
- // String returns a string with the result of applying t to s.
- func (t Transformer) String(s string) string {
- s, _, _ = transform.String(t, s)
- return s
- }
- var (
- // Fold is a transform that maps all runes to their canonical width.
- //
- // Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm
- // provide a more generic folding mechanism.
- Fold Transformer = Transformer{foldTransform{}}
- // Widen is a transform that maps runes to their wide variant, if
- // available.
- Widen Transformer = Transformer{wideTransform{}}
- // Narrow is a transform that maps runes to their narrow variant, if
- // available.
- Narrow Transformer = Transformer{narrowTransform{}}
- )
- // TODO: Consider the following options:
- // - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some
- // generalized variant of this.
- // - Consider a wide Won character to be the default width (or some generalized
- // variant of this).
- // - Filter the set of characters that gets converted (the preferred approach is
- // to allow applying filters to transforms).
|