123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- // Copyright 2014 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package cases
- import "golang.org/x/text/transform"
- // A context is used for iterating over source bytes, fetching case info and
- // writing to a destination buffer.
- //
- // Casing operations may need more than one rune of context to decide how a rune
- // should be cased. Casing implementations should call checkpoint on context
- // whenever it is known to be safe to return the runes processed so far.
- //
- // It is recommended for implementations to not allow for more than 30 case
- // ignorables as lookahead (analogous to the limit in norm) and to use state if
- // unbounded lookahead is needed for cased runes.
- type context struct {
- dst, src []byte
- atEOF bool
- pDst int // pDst points past the last written rune in dst.
- pSrc int // pSrc points to the start of the currently scanned rune.
- // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
- nDst, nSrc int
- err error
- sz int // size of current rune
- info info // case information of currently scanned rune
- // State preserved across calls to Transform.
- isMidWord bool // false if next cased letter needs to be title-cased.
- }
- func (c *context) Reset() {
- c.isMidWord = false
- }
- // ret returns the return values for the Transform method. It checks whether
- // there were insufficient bytes in src to complete and introduces an error
- // accordingly, if necessary.
- func (c *context) ret() (nDst, nSrc int, err error) {
- if c.err != nil || c.nSrc == len(c.src) {
- return c.nDst, c.nSrc, c.err
- }
- // This point is only reached by mappers if there was no short destination
- // buffer. This means that the source buffer was exhausted and that c.sz was
- // set to 0 by next.
- if c.atEOF && c.pSrc == len(c.src) {
- return c.pDst, c.pSrc, nil
- }
- return c.nDst, c.nSrc, transform.ErrShortSrc
- }
- // retSpan returns the return values for the Span method. It checks whether
- // there were insufficient bytes in src to complete and introduces an error
- // accordingly, if necessary.
- func (c *context) retSpan() (n int, err error) {
- _, nSrc, err := c.ret()
- return nSrc, err
- }
- // checkpoint sets the return value buffer points for Transform to the current
- // positions.
- func (c *context) checkpoint() {
- if c.err == nil {
- c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
- }
- }
- // unreadRune causes the last rune read by next to be reread on the next
- // invocation of next. Only one unreadRune may be called after a call to next.
- func (c *context) unreadRune() {
- c.sz = 0
- }
- func (c *context) next() bool {
- c.pSrc += c.sz
- if c.pSrc == len(c.src) || c.err != nil {
- c.info, c.sz = 0, 0
- return false
- }
- v, sz := trie.lookup(c.src[c.pSrc:])
- c.info, c.sz = info(v), sz
- if c.sz == 0 {
- if c.atEOF {
- // A zero size means we have an incomplete rune. If we are atEOF,
- // this means it is an illegal rune, which we will consume one
- // byte at a time.
- c.sz = 1
- } else {
- c.err = transform.ErrShortSrc
- return false
- }
- }
- return true
- }
- // writeBytes adds bytes to dst.
- func (c *context) writeBytes(b []byte) bool {
- if len(c.dst)-c.pDst < len(b) {
- c.err = transform.ErrShortDst
- return false
- }
- // This loop is faster than using copy.
- for _, ch := range b {
- c.dst[c.pDst] = ch
- c.pDst++
- }
- return true
- }
- // writeString writes the given string to dst.
- func (c *context) writeString(s string) bool {
- if len(c.dst)-c.pDst < len(s) {
- c.err = transform.ErrShortDst
- return false
- }
- // This loop is faster than using copy.
- for i := 0; i < len(s); i++ {
- c.dst[c.pDst] = s[i]
- c.pDst++
- }
- return true
- }
- // copy writes the current rune to dst.
- func (c *context) copy() bool {
- return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
- }
- // copyXOR copies the current rune to dst and modifies it by applying the XOR
- // pattern of the case info. It is the responsibility of the caller to ensure
- // that this is a rune with a XOR pattern defined.
- func (c *context) copyXOR() bool {
- if !c.copy() {
- return false
- }
- if c.info&xorIndexBit == 0 {
- // Fast path for 6-bit XOR pattern, which covers most cases.
- c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
- } else {
- // Interpret XOR bits as an index.
- // TODO: test performance for unrolling this loop. Verify that we have
- // at least two bytes and at most three.
- idx := c.info >> xorShift
- for p := c.pDst - 1; ; p-- {
- c.dst[p] ^= xorData[idx]
- idx--
- if xorData[idx] == 0 {
- break
- }
- }
- }
- return true
- }
- // hasPrefix returns true if src[pSrc:] starts with the given string.
- func (c *context) hasPrefix(s string) bool {
- b := c.src[c.pSrc:]
- if len(b) < len(s) {
- return false
- }
- for i, c := range b[:len(s)] {
- if c != s[i] {
- return false
- }
- }
- return true
- }
- // caseType returns an info with only the case bits, normalized to either
- // cLower, cUpper, cTitle or cUncased.
- func (c *context) caseType() info {
- cm := c.info & 0x7
- if cm < 4 {
- return cm
- }
- if cm >= cXORCase {
- // xor the last bit of the rune with the case type bits.
- b := c.src[c.pSrc+c.sz-1]
- return info(b&1) ^ cm&0x3
- }
- if cm == cIgnorableCased {
- return cLower
- }
- return cUncased
- }
- // lower writes the lowercase version of the current rune to dst.
- func lower(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cLower {
- return c.copy()
- }
- if c.info&exceptionBit == 0 {
- return c.copyXOR()
- }
- e := exceptions[c.info>>exceptionShift:]
- offset := 2 + e[0]&lengthMask // size of header + fold string
- if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
- return c.writeString(e[offset : offset+nLower])
- }
- return c.copy()
- }
- func isLower(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cLower {
- return true
- }
- if c.info&exceptionBit == 0 {
- c.err = transform.ErrEndOfSpan
- return false
- }
- e := exceptions[c.info>>exceptionShift:]
- if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
- c.err = transform.ErrEndOfSpan
- return false
- }
- return true
- }
- // upper writes the uppercase version of the current rune to dst.
- func upper(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cUpper {
- return c.copy()
- }
- if c.info&exceptionBit == 0 {
- return c.copyXOR()
- }
- e := exceptions[c.info>>exceptionShift:]
- offset := 2 + e[0]&lengthMask // size of header + fold string
- // Get length of first special case mapping.
- n := (e[1] >> lengthBits) & lengthMask
- if ct == cTitle {
- // The first special case mapping is for lower. Set n to the second.
- if n == noChange {
- n = 0
- }
- n, e = e[1]&lengthMask, e[n:]
- }
- if n != noChange {
- return c.writeString(e[offset : offset+n])
- }
- return c.copy()
- }
- // isUpper writes the isUppercase version of the current rune to dst.
- func isUpper(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cUpper {
- return true
- }
- if c.info&exceptionBit == 0 {
- c.err = transform.ErrEndOfSpan
- return false
- }
- e := exceptions[c.info>>exceptionShift:]
- // Get length of first special case mapping.
- n := (e[1] >> lengthBits) & lengthMask
- if ct == cTitle {
- n = e[1] & lengthMask
- }
- if n != noChange {
- c.err = transform.ErrEndOfSpan
- return false
- }
- return true
- }
- // title writes the title case version of the current rune to dst.
- func title(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cTitle {
- return c.copy()
- }
- if c.info&exceptionBit == 0 {
- if ct == cLower {
- return c.copyXOR()
- }
- return c.copy()
- }
- // Get the exception data.
- e := exceptions[c.info>>exceptionShift:]
- offset := 2 + e[0]&lengthMask // size of header + fold string
- nFirst := (e[1] >> lengthBits) & lengthMask
- if nTitle := e[1] & lengthMask; nTitle != noChange {
- if nFirst != noChange {
- e = e[nFirst:]
- }
- return c.writeString(e[offset : offset+nTitle])
- }
- if ct == cLower && nFirst != noChange {
- // Use the uppercase version instead.
- return c.writeString(e[offset : offset+nFirst])
- }
- // Already in correct case.
- return c.copy()
- }
- // isTitle reports whether the current rune is in title case.
- func isTitle(c *context) bool {
- ct := c.caseType()
- if c.info&hasMappingMask == 0 || ct == cTitle {
- return true
- }
- if c.info&exceptionBit == 0 {
- if ct == cLower {
- c.err = transform.ErrEndOfSpan
- return false
- }
- return true
- }
- // Get the exception data.
- e := exceptions[c.info>>exceptionShift:]
- if nTitle := e[1] & lengthMask; nTitle != noChange {
- c.err = transform.ErrEndOfSpan
- return false
- }
- nFirst := (e[1] >> lengthBits) & lengthMask
- if ct == cLower && nFirst != noChange {
- c.err = transform.ErrEndOfSpan
- return false
- }
- return true
- }
- // foldFull writes the foldFull version of the current rune to dst.
- func foldFull(c *context) bool {
- if c.info&hasMappingMask == 0 {
- return c.copy()
- }
- ct := c.caseType()
- if c.info&exceptionBit == 0 {
- if ct != cLower || c.info&inverseFoldBit != 0 {
- return c.copyXOR()
- }
- return c.copy()
- }
- e := exceptions[c.info>>exceptionShift:]
- n := e[0] & lengthMask
- if n == 0 {
- if ct == cLower {
- return c.copy()
- }
- n = (e[1] >> lengthBits) & lengthMask
- }
- return c.writeString(e[2 : 2+n])
- }
- // isFoldFull reports whether the current run is mapped to foldFull
- func isFoldFull(c *context) bool {
- if c.info&hasMappingMask == 0 {
- return true
- }
- ct := c.caseType()
- if c.info&exceptionBit == 0 {
- if ct != cLower || c.info&inverseFoldBit != 0 {
- c.err = transform.ErrEndOfSpan
- return false
- }
- return true
- }
- e := exceptions[c.info>>exceptionShift:]
- n := e[0] & lengthMask
- if n == 0 && ct == cLower {
- return true
- }
- c.err = transform.ErrEndOfSpan
- return false
- }
|