123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package runes
- import (
- "strings"
- "testing"
- "unicode"
- "golang.org/x/text/cases"
- "golang.org/x/text/language"
- "golang.org/x/text/transform"
- )
- var (
- toUpper = cases.Upper(language.Und)
- toLower = cases.Lower(language.Und)
- )
- type spanformer interface {
- transform.SpanningTransformer
- }
- func TestPredicate(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(Predicate(func(r rune) bool {
- return unicode.Is(rt, r)
- }), t, f)
- })
- }
- func TestIn(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(In(rt), t, f)
- })
- }
- func TestNotIn(t *testing.T) {
- testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
- return If(NotIn(rt), f, t)
- })
- }
- func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
- lower := f(unicode.Latin, toLower, toLower)
- for i, tt := range []transformTest{{
- desc: "empty",
- szDst: large,
- atEOF: true,
- in: "",
- out: "",
- outFull: "",
- t: lower,
- }, {
- desc: "small",
- szDst: 1,
- atEOF: true,
- in: "B",
- out: "b",
- outFull: "b",
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst",
- szDst: 2,
- atEOF: true,
- in: "AAA",
- out: "aa",
- outFull: "aaa",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst writing error",
- szDst: 1,
- atEOF: false,
- in: "A\x80",
- out: "a",
- outFull: "a\x80",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short dst writing incomplete rune",
- szDst: 2,
- atEOF: true,
- in: "Σ\xc2",
- out: "Σ",
- outFull: "Σ\xc2",
- err: transform.ErrShortDst,
- t: f(unicode.Latin, toLower, nil),
- }, {
- desc: "short dst, longer",
- szDst: 5,
- atEOF: true,
- in: "Hellø",
- out: "Hell",
- outFull: "Hellø",
- err: transform.ErrShortDst,
- // idem is used to test short buffers by forcing processing of full-rune increments.
- t: f(unicode.Latin, Map(idem), nil),
- }, {
- desc: "short dst, longer, writing error",
- szDst: 6,
- atEOF: false,
- in: "\x80Hello\x80",
- out: "\x80Hello",
- outFull: "\x80Hello\x80",
- err: transform.ErrShortDst,
- t: f(unicode.Latin, Map(idem), nil),
- }, {
- desc: "short src",
- szDst: 2,
- atEOF: false,
- in: "A\xc2",
- out: "a",
- outFull: "a\xc2",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: lower,
- }, {
- desc: "short src no change",
- szDst: 2,
- atEOF: false,
- in: "a\xc2",
- out: "a",
- outFull: "a\xc2",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- nSpan: 1,
- t: lower,
- }, {
- desc: "invalid input, atEOF",
- szDst: large,
- atEOF: true,
- in: "\x80",
- out: "\x80",
- outFull: "\x80",
- t: lower,
- }, {
- desc: "invalid input, !atEOF",
- szDst: large,
- atEOF: false,
- in: "\x80",
- out: "\x80",
- outFull: "\x80",
- t: lower,
- }, {
- desc: "invalid input, incomplete rune atEOF",
- szDst: large,
- atEOF: true,
- in: "\xc2",
- out: "\xc2",
- outFull: "\xc2",
- t: lower,
- }, {
- desc: "nop",
- szDst: large,
- atEOF: true,
- in: "Hello World!",
- out: "Hello World!",
- outFull: "Hello World!",
- t: f(unicode.Latin, nil, nil),
- }, {
- desc: "nop in",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "this is α ΤΕΣΤ",
- outFull: "this is α ΤΕΣΤ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Greek, nil, toLower),
- }, {
- desc: "nop in latin",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "THIS IS α τεστ",
- outFull: "THIS IS α τεστ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, nil, toLower),
- }, {
- desc: "nop not in",
- szDst: large,
- atEOF: true,
- in: "THIS IS α ΤΕΣΤ",
- out: "this is α ΤΕΣΤ",
- outFull: "this is α ΤΕΣΤ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, toLower, nil),
- }, {
- desc: "pass atEOF is true when at end",
- szDst: large,
- atEOF: true,
- in: "hello",
- out: "HELLO",
- outFull: "HELLO",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end of segment",
- szDst: large,
- atEOF: true,
- in: "hello ",
- out: "HELLO ",
- outFull: "HELLO ",
- errSpan: transform.ErrEndOfSpan,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "don't pass atEOF is true when atEOF is false",
- szDst: large,
- atEOF: false,
- in: "hello",
- out: "",
- outFull: "HELLO",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end, no change",
- szDst: large,
- atEOF: true,
- in: "HELLO",
- out: "HELLO",
- outFull: "HELLO",
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "pass atEOF is true when at end of segment, no change",
- szDst: large,
- atEOF: true,
- in: "HELLO ",
- out: "HELLO ",
- outFull: "HELLO ",
- t: f(unicode.Latin, upperAtEOF{}, nil),
- }, {
- desc: "large input ASCII",
- szDst: 12000,
- atEOF: false,
- in: strings.Repeat("HELLO", 2000),
- out: strings.Repeat("hello", 2000),
- outFull: strings.Repeat("hello", 2000),
- errSpan: transform.ErrEndOfSpan,
- err: nil,
- t: lower,
- }, {
- desc: "large input non-ASCII",
- szDst: 12000,
- atEOF: false,
- in: strings.Repeat("\u3333", 2000),
- out: strings.Repeat("\u3333", 2000),
- outFull: strings.Repeat("\u3333", 2000),
- err: nil,
- t: lower,
- }} {
- tt.check(t, i)
- }
- }
- // upperAtEOF is a strange Transformer that converts text to uppercase, but only
- // if atEOF is true.
- type upperAtEOF struct{ transform.NopResetter }
- func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
- if !atEOF {
- return 0, 0, transform.ErrShortSrc
- }
- return toUpper.Transform(dst, src, atEOF)
- }
- func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
- if !atEOF {
- return 0, transform.ErrShortSrc
- }
- return toUpper.Span(src, atEOF)
- }
- func BenchmarkConditional(b *testing.B) {
- doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
- }
|