123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package runes
- import (
- "strings"
- "testing"
- "unicode/utf8"
- "golang.org/x/text/internal/testtext"
- "golang.org/x/text/transform"
- )
- type transformTest struct {
- desc string
- szDst int
- atEOF bool
- repl string
- in string
- out string // result string of first call to Transform
- outFull string // transform of entire input string
- err error
- errSpan error
- nSpan int
- t transform.SpanningTransformer
- }
// large is the buffer size (10240 bytes) used where a test needs a
// destination that is not deliberately undersized.
const large = 10 << 10
- func (tt *transformTest) check(t *testing.T, i int) {
- if tt.t == nil {
- return
- }
- dst := make([]byte, tt.szDst)
- src := []byte(tt.in)
- nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
- if err != tt.err {
- t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
- }
- if got := string(dst[:nDst]); got != tt.out {
- t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
- }
- // Calls tt.t.Transform for the remainder of the input. We use this to test
- // the nSrc return value.
- out := make([]byte, large)
- n := copy(out, dst[:nDst])
- nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
- if got, want := string(out[:n+nDst]), tt.outFull; got != want {
- t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
- }
- tt.t.Reset()
- p := 0
- for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
- }
- if tt.nSpan != 0 {
- p = tt.nSpan
- }
- if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
- t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
- }
- }
// idem is the identity mapping on runes: it returns its argument unchanged.
func idem(r rune) rune {
	return r
}
- func TestMap(t *testing.T) {
- runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
- // Default mapper used for this test.
- rotate := Map(func(r rune) rune {
- for i, m := range runes {
- if m == r {
- return runes[i+1]
- }
- }
- return r
- })
- for i, tt := range []transformTest{{
- desc: "empty",
- szDst: large,
- atEOF: true,
- in: "",
- out: "",
- outFull: "",
- t: rotate,
- }, {
- desc: "no change",
- szDst: 1,
- atEOF: true,
- in: "b",
- out: "b",
- outFull: "b",
- t: rotate,
- }, {
- desc: "short dst",
- szDst: 2,
- atEOF: true,
- in: "aaaa",
- out: "ç",
- outFull: "çççç",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "short dst ascii, no change",
- szDst: 2,
- atEOF: true,
- in: "bbb",
- out: "bb",
- outFull: "bbb",
- err: transform.ErrShortDst,
- t: rotate,
- }, {
- desc: "short dst writing error",
- szDst: 2,
- atEOF: false,
- in: "a\x80",
- out: "ç",
- outFull: "ç\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "short dst writing incomplete rune",
- szDst: 2,
- atEOF: true,
- in: "a\xc0",
- out: "ç",
- outFull: "ç\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "short dst, longer",
- szDst: 5,
- atEOF: true,
- in: "Hellø",
- out: "Hell",
- outFull: "Hellø",
- err: transform.ErrShortDst,
- t: rotate,
- }, {
- desc: "short dst, single",
- szDst: 1,
- atEOF: false,
- in: "ø",
- out: "",
- outFull: "ø",
- err: transform.ErrShortDst,
- t: Map(idem),
- }, {
- desc: "short dst, longer, writing error",
- szDst: 8,
- atEOF: false,
- in: "\x80Hello\x80",
- out: "\ufffdHello",
- outFull: "\ufffdHello\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "short src",
- szDst: 2,
- atEOF: false,
- in: "a\xc2",
- out: "ç",
- outFull: "ç\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "invalid input, atEOF",
- szDst: large,
- atEOF: true,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "invalid input, !atEOF",
- szDst: large,
- atEOF: false,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "incomplete rune !atEOF",
- szDst: large,
- atEOF: false,
- in: "\xc2",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- t: rotate,
- }, {
- desc: "invalid input, incomplete rune atEOF",
- szDst: large,
- atEOF: true,
- in: "\xc2",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "misc correct",
- szDst: large,
- atEOF: true,
- in: "a\U00012345 ç!",
- out: "ça 中!",
- outFull: "ça 中!",
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "misc correct and invalid",
- szDst: large,
- atEOF: true,
- in: "Hello\x80 w\x80orl\xc0d!\xc0",
- out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "misc correct and invalid, short src",
- szDst: large,
- atEOF: false,
- in: "Hello\x80 w\x80orl\xc0d!\xc2",
- out: "Hello\ufffd w\ufffdorl\ufffdd!",
- outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: rotate,
- }, {
- desc: "misc correct and invalid, short src, replacing RuneError",
- szDst: large,
- atEOF: false,
- in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
- out: "Hel?lo? w?orl?d!",
- outFull: "Hel?lo? w?orl?d!?",
- errSpan: transform.ErrEndOfSpan,
- err: transform.ErrShortSrc,
- t: Map(func(r rune) rune {
- if r == utf8.RuneError {
- return '?'
- }
- return r
- }),
- }} {
- tt.check(t, i)
- }
- }
- func TestRemove(t *testing.T) {
- remove := Remove(Predicate(func(r rune) bool {
- return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
- }))
- for i, tt := range []transformTest{
- 0: {
- szDst: large,
- atEOF: true,
- in: "",
- out: "",
- outFull: "",
- t: remove,
- },
- 1: {
- szDst: 0,
- atEOF: true,
- in: "aaaa",
- out: "",
- outFull: "",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 2: {
- szDst: 1,
- atEOF: true,
- in: "aaaa",
- out: "",
- outFull: "",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 3: {
- szDst: 1,
- atEOF: true,
- in: "baaaa",
- out: "b",
- outFull: "b",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 4: {
- szDst: 2,
- atEOF: true,
- in: "açaaa",
- out: "ç",
- outFull: "ç",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 5: {
- szDst: 2,
- atEOF: true,
- in: "aaaç",
- out: "ç",
- outFull: "ç",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 6: {
- szDst: 2,
- atEOF: false,
- in: "a\x80",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 7: {
- szDst: 1,
- atEOF: true,
- in: "a\xc0",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 8: {
- szDst: 1,
- atEOF: false,
- in: "a\xc2",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 9: {
- szDst: large,
- atEOF: true,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 10: {
- szDst: large,
- atEOF: false,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 11: {
- szDst: large,
- atEOF: true,
- in: "\xc2",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 12: {
- szDst: large,
- atEOF: false,
- in: "\xc2",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- t: remove,
- },
- 13: {
- szDst: large,
- atEOF: true,
- in: "Hello \U00012345world!",
- out: "Hll wrld!",
- outFull: "Hll wrld!",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 14: {
- szDst: large,
- atEOF: true,
- in: "Hello\x80 w\x80orl\xc0d!\xc0",
- out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
- outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 15: {
- szDst: large,
- atEOF: false,
- in: "Hello\x80 w\x80orl\xc0d!\xc2",
- out: "Hll\ufffd w\ufffdrl\ufffdd!",
- outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 16: {
- szDst: large,
- atEOF: false,
- in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
- out: "Hello world!",
- outFull: "Hello world!",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
- },
- 17: {
- szDst: 4,
- atEOF: true,
- in: "Hellø",
- out: "Hll",
- outFull: "Hllø",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 18: {
- szDst: 4,
- atEOF: false,
- in: "Hellø",
- out: "Hll",
- outFull: "Hllø",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 19: {
- szDst: 8,
- atEOF: false,
- in: "\x80Hello\uFF24\x80",
- out: "\ufffdHll",
- outFull: "\ufffdHll\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: remove,
- },
- 20: {
- szDst: 8,
- atEOF: false,
- in: "Hllll",
- out: "Hllll",
- outFull: "Hllll",
- t: remove,
- }} {
- tt.check(t, i)
- }
- }
- func TestReplaceIllFormed(t *testing.T) {
- replace := ReplaceIllFormed()
- for i, tt := range []transformTest{
- 0: {
- szDst: large,
- atEOF: true,
- in: "",
- out: "",
- outFull: "",
- t: replace,
- },
- 1: {
- szDst: 1,
- atEOF: true,
- in: "aa",
- out: "a",
- outFull: "aa",
- err: transform.ErrShortDst,
- t: replace,
- },
- 2: {
- szDst: 1,
- atEOF: true,
- in: "a\x80",
- out: "a",
- outFull: "a\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 3: {
- szDst: 1,
- atEOF: true,
- in: "a\xc2",
- out: "a",
- outFull: "a\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 4: {
- szDst: large,
- atEOF: true,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 5: {
- szDst: large,
- atEOF: false,
- in: "\x80",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 6: {
- szDst: large,
- atEOF: true,
- in: "\xc2",
- out: "\ufffd",
- outFull: "\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 7: {
- szDst: large,
- atEOF: false,
- in: "\xc2",
- out: "",
- outFull: "\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrShortSrc,
- t: replace,
- },
- 8: {
- szDst: large,
- atEOF: true,
- in: "Hello world!",
- out: "Hello world!",
- outFull: "Hello world!",
- t: replace,
- },
- 9: {
- szDst: large,
- atEOF: true,
- in: "Hello\x80 w\x80orl\xc2d!\xc2",
- out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 10: {
- szDst: large,
- atEOF: false,
- in: "Hello\x80 w\x80orl\xc2d!\xc2",
- out: "Hello\ufffd w\ufffdorl\ufffdd!",
- outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
- err: transform.ErrShortSrc,
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 16: {
- szDst: 10,
- atEOF: false,
- in: "\x80Hello\x80",
- out: "\ufffdHello",
- outFull: "\ufffdHello\ufffd",
- err: transform.ErrShortDst,
- errSpan: transform.ErrEndOfSpan,
- t: replace,
- },
- 17: {
- szDst: 10,
- atEOF: false,
- in: "\ufffdHello\ufffd",
- out: "\ufffdHello",
- outFull: "\ufffdHello\ufffd",
- err: transform.ErrShortDst,
- t: replace,
- },
- } {
- tt.check(t, i)
- }
- }
- func TestMapAlloc(t *testing.T) {
- if n := testtext.AllocsPerRun(3, func() {
- Map(idem).Transform(nil, nil, false)
- }); n > 0 {
- t.Errorf("got %f; want 0", n)
- }
- }
// rmNop is a predicate that matches no rune: it reports false for every input.
func rmNop(r rune) bool {
	return false
}
- func TestRemoveAlloc(t *testing.T) {
- if n := testtext.AllocsPerRun(3, func() {
- Remove(Predicate(rmNop)).Transform(nil, nil, false)
- }); n > 0 {
- t.Errorf("got %f; want 0", n)
- }
- }
- func TestReplaceIllFormedAlloc(t *testing.T) {
- if n := testtext.AllocsPerRun(3, func() {
- ReplaceIllFormed().Transform(nil, nil, false)
- }); n > 0 {
- t.Errorf("got %f; want 0", n)
- }
- }
- func doBench(b *testing.B, t Transformer) {
- for _, bc := range []struct{ name, data string }{
- {"ascii", testtext.ASCII},
- {"3byte", testtext.ThreeByteUTF8},
- } {
- dst := make([]byte, 2*len(bc.data))
- src := []byte(bc.data)
- testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
- b.SetBytes(int64(len(src)))
- for i := 0; i < b.N; i++ {
- t.Transform(dst, src, true)
- }
- })
- src = t.Bytes(src)
- t.Reset()
- testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
- b.SetBytes(int64(len(src)))
- for i := 0; i < b.N; i++ {
- t.Span(src, true)
- }
- })
- }
- }
- func BenchmarkRemove(b *testing.B) {
- doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
- }
- func BenchmarkMapAll(b *testing.B) {
- doBench(b, Map(func(r rune) rune { return 'a' }))
- }
- func BenchmarkMapNone(b *testing.B) {
- doBench(b, Map(func(r rune) rune { return r }))
- }
- func BenchmarkReplaceIllFormed(b *testing.B) {
- doBench(b, ReplaceIllFormed())
- }
// input is a sample sentence containing several non-ASCII letters, repeated
// 100 times.
var input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
|