123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package width
- import (
- "bytes"
- "strings"
- "testing"
- "golang.org/x/text/internal/testtext"
- "golang.org/x/text/transform"
- )
- func foldRune(r rune) (folded rune, ok bool) {
- alt, ok := mapRunes[r]
- if ok && alt.e&tagNeedsFold != 0 {
- return alt.r, true
- }
- return r, false
- }
- func widenRune(r rune) (wide rune, ok bool) {
- alt, ok := mapRunes[r]
- if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow {
- return alt.r, true
- }
- return r, false
- }
- func narrowRune(r rune) (narrow rune, ok bool) {
- alt, ok := mapRunes[r]
- if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous {
- return alt.r, true
- }
- return r, false
- }
- func TestFoldSingleRunes(t *testing.T) {
- for r := rune(0); r < 0x1FFFF; r++ {
- if loSurrogate <= r && r <= hiSurrogate {
- continue
- }
- x, _ := foldRune(r)
- want := string(x)
- got := Fold.String(string(r))
- if got != want {
- t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want)
- }
- }
- }
// transformTest describes one table-driven case executed by doTest.
type transformTest struct {
	// Inputs.
	desc  string // subtest name
	src   string // bytes handed to Transform, String and Span
	nBuf  int    // size of the destination buffer given to Transform
	atEOF bool   // atEOF flag passed to Transform and Span

	// Expectations for Transform and String.
	dst  string // result of String(src); its first nDst bytes are the Transform output
	nDst int    // bytes Transform reports as written
	nSrc int    // bytes Transform reports as consumed
	err  error  // error returned by Transform

	// Expectations for Span.
	nSpan   int   // byte count returned by Span
	errSpan error // error returned by Span
}
- func (tc *transformTest) doTest(t *testing.T, tr Transformer) {
- testtext.Run(t, tc.desc, func(t *testing.T) {
- b := make([]byte, tc.nBuf)
- nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF)
- if got := string(b[:nDst]); got != tc.dst[:nDst] {
- t.Errorf("dst was %+q; want %+q", got, tc.dst)
- }
- if nDst != tc.nDst {
- t.Errorf("nDst was %d; want %d", nDst, tc.nDst)
- }
- if nSrc != tc.nSrc {
- t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc)
- }
- if err != tc.err {
- t.Errorf("error was %v; want %v", err, tc.err)
- }
- if got := tr.String(tc.src); got != tc.dst {
- t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst)
- }
- n, err := tr.Span([]byte(tc.src), tc.atEOF)
- if n != tc.nSpan || err != tc.errSpan {
- t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan)
- }
- })
- }
- func TestFold(t *testing.T) {
- for _, tc := range []transformTest{{
- desc: "empty",
- src: "",
- nBuf: 10,
- dst: "",
- nDst: 0,
- nSrc: 0,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: nil,
- }, {
- desc: "short source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 1,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 1,
- errSpan: transform.ErrShortSrc,
- }, {
- desc: "short source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 1,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 1,
- errSpan: transform.ErrShortSrc,
- }, {
- desc: "incomplete but terminated source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 2,
- nSrc: 2,
- atEOF: true,
- err: nil,
- nSpan: 2,
- errSpan: nil,
- }, {
- desc: "incomplete but terminated source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 3,
- nSrc: 3,
- atEOF: true,
- err: nil,
- nSpan: 3,
- errSpan: nil,
- }, {
- desc: "exact fit dst",
- src: "a\uff01",
- nBuf: 2,
- dst: "a!",
- nDst: 2,
- nSrc: 4,
- atEOF: false,
- err: nil,
- nSpan: 1,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "exact fit dst and src ascii",
- src: "ab",
- nBuf: 2,
- dst: "ab",
- nDst: 2,
- nSrc: 2,
- atEOF: true,
- err: nil,
- nSpan: 2,
- errSpan: nil,
- }, {
- desc: "empty dst",
- src: "\u0300",
- nBuf: 0,
- dst: "\u0300",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 2,
- errSpan: nil,
- }, {
- desc: "empty dst ascii",
- src: "a",
- nBuf: 0,
- dst: "a",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 1,
- errSpan: nil,
- }, {
- desc: "short dst 1",
- src: "a\uffe0", // ¢
- nBuf: 2,
- dst: "a\u00a2", // ¢
- nDst: 1,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortDst,
- nSpan: 1,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short dst 2",
- src: "不夠",
- nBuf: 3,
- dst: "不夠",
- nDst: 3,
- nSrc: 3,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 6,
- errSpan: nil,
- }, {
- desc: "short dst fast path",
- src: "fast",
- nDst: 3,
- dst: "fast",
- nBuf: 3,
- nSrc: 3,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 4,
- errSpan: nil,
- }, {
- desc: "short dst larger buffer",
- src: "\uff21" + strings.Repeat("0", 127) + "B",
- nBuf: 128,
- dst: "A" + strings.Repeat("0", 127) + "B",
- nDst: 128,
- nSrc: 130,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "fast path alternation",
- src: "fast路徑fast路徑",
- nBuf: 20,
- dst: "fast路徑fast路徑",
- nDst: 20,
- nSrc: 20,
- atEOF: true,
- err: nil,
- nSpan: 20,
- errSpan: nil,
- }} {
- tc.doTest(t, Fold)
- }
- }
- func TestWidenSingleRunes(t *testing.T) {
- for r := rune(0); r < 0x1FFFF; r++ {
- if loSurrogate <= r && r <= hiSurrogate {
- continue
- }
- alt, _ := widenRune(r)
- want := string(alt)
- got := Widen.String(string(r))
- if got != want {
- t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want)
- }
- }
- }
- func TestWiden(t *testing.T) {
- for _, tc := range []transformTest{{
- desc: "empty",
- src: "",
- nBuf: 10,
- dst: "",
- nDst: 0,
- nSrc: 0,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: nil,
- }, {
- desc: "short source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 3,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 3,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "incomplete but terminated source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 4,
- nSrc: 2,
- atEOF: true,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "incomplete but terminated source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 5,
- nSrc: 3,
- atEOF: true,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short source 1 some span",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 3,
- nSrc: 3,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 3,
- errSpan: transform.ErrShortSrc,
- }, {
- desc: "short source 2 some span",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 3,
- nSrc: 3,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 3,
- errSpan: transform.ErrShortSrc,
- }, {
- desc: "incomplete but terminated source 1 some span",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 4,
- nSrc: 4,
- atEOF: true,
- err: nil,
- nSpan: 4,
- errSpan: nil,
- }, {
- desc: "incomplete but terminated source 2 some span",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 5,
- nSrc: 5,
- atEOF: true,
- err: nil,
- nSpan: 5,
- errSpan: nil,
- }, {
- desc: "exact fit dst",
- src: "a!",
- nBuf: 6,
- dst: "a\uff01",
- nDst: 6,
- nSrc: 2,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "empty dst",
- src: "\u0300",
- nBuf: 0,
- dst: "\u0300",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 2,
- errSpan: nil,
- }, {
- desc: "empty dst ascii",
- src: "a",
- nBuf: 0,
- dst: "a",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short dst 1",
- src: "a\uffe0",
- nBuf: 4,
- dst: "a\uffe0",
- nDst: 3,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short dst 2",
- src: "不夠",
- nBuf: 3,
- dst: "不夠",
- nDst: 3,
- nSrc: 3,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 6,
- errSpan: nil,
- }, {
- desc: "short dst ascii",
- src: "ascii",
- nBuf: 3,
- dst: "ascii", // U+ff41, ...
- nDst: 3,
- nSrc: 1,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "ambiguous",
- src: "\uffe9",
- nBuf: 4,
- dst: "\u2190",
- nDst: 3,
- nSrc: 3,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }} {
- tc.doTest(t, Widen)
- }
- }
- func TestNarrowSingleRunes(t *testing.T) {
- for r := rune(0); r < 0x1FFFF; r++ {
- if loSurrogate <= r && r <= hiSurrogate {
- continue
- }
- alt, _ := narrowRune(r)
- want := string(alt)
- got := Narrow.String(string(r))
- if got != want {
- t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want)
- }
- }
- }
- func TestNarrow(t *testing.T) {
- for _, tc := range []transformTest{{
- desc: "empty",
- src: "",
- nBuf: 10,
- dst: "",
- nDst: 0,
- nSrc: 0,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: nil,
- }, {
- desc: "short source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 1,
- nSrc: 1,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 1,
- errSpan: transform.ErrShortSrc,
- }, {
- desc: "short source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 1,
- nSrc: 3,
- atEOF: false,
- err: transform.ErrShortSrc,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "incomplete but terminated source 1",
- src: "a\xc2",
- nBuf: 10,
- dst: "a\xc2",
- nDst: 2,
- nSrc: 4,
- atEOF: true,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "incomplete but terminated source 2",
- src: "a\xe0\x80",
- nBuf: 10,
- dst: "a\xe0\x80",
- nDst: 3,
- nSrc: 5,
- atEOF: true,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "exact fit dst",
- src: "a\uff01",
- nBuf: 2,
- dst: "a!",
- nDst: 2,
- nSrc: 6,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "exact fit dst some span",
- src: "a\uff01",
- nBuf: 2,
- dst: "a!",
- nDst: 2,
- nSrc: 4,
- atEOF: false,
- err: nil,
- nSpan: 1,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "empty dst",
- src: "\u0300",
- nBuf: 0,
- dst: "\u0300",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 2,
- errSpan: nil,
- }, {
- desc: "empty dst ascii",
- src: "a",
- nBuf: 0,
- dst: "a",
- nDst: 0,
- nSrc: 0,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 1,
- errSpan: nil,
- }, {
- desc: "short dst 1",
- src: "a\uffe0", // ¢
- nBuf: 2,
- dst: "a\u00a2", // ¢
- nDst: 1,
- nSrc: 3,
- atEOF: false,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short dst 2",
- src: "不夠",
- nBuf: 3,
- dst: "不夠",
- nDst: 3,
- nSrc: 3,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 6,
- errSpan: nil,
- }, {
- // Create a narrow variant of ambiguous runes, if they exist.
- desc: "ambiguous",
- src: "\u2190",
- nBuf: 4,
- dst: "\uffe9",
- nDst: 3,
- nSrc: 3,
- atEOF: false,
- err: nil,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "short dst fast path",
- src: "fast",
- nBuf: 3,
- dst: "fast",
- nDst: 3,
- nSrc: 3,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 4,
- errSpan: nil,
- }, {
- desc: "short dst larger buffer",
- src: "\uff21" + strings.Repeat("0", 127) + "B",
- nBuf: 128,
- dst: "A" + strings.Repeat("0", 127) + "B",
- nDst: 128,
- nSrc: 130,
- atEOF: true,
- err: transform.ErrShortDst,
- nSpan: 0,
- errSpan: transform.ErrEndOfSpan,
- }, {
- desc: "fast path alternation",
- src: "fast路徑fast路徑",
- nBuf: 20,
- dst: "fast路徑fast路徑",
- nDst: 20,
- nSrc: 20,
- atEOF: true,
- err: nil,
- nSpan: 20,
- errSpan: nil,
- }} {
- tc.doTest(t, Narrow)
- }
- }
- func bench(b *testing.B, t Transformer, s string) {
- dst := make([]byte, 1024)
- src := []byte(s)
- b.SetBytes(int64(len(src)))
- b.ResetTimer()
- for i := 0; i < b.N; i++ {
- t.Transform(dst, src, true)
- }
- }
// changingRunes returns a string made of every rune in [0, 0xFFFF] for which
// f reports true as its second result, in ascending order.
//
// The predicate f is the one that is consulted, so each caller gets the set
// of runes that its own mapping (fold, widen or narrow) would change.
func changingRunes(f func(r rune) (rune, bool)) string {
	var buf bytes.Buffer
	for r := rune(0); r <= 0xFFFF; r++ {
		if _, ok := f(r); ok {
			buf.WriteRune(r)
		}
	}
	return buf.String()
}
- func BenchmarkFoldASCII(b *testing.B) {
- bench(b, Fold, testtext.ASCII)
- }
- func BenchmarkFoldCJK(b *testing.B) {
- bench(b, Fold, testtext.CJK)
- }
- func BenchmarkFoldNonCanonical(b *testing.B) {
- bench(b, Fold, changingRunes(foldRune))
- }
- func BenchmarkFoldOther(b *testing.B) {
- bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
- }
- func BenchmarkWideASCII(b *testing.B) {
- bench(b, Widen, testtext.ASCII)
- }
- func BenchmarkWideCJK(b *testing.B) {
- bench(b, Widen, testtext.CJK)
- }
- func BenchmarkWideNonCanonical(b *testing.B) {
- bench(b, Widen, changingRunes(widenRune))
- }
- func BenchmarkWideOther(b *testing.B) {
- bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
- }
- func BenchmarkNarrowASCII(b *testing.B) {
- bench(b, Narrow, testtext.ASCII)
- }
- func BenchmarkNarrowCJK(b *testing.B) {
- bench(b, Narrow, testtext.CJK)
- }
- func BenchmarkNarrowNonCanonical(b *testing.B) {
- bench(b, Narrow, changingRunes(narrowRune))
- }
- func BenchmarkNarrowOther(b *testing.B) {
- bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
- }
|