123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258 |
- // Copyright 2015 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package charmap
- import (
- "testing"
- "golang.org/x/text/encoding"
- "golang.org/x/text/encoding/internal"
- "golang.org/x/text/encoding/internal/enctest"
- "golang.org/x/text/transform"
- )
- func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
- return "Decode", e.NewDecoder(), nil
- }
- func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
- return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
- }
- func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
- return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
- }
- func TestNonRepertoire(t *testing.T) {
- testCases := []struct {
- init func(e encoding.Encoding) (string, transform.Transformer, error)
- e encoding.Encoding
- src, want string
- }{
- {dec, Windows1252, "\x81", "\ufffd"},
- {encEBCDIC, CodePage037, "갂", ""},
- {encEBCDIC, CodePage1047, "갂", ""},
- {encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
- {encEBCDIC, CodePage1140, "갂", ""},
- {encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
- {encASCIISuperset, Windows1252, "갂", ""},
- {encASCIISuperset, Windows1252, "a갂", "a"},
- {encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
- }
- for _, tc := range testCases {
- dir, tr, wantErr := tc.init(tc.e)
- dst, _, err := transform.String(tr, tc.src)
- if err != wantErr {
- t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
- }
- if got := string(dst); got != tc.want {
- t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
- }
- }
- }
- func TestBasics(t *testing.T) {
- testCases := []struct {
- e encoding.Encoding
- encoded string
- utf8 string
- }{{
- e: CodePage037,
- encoded: "\xc8\x51\xba\x93\xcf",
- utf8: "Hé[lõ",
- }, {
- e: CodePage437,
- encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
- utf8: "Héllô ¥º⌠£╛",
- }, {
- e: CodePage866,
- encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
- utf8: "Hє╙o Ш¤Я▌б",
- }, {
- e: CodePage1047,
- encoded: "\xc8\x54\x93\x93\x9f",
- utf8: "Hèll¤",
- }, {
- e: CodePage1140,
- encoded: "\xc8\x9f\x93\x93\xcf",
- utf8: "H€llõ",
- }, {
- e: ISO8859_2,
- encoded: "Hel\xe5\xf5",
- utf8: "Helĺő",
- }, {
- e: ISO8859_3,
- encoded: "He\xbd\xd4",
- utf8: "He½Ô",
- }, {
- e: ISO8859_4,
- encoded: "Hel\xb6\xf8",
- utf8: "Helļø",
- }, {
- e: ISO8859_5,
- encoded: "H\xd7\xc6o",
- utf8: "HзЦo",
- }, {
- e: ISO8859_6,
- encoded: "Hel\xc2\xc9",
- utf8: "Helآة",
- }, {
- e: ISO8859_7,
- encoded: "H\xeel\xebo",
- utf8: "Hξlλo",
- }, {
- e: ISO8859_8,
- encoded: "Hel\xf5\xed",
- utf8: "Helץם",
- }, {
- e: ISO8859_9,
- encoded: "\xdeayet",
- utf8: "Şayet",
- }, {
- e: ISO8859_10,
- encoded: "H\xea\xbfo",
- utf8: "Hęŋo",
- }, {
- e: ISO8859_13,
- encoded: "H\xe6l\xf9o",
- utf8: "Hęlło",
- }, {
- e: ISO8859_14,
- encoded: "He\xfe\xd0o",
- utf8: "HeŷŴo",
- }, {
- e: ISO8859_15,
- encoded: "H\xa4ll\xd8",
- utf8: "H€llØ",
- }, {
- e: ISO8859_16,
- encoded: "H\xe6ll\xbd",
- utf8: "Hællœ",
- }, {
- e: KOI8R,
- encoded: "He\x93\xad\x9c",
- utf8: "He⌠╜°",
- }, {
- e: KOI8U,
- encoded: "He\x93\xad\x9c",
- utf8: "He⌠ґ°",
- }, {
- e: Macintosh,
- encoded: "He\xdf\xd7",
- utf8: "Hefl◊",
- }, {
- e: MacintoshCyrillic,
- encoded: "He\xbe\x94",
- utf8: "HeЊФ",
- }, {
- e: Windows874,
- encoded: "He\xb7\xf0",
- utf8: "Heท๐",
- }, {
- e: Windows1250,
- encoded: "He\xe5\xe5o",
- utf8: "Heĺĺo",
- }, {
- e: Windows1251,
- encoded: "H\xball\xfe",
- utf8: "Hєllю",
- }, {
- e: Windows1252,
- encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
- utf8: "Héllô ¥º®£Ð",
- }, {
- e: Windows1253,
- encoded: "H\xe5ll\xd6",
- utf8: "HεllΦ",
- }, {
- e: Windows1254,
- encoded: "\xd0ello",
- utf8: "Ğello",
- }, {
- e: Windows1255,
- encoded: "He\xd4o",
- utf8: "Heװo",
- }, {
- e: Windows1256,
- encoded: "H\xdbllo",
- utf8: "Hغllo",
- }, {
- e: Windows1257,
- encoded: "He\xeflo",
- utf8: "Heļlo",
- }, {
- e: Windows1258,
- encoded: "Hell\xf5",
- utf8: "Hellơ",
- }, {
- e: XUserDefined,
- encoded: "\x00\x40\x7f\x80\xab\xff",
- utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
- }}
- for _, tc := range testCases {
- enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
- }
- }
- var windows1255TestCases = []struct {
- b byte
- ok bool
- r rune
- }{
- {'\x00', true, '\u0000'},
- {'\x1a', true, '\u001a'},
- {'\x61', true, '\u0061'},
- {'\x7f', true, '\u007f'},
- {'\x80', true, '\u20ac'},
- {'\x95', true, '\u2022'},
- {'\xa0', true, '\u00a0'},
- {'\xc0', true, '\u05b0'},
- {'\xfc', true, '\ufffd'},
- {'\xfd', true, '\u200e'},
- {'\xfe', true, '\u200f'},
- {'\xff', true, '\ufffd'},
- {encoding.ASCIISub, false, '\u0400'},
- {encoding.ASCIISub, false, '\u2603'},
- {encoding.ASCIISub, false, '\U0001f4a9'},
- }
- func TestDecodeByte(t *testing.T) {
- for _, tc := range windows1255TestCases {
- if !tc.ok {
- continue
- }
- got := Windows1255.DecodeByte(tc.b)
- want := tc.r
- if got != want {
- t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want)
- }
- }
- }
- func TestEncodeRune(t *testing.T) {
- for _, tc := range windows1255TestCases {
- // There can be multiple tc.b values that map to tc.r = '\ufffd'.
- if tc.r == '\ufffd' {
- continue
- }
- gotB, gotOK := Windows1255.EncodeRune(tc.r)
- wantB, wantOK := tc.b, tc.ok
- if gotB != wantB || gotOK != wantOK {
- t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK)
- }
- }
- }
- func TestFiles(t *testing.T) { enctest.TestFile(t, Windows1252) }
- func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, Windows1252) }
|