123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package utf32
- import (
- "testing"
- "golang.org/x/text/encoding"
- "golang.org/x/text/encoding/internal/enctest"
- "golang.org/x/text/transform"
- )
- var (
- utf32LEIB = UTF32(LittleEndian, IgnoreBOM) // UTF-32LE (atypical interpretation)
- utf32LEUB = UTF32(LittleEndian, UseBOM) // UTF-32, LE
- // utf32LEEB = UTF32(LittleEndian, ExpectBOM) // UTF-32, LE, Expect - covered in encoding_test.go
- utf32BEIB = UTF32(BigEndian, IgnoreBOM) // UTF-32BE (atypical interpretation)
- utf32BEUB = UTF32(BigEndian, UseBOM) // UTF-32 default
- utf32BEEB = UTF32(BigEndian, ExpectBOM) // UTF-32 Expect
- )
- func TestBasics(t *testing.T) {
- testCases := []struct {
- e encoding.Encoding
- encPrefix string
- encSuffix string
- encoded string
- utf8 string
- }{{
- e: utf32BEIB,
- encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
- utf8: "\x57\u00e4\U0001d565",
- }, {
- e: UTF32(BigEndian, ExpectBOM),
- encPrefix: "\x00\x00\xfe\xff",
- encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
- utf8: "\x57\u00e4\U0001d565",
- }, {
- e: UTF32(LittleEndian, IgnoreBOM),
- encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
- utf8: "\x57\u00e4\U0001d565",
- }, {
- e: UTF32(LittleEndian, ExpectBOM),
- encPrefix: "\xff\xfe\x00\x00",
- encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
- utf8: "\x57\u00e4\U0001d565",
- }}
- for _, tc := range testCases {
- enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
- }
- }
- func TestFiles(t *testing.T) { enctest.TestFile(t, utf32BEIB) }
- func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, utf32BEIB) }
- func TestUTF32(t *testing.T) {
- testCases := []struct {
- desc string
- src string
- notEOF bool // the inverse of atEOF
- sizeDst int
- want string
- nSrc int
- err error
- t transform.Transformer
- }{{
- desc: "utf-32 IgnoreBOM dec: empty string",
- t: utf32BEIB.NewDecoder(),
- }, {
- desc: "utf-32 UseBOM dec: empty string",
- t: utf32BEUB.NewDecoder(),
- }, {
- desc: "utf-32 ExpectBOM dec: empty string",
- err: ErrMissingBOM,
- t: utf32BEEB.NewDecoder(),
- }, {
- desc: "utf-32be dec: Doesn't interpret U+FEFF as BOM",
- src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\uFEFF\U00012345=Ra",
- nSrc: 20,
- t: utf32BEIB.NewDecoder(),
- }, {
- desc: "utf-32be dec: Interprets little endian U+FEFF as invalid",
- src: "\xFF\xFE\x00\x00\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\uFFFD\U00012345=Ra",
- nSrc: 20,
- t: utf32BEIB.NewDecoder(),
- }, {
- desc: "utf-32le dec: Doesn't interpret U+FEFF as BOM",
- src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\uFEFF\U00012345=Ra",
- nSrc: 20,
- t: utf32LEIB.NewDecoder(),
- }, {
- desc: "utf-32le dec: Interprets big endian U+FEFF as invalid",
- src: "\x00\x00\xFE\xFF\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\uFFFD\U00012345=Ra",
- nSrc: 20,
- t: utf32LEIB.NewDecoder(),
- }, {
- desc: "utf-32 enc: Writes big-endian BOM",
- src: "\U00012345=Ra",
- sizeDst: 100,
- want: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- nSrc: 7,
- t: utf32BEUB.NewEncoder(),
- }, {
- desc: "utf-32 enc: Writes little-endian BOM",
- src: "\U00012345=Ra",
- sizeDst: 100,
- want: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- nSrc: 7,
- t: utf32LEUB.NewEncoder(),
- }, {
- desc: "utf-32 dec: Interprets text using big-endian default when BOM not present",
- src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 16,
- t: utf32BEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Interprets text using little-endian default when BOM not present",
- src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 16,
- t: utf32LEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: BOM determines encoding BE",
- src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 20,
- t: utf32BEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: BOM determines encoding LE",
- src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 20,
- t: utf32LEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: BOM determines encoding LE, change default",
- src: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 20,
- t: utf32BEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: BOM determines encoding BE, change default",
- src: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\U00012345=Ra",
- nSrc: 20,
- t: utf32LEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Don't change big-endian byte order mid-stream",
- src: "\x00\x01\x23\x45\x00\x00\x00\x3D\xFF\xFE\x00\x00\x00\x00\xFE\xFF\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "\U00012345=\uFFFD\uFEFFRa",
- nSrc: 24,
- t: utf32BEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Don't change little-endian byte order mid-stream",
- src: "\x45\x23\x01\x00\x3D\x00\x00\x00\x00\x00\xFE\xFF\xFF\xFE\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
- sizeDst: 100,
- want: "\U00012345=\uFFFD\uFEFFRa",
- nSrc: 24,
- t: utf32LEUB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Fail on missing BOM when required",
- src: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- sizeDst: 100,
- want: "",
- nSrc: 0,
- err: ErrMissingBOM,
- t: utf32BEEB.NewDecoder(),
- }, {
- desc: "utf-32 enc: Short dst",
- src: "\U00012345=Ra",
- sizeDst: 15,
- want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52",
- nSrc: 6,
- err: transform.ErrShortDst,
- t: utf32BEIB.NewEncoder(),
- }, {
- desc: "utf-32 enc: Short src",
- src: "\U00012345=Ra\xC2",
- notEOF: true,
- sizeDst: 100,
- want: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
- nSrc: 7,
- err: transform.ErrShortSrc,
- t: utf32BEIB.NewEncoder(),
- }, {
- desc: "utf-32 enc: Invalid input",
- src: "\x80\xC1\xC2\x7F\xC2",
- sizeDst: 100,
- want: "\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\x00\x7F\x00\x00\xFF\xFD",
- nSrc: 5,
- t: utf32BEIB.NewEncoder(),
- }, {
- desc: "utf-32 dec: Short dst",
- src: "\x00\x00\x00\x41",
- sizeDst: 0,
- want: "",
- nSrc: 0,
- err: transform.ErrShortDst,
- t: utf32BEIB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Short src",
- src: "\x00\x00\x00",
- notEOF: true,
- sizeDst: 4,
- want: "",
- nSrc: 0,
- err: transform.ErrShortSrc,
- t: utf32BEIB.NewDecoder(),
- }, {
- desc: "utf-32 dec: Invalid input",
- src: "\x00\x00\xD8\x00\x00\x00\xDF\xFF\x00\x11\x00\x00\x00\x00\x00",
- sizeDst: 100,
- want: "\uFFFD\uFFFD\uFFFD\uFFFD",
- nSrc: 15,
- t: utf32BEIB.NewDecoder(),
- }}
- for i, tc := range testCases {
- b := make([]byte, tc.sizeDst)
- nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
- if err != tc.err {
- t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
- }
- if got := string(b[:nDst]); got != tc.want {
- t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
- }
- if nSrc != tc.nSrc {
- t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
- }
- }
- }
|