charmap_test.go 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package charmap
  5. import (
  6. "testing"
  7. "golang.org/x/text/encoding"
  8. "golang.org/x/text/encoding/internal"
  9. "golang.org/x/text/encoding/internal/enctest"
  10. "golang.org/x/text/transform"
  11. )
  12. func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
  13. return "Decode", e.NewDecoder(), nil
  14. }
  15. func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
  16. return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
  17. }
  18. func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
  19. return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
  20. }
  21. func TestNonRepertoire(t *testing.T) {
  22. testCases := []struct {
  23. init func(e encoding.Encoding) (string, transform.Transformer, error)
  24. e encoding.Encoding
  25. src, want string
  26. }{
  27. {dec, Windows1252, "\x81", "\ufffd"},
  28. {encEBCDIC, CodePage037, "갂", ""},
  29. {encEBCDIC, CodePage1047, "갂", ""},
  30. {encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
  31. {encEBCDIC, CodePage1140, "갂", ""},
  32. {encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
  33. {encASCIISuperset, Windows1252, "갂", ""},
  34. {encASCIISuperset, Windows1252, "a갂", "a"},
  35. {encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
  36. }
  37. for _, tc := range testCases {
  38. dir, tr, wantErr := tc.init(tc.e)
  39. dst, _, err := transform.String(tr, tc.src)
  40. if err != wantErr {
  41. t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
  42. }
  43. if got := string(dst); got != tc.want {
  44. t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
  45. }
  46. }
  47. }
  48. func TestBasics(t *testing.T) {
  49. testCases := []struct {
  50. e encoding.Encoding
  51. encoded string
  52. utf8 string
  53. }{{
  54. e: CodePage037,
  55. encoded: "\xc8\x51\xba\x93\xcf",
  56. utf8: "Hé[lõ",
  57. }, {
  58. e: CodePage437,
  59. encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
  60. utf8: "Héllô ¥º⌠£╛",
  61. }, {
  62. e: CodePage866,
  63. encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
  64. utf8: "Hє╙o Ш¤Я▌б",
  65. }, {
  66. e: CodePage1047,
  67. encoded: "\xc8\x54\x93\x93\x9f",
  68. utf8: "Hèll¤",
  69. }, {
  70. e: CodePage1140,
  71. encoded: "\xc8\x9f\x93\x93\xcf",
  72. utf8: "H€llõ",
  73. }, {
  74. e: ISO8859_2,
  75. encoded: "Hel\xe5\xf5",
  76. utf8: "Helĺő",
  77. }, {
  78. e: ISO8859_3,
  79. encoded: "He\xbd\xd4",
  80. utf8: "He½Ô",
  81. }, {
  82. e: ISO8859_4,
  83. encoded: "Hel\xb6\xf8",
  84. utf8: "Helļø",
  85. }, {
  86. e: ISO8859_5,
  87. encoded: "H\xd7\xc6o",
  88. utf8: "HзЦo",
  89. }, {
  90. e: ISO8859_6,
  91. encoded: "Hel\xc2\xc9",
  92. utf8: "Helآة",
  93. }, {
  94. e: ISO8859_7,
  95. encoded: "H\xeel\xebo",
  96. utf8: "Hξlλo",
  97. }, {
  98. e: ISO8859_8,
  99. encoded: "Hel\xf5\xed",
  100. utf8: "Helץם",
  101. }, {
  102. e: ISO8859_9,
  103. encoded: "\xdeayet",
  104. utf8: "Şayet",
  105. }, {
  106. e: ISO8859_10,
  107. encoded: "H\xea\xbfo",
  108. utf8: "Hęŋo",
  109. }, {
  110. e: ISO8859_13,
  111. encoded: "H\xe6l\xf9o",
  112. utf8: "Hęlło",
  113. }, {
  114. e: ISO8859_14,
  115. encoded: "He\xfe\xd0o",
  116. utf8: "HeŷŴo",
  117. }, {
  118. e: ISO8859_15,
  119. encoded: "H\xa4ll\xd8",
  120. utf8: "H€llØ",
  121. }, {
  122. e: ISO8859_16,
  123. encoded: "H\xe6ll\xbd",
  124. utf8: "Hællœ",
  125. }, {
  126. e: KOI8R,
  127. encoded: "He\x93\xad\x9c",
  128. utf8: "He⌠╜°",
  129. }, {
  130. e: KOI8U,
  131. encoded: "He\x93\xad\x9c",
  132. utf8: "He⌠ґ°",
  133. }, {
  134. e: Macintosh,
  135. encoded: "He\xdf\xd7",
  136. utf8: "Hefl◊",
  137. }, {
  138. e: MacintoshCyrillic,
  139. encoded: "He\xbe\x94",
  140. utf8: "HeЊФ",
  141. }, {
  142. e: Windows874,
  143. encoded: "He\xb7\xf0",
  144. utf8: "Heท๐",
  145. }, {
  146. e: Windows1250,
  147. encoded: "He\xe5\xe5o",
  148. utf8: "Heĺĺo",
  149. }, {
  150. e: Windows1251,
  151. encoded: "H\xball\xfe",
  152. utf8: "Hєllю",
  153. }, {
  154. e: Windows1252,
  155. encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
  156. utf8: "Héllô ¥º®£Ð",
  157. }, {
  158. e: Windows1253,
  159. encoded: "H\xe5ll\xd6",
  160. utf8: "HεllΦ",
  161. }, {
  162. e: Windows1254,
  163. encoded: "\xd0ello",
  164. utf8: "Ğello",
  165. }, {
  166. e: Windows1255,
  167. encoded: "He\xd4o",
  168. utf8: "Heװo",
  169. }, {
  170. e: Windows1256,
  171. encoded: "H\xdbllo",
  172. utf8: "Hغllo",
  173. }, {
  174. e: Windows1257,
  175. encoded: "He\xeflo",
  176. utf8: "Heļlo",
  177. }, {
  178. e: Windows1258,
  179. encoded: "Hell\xf5",
  180. utf8: "Hellơ",
  181. }, {
  182. e: XUserDefined,
  183. encoded: "\x00\x40\x7f\x80\xab\xff",
  184. utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
  185. }}
  186. for _, tc := range testCases {
  187. enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
  188. }
  189. }
  190. var windows1255TestCases = []struct {
  191. b byte
  192. ok bool
  193. r rune
  194. }{
  195. {'\x00', true, '\u0000'},
  196. {'\x1a', true, '\u001a'},
  197. {'\x61', true, '\u0061'},
  198. {'\x7f', true, '\u007f'},
  199. {'\x80', true, '\u20ac'},
  200. {'\x95', true, '\u2022'},
  201. {'\xa0', true, '\u00a0'},
  202. {'\xc0', true, '\u05b0'},
  203. {'\xfc', true, '\ufffd'},
  204. {'\xfd', true, '\u200e'},
  205. {'\xfe', true, '\u200f'},
  206. {'\xff', true, '\ufffd'},
  207. {encoding.ASCIISub, false, '\u0400'},
  208. {encoding.ASCIISub, false, '\u2603'},
  209. {encoding.ASCIISub, false, '\U0001f4a9'},
  210. }
  211. func TestDecodeByte(t *testing.T) {
  212. for _, tc := range windows1255TestCases {
  213. if !tc.ok {
  214. continue
  215. }
  216. got := Windows1255.DecodeByte(tc.b)
  217. want := tc.r
  218. if got != want {
  219. t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want)
  220. }
  221. }
  222. }
  223. func TestEncodeRune(t *testing.T) {
  224. for _, tc := range windows1255TestCases {
  225. // There can be multiple tc.b values that map to tc.r = '\ufffd'.
  226. if tc.r == '\ufffd' {
  227. continue
  228. }
  229. gotB, gotOK := Windows1255.EncodeRune(tc.r)
  230. wantB, wantOK := tc.b, tc.ok
  231. if gotB != wantB || gotOK != wantOK {
  232. t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK)
  233. }
  234. }
  235. }
  236. func TestFiles(t *testing.T) { enctest.TestFile(t, Windows1252) }
  237. func BenchmarkEncoding(b *testing.B) { enctest.Benchmark(b, Windows1252) }