Browse Source

html/charset: verify correct UTF-8 behavior

Change-Id: I4083c38468981128c3d74310cd02335c35eafa5d
Reviewed-on: https://go-review.googlesource.com/17966
Reviewed-by: Andy Balholm <andy@balholm.com>
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
Marcel van Lohuizen 10 years ago
parent
commit
68a055e15f
1 changed file with 8 additions and 0 deletions
  1. 8 0
      html/charset/charset_test.go

+ 8 - 0
html/charset/charset_test.go

@@ -71,6 +71,11 @@ var testCases = []testCase{
 }
 
 func TestDecode(t *testing.T) {
+	testCases := append(testCases, []testCase{
+		// Replace a multi-byte maximal subpart of an ill-formed subsequence
+		// with a single replacement character (WHATWG requirement).
+		{"Rés\ufffdumé", "Rés\xe1\x80umé", "utf8"},
+	}...)
 	for _, tc := range testCases {
 		e, _ := Lookup(tc.otherEncoding)
 		if e == nil {
@@ -90,9 +95,12 @@ func TestDecode(t *testing.T) {
 
 func TestEncode(t *testing.T) {
 	testCases := append(testCases, []testCase{
+		// Use Go-style replacement: each ill-formed byte becomes its own U+FFFD.
+		{"Rés\xe1\x80umé", "Rés\ufffd\ufffdumé", "utf8"},
 		// U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
 		{"Gdańsk", "Gda&#324;sk", "ISO-8859-11"},
 		{"\ufffd", "&#65533;", "ISO-8859-11"},
+		{"a\xe1\x80b", "a&#65533;&#65533;b", "ISO-8859-11"},
 	}...)
 	for _, tc := range testCases {
 		e, _ := Lookup(tc.otherEncoding)