Przeglądaj źródła

x/net/html/charset: add NewReaderByName

This provides a CharsetReader function for xml.Decoder.

Change-Id: Id00787bbdee90d267d38c84c98a06f9e10d93336
Reviewed-on: https://go-review.googlesource.com/4420
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Andy Balholm 11 lat temu
rodzic
commit
ec18079348
2 zmienionych plików z 34 dodań i 0 usunięć
  1. 13 0
      html/charset/charset.go
  2. 21 0
      html/charset/charset_test.go

+ 13 - 0
html/charset/charset.go

@@ -10,6 +10,7 @@ package charset // import "golang.org/x/net/html/charset"
 
 import (
 	"bytes"
+	"fmt"
 	"io"
 	"mime"
 	"strings"
@@ -110,6 +111,18 @@ func NewReader(r io.Reader, contentType string) (io.Reader, error) {
 	return r, nil
 }
 
+// NewReaderByName returns a reader that decodes input from the named charset
+// into UTF-8. It returns a nil reader and an "unsupported charset" error when
+// Lookup does not recognize charset as one of the standard encodings for HTML.
+// Its signature lets it serve as encoding/xml.Decoder's CharsetReader function.
+func NewReaderByName(charset string, input io.Reader) (io.Reader, error) {
+	e, _ := Lookup(charset) // only the encoding is used; the second result is discarded
+	if e == nil { // Lookup produced no encoding for this charset
+		return nil, fmt.Errorf("unsupported charset: %q", charset)
+	}
+	return transform.NewReader(input, e.NewDecoder()), nil // wrap input with e's decoder
+}
+
 func prescan(content []byte) (e encoding.Encoding, name string) {
 	z := html.NewTokenizer(bytes.NewReader(content))
 	for {

+ 21 - 0
html/charset/charset_test.go

@@ -6,6 +6,7 @@ package charset
 
 import (
 	"bytes"
+	"encoding/xml"
 	"io/ioutil"
 	"runtime"
 	"strings"
@@ -213,3 +214,23 @@ func TestFromMeta(t *testing.T) {
 		}
 	}
 }
+
+func TestXML(t *testing.T) {
+	const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>" // \xe9 is "é" in windows-1252
+
+	d := xml.NewDecoder(strings.NewReader(s))
+	d.CharsetReader = NewReaderByName // let the decoder convert the declared encoding
+
+	var a struct {
+		Word string // filled from the <Word> element
+	}
+	err := d.Decode(&a)
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+
+	want := "résumé" // expected UTF-8 text after charset conversion
+	if a.Word != want {
+		t.Errorf("got %q, want %q", a.Word, want)
+	}
+}