12 years ago · 480e7b06ec
--- a/html/token.go
+++ b/html/token.go
@@ -734,7 +734,6 @@ func (z *Tokenizer) readCDATA() bool {
 
				 			brackets = 0
			
 
				 		}
			
 
				 	}
			
 
				-	panic("unreachable")
			
 
				 }
			
 
				 
			
 
				 // startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
			
@@ -934,13 +933,13 @@ func (z *Tokenizer) readTagAttrVal() {
 
				 
			
 
				 // Next scans the next token and returns its type.
			
 
				 func (z *Tokenizer) Next() TokenType {
			
 
				+	z.raw.start = z.raw.end
			
 
				+	z.data.start = z.raw.end
			
 
				+	z.data.end = z.raw.end
			
 
				 	if z.err != nil {
			
 
				 		z.tt = ErrorToken
			
 
				 		return z.tt
			
 
				 	}
			
 
				-	z.raw.start = z.raw.end
			
 
				-	z.data.start = z.raw.end
			
 
				-	z.data.end = z.raw.end
			
 
				 	if z.rawTag != "" {
			
 
				 		if z.rawTag == "plaintext" {
			
 
				 			// Read everything up to EOF.
			
@@ -1010,12 +1009,11 @@ loop:
 
				 				break loop
			
 
				 			}
			
 
				 			if c == '>' {
			
 
				-				// "</>" does not generate a token at all.
			
 
				+				// "</>" does not generate a token at all. Generate an empty comment
			
 
				+				// to allow passthrough clients to pick up the data using Raw.
			
 
				 				// Reset the tokenizer state and start again.
			
 
				-				z.raw.start = z.raw.end
			
 
				-				z.data.start = z.raw.end
			
 
				-				z.data.end = z.raw.end
			
 
				-				continue loop
			
 
				+				z.tt = CommentToken
			
 
				+				return z.tt
			
 
				 			}
			
 
				 			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
			
 
				 				z.readTag(false)
			
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -63,12 +63,12 @@ var tokenTests = []tokenTest{
 
				 	{
			
 
				 		"not a tag #2",
			
 
				 		"</>",
			
 
				-		"",
			
 
				+		"<!---->",
			
 
				 	},
			
 
				 	{
			
 
				 		"not a tag #3",
			
 
				 		"a</>b",
			
 
				-		"a$b",
			
 
				+		"a$<!---->$b",
			
 
				 	},
			
 
				 	{
			
 
				 		"not a tag #4",
			
@@ -469,6 +469,25 @@ loop:
 
				 	}
			
 
				 }
			
 
				 
			
 
				+func TestPassthrough(t *testing.T) {
			
 
				+	// Accumulating the raw output for each parse event should reconstruct the
			
 
				+	// original input.
			
 
				+	for _, test := range tokenTests {
			
 
				+		z := NewTokenizer(strings.NewReader(test.html))
			
 
				+		var parsed bytes.Buffer
			
 
				+		for {
			
 
				+			tt := z.Next()
			
 
				+			parsed.Write(z.Raw())
			
 
				+			if tt == ErrorToken {
			
 
				+				break
			
 
				+			}
			
 
				+		}
			
 
				+		if got, want := parsed.String(), test.html; got != want {
			
 
				+			t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				 func TestBufAPI(t *testing.T) {
			
 
				 	s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
			
 
				 	z := NewTokenizer(bytes.NewBufferString(s))