소스 검색

go.net/html: Expose data read from the input reader but not yet tokenized in Tokenizer.

This allows clients to efficiently reconstruct the original input in the case of ErrBufferExceeded. TestMaxBufferReconstruction now properly verifies this.

R=bradfitz
CC=golang-codereviews
https://golang.org/cl/47770043
Michael Piatek 12 년 전
부모
커밋
4698117464
2개의 파일 변경: 13줄 추가, 10줄 삭제
  1. 5 0
      html/token.go
  2. 8 10
      html/token_test.go

+ 5 - 0
html/token.go

@@ -286,6 +286,11 @@ func (z *Tokenizer) readByte() byte {
 	return x
 	return x
 }
 }
 
 
+// Buffered returns the data read from the input reader but not yet tokenized, as a sub-slice of z.buf starting at z.raw.end (not a copy).
+func (z *Tokenizer) Buffered() []byte {
+	return z.buf[z.raw.end:]
+}
+
 // readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil).
 // readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil).
 // It returns io.ErrNoProgress if the underlying r.Read method returns (0, nil)
 // It returns io.ErrNoProgress if the underlying r.Read method returns (0, nil)
 // too many times in succession.
 // too many times in succession.

+ 8 - 10
html/token_test.go

@@ -490,7 +490,6 @@ func TestMaxBufferReconstruction(t *testing.T) {
 	// reconstructing the original input.
 	// reconstructing the original input.
 tests:
 tests:
 	for _, test := range tokenTests {
 	for _, test := range tokenTests {
-	buffer:
 		for maxBuf := 1; ; maxBuf++ {
 		for maxBuf := 1; ; maxBuf++ {
 			r := strings.NewReader(test.html)
 			r := strings.NewReader(test.html)
 			z := NewTokenizer(r)
 			z := NewTokenizer(r)
@@ -500,19 +499,14 @@ tests:
 				tt := z.Next()
 				tt := z.Next()
 				tokenized.Write(z.Raw())
 				tokenized.Write(z.Raw())
 				if tt == ErrorToken {
 				if tt == ErrorToken {
-					if z.Err() == ErrBufferExceeded {
-						continue buffer
-					}
-					// EOF is expected, and indicates that we found the max maxBuf that
-					// generates ErrBufferExceeded, so continue to the next test.
-					if err := z.Err(); err != io.EOF {
+					if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
 						t.Errorf("%s: unexpected error: %v", test.desc, err)
 						t.Errorf("%s: unexpected error: %v", test.desc, err)
 					}
 					}
 					break
 					break
 				}
 				}
 			}
 			}
-			// Anything tokenizing along with input left in the reader.
-			assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, r))
+			// Anything tokenized along with untokenized input or data left in the reader.
+			assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
 			if err != nil {
 			if err != nil {
 				t.Errorf("%s: ReadAll: %v", test.desc, err)
 				t.Errorf("%s: ReadAll: %v", test.desc, err)
 				continue tests
 				continue tests
@@ -521,7 +515,11 @@ tests:
 				t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
 				t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
 				continue tests
 				continue tests
 			}
 			}
-			break
+			// EOF indicates that we completed tokenization and hence found the max
+			// maxBuf that generates ErrBufferExceeded, so continue to the next test.
+			if z.Err() == io.EOF {
+				break
+			}
 		} // buffer sizes
 		} // buffer sizes
 	} // tests
 	} // tests
 }
 }