Просмотр исходного кода

Fix scanning of long comment lines.

Reported as #469.
Gustavo Niemeyer 6 лет назад
Родитель
Коммит
0932294a34
3 изменённых файла: 47 добавлений и 31 удаление
  1. 10 0
      decode_test.go
  2. 28 25
      scannerc.go
  3. 9 6
      yamlprivateh.go

+ 10 - 0
decode_test.go

@@ -768,6 +768,16 @@ var unmarshalTests = []struct {
 		"---\nhello\n...\n}not yaml",
 		"hello",
 	},
+
+	// Comment scan exhausting the input buffer (issue #469).
+	{
+		"true\n#" + strings.Repeat(" ", 512*3),
+		"true",
+	},
+	{
+		"true #" + strings.Repeat(" ", 512*3),
+		"true",
+	},
 }
 
 type M map[string]interface{}

+ 28 - 25
scannerc.go

@@ -2800,23 +2800,25 @@ func yaml_parser_scan_line_comment(parser *yaml_parser_t, token_mark yaml_mark_t
 			if len(*comment) > 0 {
 				*comment = append(*comment, '\n')
 			}
-			for !is_breakz(parser.buffer, parser.buffer_pos+peek) {
-				*comment = append(*comment, parser.buffer[parser.buffer_pos+peek])
-				peek++
-				if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
+
+			// Consume until after the consumed comment line.
+			seen := parser.mark.index+peek
+			for {
+				if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
 					return false
 				}
-			}
-
-			// Skip until after the consumed comment line.
-			until := parser.buffer_pos + peek
-			for parser.buffer_pos < until {
-				if is_break(parser.buffer, parser.buffer_pos) {
-					// The break should stay in the buffer so calling this function twice or just
-					// before parsing foot comments works correctly. But this should never happen
-					// anyway given the logic above that stops at the break.
-					panic("internal error: the impossible has just happened!")
+				if is_breakz(parser.buffer, parser.buffer_pos) {
+					if parser.mark.index >= seen {
+						break
+					}
+					if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
+						return false
+					}
+					skip_line(parser)
 				} else {
+					if parser.mark.index >= seen {
+						*comment = append(*comment, parser.buffer[parser.buffer_pos])
+					}
 					skip(parser)
 				}
 			}
@@ -2923,25 +2925,26 @@ func yaml_parser_scan_comments(parser *yaml_parser_t, scan_mark yaml_mark_t) boo
 			text = append(text, '\n')
 		}
 
-		// Find the end of the comment line.
 		recent_empty = false
-		for !is_breakz(parser.buffer, parser.buffer_pos+peek) {
-			text = append(text, parser.buffer[parser.buffer_pos+peek])
-			peek++
-			if parser.unread < peek+1 && !yaml_parser_update_buffer(parser, peek+1) {
+
+		// Consume until after the consumed comment line.
+		seen := parser.mark.index+peek
+		for {
+			if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) {
 				return false
 			}
-		}
-
-		// Skip until after the consumed comment line.
-		until := parser.buffer_pos + peek
-		for parser.buffer_pos < until {
-			if is_break(parser.buffer, parser.buffer_pos) {
+			if is_breakz(parser.buffer, parser.buffer_pos) {
+				if parser.mark.index >= seen {
+					break
+				}
 				if parser.unread < 2 && !yaml_parser_update_buffer(parser, 2) {
 					return false
 				}
 				skip_line(parser)
 			} else {
+				if parser.mark.index >= seen {
+					text = append(text, parser.buffer[parser.buffer_pos])
+				}
 				skip(parser)
 			}
 		}

+ 9 - 6
yamlprivateh.go

@@ -136,8 +136,9 @@ func is_crlf(b []byte, i int) bool {
 // Check if the character is a line break or NUL.
 func is_breakz(b []byte, i int) bool {
 	//return is_break(b, i) || is_z(b, i)
-	return (        // is_break:
-	b[i] == '\r' || // CR (#xD)
+	return (
+		// is_break:
+		b[i] == '\r' || // CR (#xD)
 		b[i] == '\n' || // LF (#xA)
 		b[i] == 0xC2 && b[i+1] == 0x85 || // NEL (#x85)
 		b[i] == 0xE2 && b[i+1] == 0x80 && b[i+2] == 0xA8 || // LS (#x2028)
@@ -149,8 +150,9 @@ func is_breakz(b []byte, i int) bool {
 // Check if the character is a line break, space, or NUL.
 func is_spacez(b []byte, i int) bool {
 	//return is_space(b, i) || is_breakz(b, i)
-	return ( // is_space:
-	b[i] == ' ' ||
+	return (
+		// is_space:
+		b[i] == ' ' ||
 		// is_breakz:
 		b[i] == '\r' || // CR (#xD)
 		b[i] == '\n' || // LF (#xA)
@@ -163,8 +165,9 @@ func is_spacez(b []byte, i int) bool {
 // Check if the character is a line break, space, tab, or NUL.
 func is_blankz(b []byte, i int) bool {
 	//return is_blank(b, i) || is_breakz(b, i)
-	return ( // is_blank:
-	b[i] == ' ' || b[i] == '\t' ||
+	return (
+		// is_blank:
+		b[i] == ' ' || b[i] == '\t' ||
 		// is_breakz:
 		b[i] == '\r' || // CR (#xD)
 		b[i] == '\n' || // LF (#xA)