Просмотр исходного кода

Fix UTF-16 LE and BE handling.

Thanks to John Lenton for the test cases.
Gustavo Niemeyer 10 лет назад
Родитель
Коммит
bd61a856f8
2 измененных файла: 17 добавлений и 2 удаления
  1. 12 0
      decode_test.go
  2. 5 2
      readerc.go

+ 12 - 0
decode_test.go

@@ -559,6 +559,18 @@ var unmarshalTests = []struct {
 		"a: []",
 		&struct{ A []int }{[]int{}},
 	},
+
+	// UTF-16-LE
+	{
+		"\xff\xfe\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n\x00",
+		M{"ñoño":"very yes"},
+	},
+
+	// UTF-16-BE
+	{
+		"\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00\n",
+		M{"ñoño":"very yes"},
+	},
 }
 
 type M map[interface{}]interface{}

+ 5 - 2
readerc.go

@@ -247,7 +247,7 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
 				if parser.encoding == yaml_UTF16LE_ENCODING {
 					low, high = 0, 1
 				} else {
-					high, low = 1, 0
+					low, high = 1, 0
 				}
 
 				// The UTF-16 encoding is not as simple as one might
@@ -357,23 +357,26 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool {
 			if value <= 0x7F {
 				// 0000 0000-0000 007F . 0xxxxxxx
 				parser.buffer[buffer_len+0] = byte(value)
+				buffer_len += 1
 			} else if value <= 0x7FF {
 				// 0000 0080-0000 07FF . 110xxxxx 10xxxxxx
 				parser.buffer[buffer_len+0] = byte(0xC0 + (value >> 6))
 				parser.buffer[buffer_len+1] = byte(0x80 + (value & 0x3F))
+				buffer_len += 2
 			} else if value <= 0xFFFF {
 				// 0000 0800-0000 FFFF . 1110xxxx 10xxxxxx 10xxxxxx
 				parser.buffer[buffer_len+0] = byte(0xE0 + (value >> 12))
 				parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 6) & 0x3F))
 				parser.buffer[buffer_len+2] = byte(0x80 + (value & 0x3F))
+				buffer_len += 3
 			} else {
 				// 0001 0000-0010 FFFF . 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 				parser.buffer[buffer_len+0] = byte(0xF0 + (value >> 18))
 				parser.buffer[buffer_len+1] = byte(0x80 + ((value >> 12) & 0x3F))
 				parser.buffer[buffer_len+2] = byte(0x80 + ((value >> 6) & 0x3F))
 				parser.buffer[buffer_len+3] = byte(0x80 + (value & 0x3F))
+				buffer_len += 4
 			}
-			buffer_len += width
 
 			parser.unread++
 		}