Procházet zdrojové kódy

Fix interpretation of default bytes literal values (#427)

The protoc compiler passes the default value of a bytes field to protoc-gen-go as a C-escaped string.
Before formatting that string as a quoted Go string literal, we have to unescape the C string.
The unescape function operates on a best-effort basis: malformed escape sequences are passed through unchanged.
Joshua Humphries před 8 roky
rodič
revize
11b8df1609

+ 62 - 1
protoc-gen-go/generator/generator.go

@@ -1984,7 +1984,7 @@ func (g *Generator) generateMessage(message *Descriptor) {
 		case typename == "string":
 			def = strconv.Quote(def)
 		case typename == "[]byte":
-			def = "[]byte(" + strconv.Quote(def) + ")"
+			def = "[]byte(" + strconv.Quote(unescape(def)) + ")"
 			kind = "var "
 		case def == "inf", def == "-inf", def == "nan":
 			// These names are known to, and defined by, the protocol language.
@@ -2508,6 +2508,67 @@ func (g *Generator) generateMessage(message *Descriptor) {
 	g.addInitf("%s.RegisterType((*%s)(nil), %q)", g.Pkg["proto"], ccTypeName, fullName)
 }
 
+var escapeChars = [256]byte{ // indexed by the byte that follows a backslash; a zero entry means "not a single-character escape"
+	'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\v', '\\': '\\', '"': '"', '\'': '\'', '?': '?',
+}
+
+// unescape reverses the "C" escaping that protoc applies to default values of bytes fields.
+// It decodes single-character escapes (see escapeChars), \x or \X followed by exactly two hex digits, and
+// octal escapes of one to three digits. It is best effort: malformed sequences are copied through unmodified.
+func unescape(s string) string {
+	// NB: Sadly, we can't use strconv.Unquote because protoc will escape both
+	// single and double quotes, but strconv.Unquote only allows one or the
+	// other (based on actual surrounding quotes of its input argument).
+
+	var out []byte
+	for len(s) > 0 {
+		// ordinary byte, or a lone trailing backslash: copy it through unchanged
+		if s[0] != '\\' || len(s) < 2 {
+			out = append(out, s[0])
+			s = s[1:]
+		} else if c := escapeChars[s[1]]; c != 0 {
+			// single-character escape listed in escapeChars: emit the byte it stands for
+			out = append(out, c)
+			s = s[2:]
+		} else if s[1] == 'x' || s[1] == 'X' {
+			// hex escape with exactly two hex digits, e.g. "\x80"
+			if len(s) < 4 {
+				// fewer than two bytes follow the x: emit the backslash and x as-is and keep scanning
+				out = append(out, s[:2]...)
+				s = s[2:]
+				continue
+			}
+			v, err := strconv.ParseUint(s[2:4], 16, 8)
+			if err != nil { // the two bytes are not valid hex digits: keep all four bytes verbatim
+				out = append(out, s[:4]...)
+			} else {
+				out = append(out, byte(v))
+			}
+			s = s[4:]
+		} else if '0' <= s[1] && s[1] <= '7' {
+			// octal escape, can vary from 1 to 3 octal digits; e.g., "\0" "\40" or "\164"
+			// n is the number of leading octal digits after the backslash, capped at 3 below
+			n := len(s[1:]) - len(strings.TrimLeft(s[1:], "01234567"))
+			if n > 3 {
+				n = 3
+			}
+			v, err := strconv.ParseUint(s[1:1+n], 8, 8)
+			if err != nil { // value does not fit in 8 bits (e.g. "\777"): keep the sequence verbatim
+				out = append(out, s[:1+n]...)
+			} else {
+				out = append(out, byte(v))
+			}
+			s = s[1+n:]
+		} else {
+			// unrecognized escape: emit only the backslash here; the byte after it is handled by the next iteration
+			out = append(out, s[0])
+			s = s[1:]
+		}
+	}
+
+	return string(out)
+}
+
 func (g *Generator) generateExtension(ext *ExtensionDescriptor) {
 	ccTypeName := ext.DescName()
 

+ 29 - 0
protoc-gen-go/generator/name_test.go

@@ -83,3 +83,32 @@ func TestGoPackageOption(t *testing.T) {
 		}
 	}
 }
+
+func TestUnescape(t *testing.T) {
+	tests := []struct {
+		in   string // C-escaped input passed to unescape
+		out  string // expected decoded result
+	}{
+		// successful cases, covering plain text plus octal, single-character, and hex escapes
+		{"", ""},
+		{"foo bar baz frob nitz", "foo bar baz frob nitz"},
+		{`\000\001\002\003\004\005\006\007`, string([]byte{0, 1, 2, 3, 4, 5, 6, 7})},
+		{`\a\b\f\n\r\t\v\\\?\'\"`, string([]byte{'\a', '\b', '\f', '\n', '\r', '\t', '\v', '\\', '?', '\'', '"'})},
+		{`\x10\x20\x30\x40\x50\x60\x70\x80`, string([]byte{16, 32, 48, 64, 80, 96, 112, 128})},
+		// variable-length octal escapes; in \018 the 8 is not an octal digit, so it yields byte 1 followed by a literal '8'
+		{`\0\018\222\377\3\04\005\6\07`, string([]byte{0, 1, '8', 0222, 255, 3, 4, 5, 6, 7})},
+		// malformed escape sequences are expected to come back unchanged
+		{"foo \\g bar", "foo \\g bar"},
+		{"foo \\xg0 bar", "foo \\xg0 bar"},
+		{"\\", "\\"},
+		{"\\x", "\\x"},
+		{"\\xf", "\\xf"},
+		{"\\777", "\\777"}, // octal 777 does not fit in a byte, so the sequence is kept verbatim
+	}
+	for _, tc := range tests {
+		s := unescape(tc.in)
+		if s != tc.out {
+			t.Errorf("doUnescape(%q) = %q; should have been %q", tc.in, s, tc.out) // NOTE(review): message says doUnescape, but the function called above is unescape
+		}
+	}
+}