Преглед изворног кода

goprotobuf: Fix text format string encoding.

R=r
CC=golang-dev
http://codereview.appspot.com/4983061
David Symonds пре 14 година
родитељ
комит
4c95bfeb7b
3 измењена фајла са 77 додатих и 7 уклоњених редова
  1. 5 0
      proto/testdata/test.proto
  2. 43 5
      proto/text.go
  3. 29 2
      proto/text_test.go

+ 5 - 0
proto/testdata/test.proto

@@ -248,6 +248,11 @@ message MessageList {
   }
   }
 }
 }
 
 
+message Strings {
+  optional string string_field = 1;
+  optional bytes bytes_field = 2;
+}
+
 message Defaults {
 message Defaults {
   enum Color {
   enum Color {
     RED = 0;
     RED = 0;

+ 43 - 5
proto/text.go

@@ -41,7 +41,6 @@ import (
 	"os"
 	"os"
 	"reflect"
 	"reflect"
 	"sort"
 	"sort"
-	"strconv"
 	"strings"
 	"strings"
 )
 )
 
 
@@ -203,11 +202,9 @@ func writeAny(w *textWriter, v reflect.Value, props *Properties) {
 	switch v.Kind() {
 	switch v.Kind() {
 	case reflect.Slice:
 	case reflect.Slice:
 		// Should only be a []byte; repeated fields are handled in writeStruct.
 		// Should only be a []byte; repeated fields are handled in writeStruct.
-		// TODO: Should be strconv.QuoteToASCII, which should be released after 2011-06-20.
-		fmt.Fprint(w, strconv.Quote(string(v.Interface().([]byte))))
+		writeString(w, string(v.Interface().([]byte)))
 	case reflect.String:
 	case reflect.String:
-		// TODO: Should be strconv.QuoteToASCII, which should be released after 2011-06-20.
-		fmt.Fprint(w, strconv.Quote(v.String()))
+		writeString(w, v.String())
 	case reflect.Struct:
 	case reflect.Struct:
 		// Required/optional group/message.
 		// Required/optional group/message.
 		var bra, ket byte = '<', '>'
 		var bra, ket byte = '<', '>'
@@ -227,6 +224,47 @@ func writeAny(w *textWriter, v reflect.Value, props *Properties) {
 	}
 	}
 }
 }
 
 
+// equivalent to C's isprint.
+func isprint(c byte) bool {
+	return c >= 0x20 && c < 0x7f
+}
+
+// writeString writes a string in the protocol buffer text format.
+// It is similar to strconv.Quote except we don't use Go escape sequences,
+// we treat the string as a byte sequence, and we use octal escapes.
+// These differences are to maintain interoperability with the other
+// languages' implementations of the text format.
+func writeString(w *textWriter, s string) {
+	w.WriteByte('"')
+
+	// Loop over the bytes, not the runes.
+	for i := 0; i < len(s); i++ {
+		// Divergence from C++: we don't escape apostrophes.
+		// There's no need to escape them, and the C++ parser
+		// copes with a naked apostrophe.
+		switch c := s[i]; c {
+		case '\n':
+			w.Write([]byte{'\\', 'n'})
+		case '\r':
+			w.Write([]byte{'\\', 'r'})
+		case '\t':
+			w.Write([]byte{'\\', 't'})
+		case '"':
+			w.Write([]byte{'\\', '"'})
+		case '\\':
+			w.Write([]byte{'\\', '\\'})
+		default:
+			if isprint(c) {
+				w.WriteByte(c)
+			} else {
+				fmt.Fprintf(w, "\\%03o", c)
+			}
+		}
+	}
+
+	w.WriteByte('"')
+}
+
 func writeMessageSet(w *textWriter, ms *MessageSet) {
 func writeMessageSet(w *textWriter, ms *MessageSet) {
 	for _, item := range ms.Item {
 	for _, item := range ms.Item {
 		id := *item.TypeId
 		id := *item.TypeId

+ 29 - 2
proto/text_test.go

@@ -108,7 +108,7 @@ inner: <
 >
 >
 others: <
 others: <
   key: 3735928559
   key: 3735928559
-  value: "\x01A\a\f"
+  value: "\001A\007\014"
 >
 >
 others: <
 others: <
   weight: 6.022
   weight: 6.022
@@ -191,7 +191,34 @@ var compactText = compact(text)
 func TestCompactText(t *testing.T) {
 func TestCompactText(t *testing.T) {
 	s := proto.CompactTextString(newTestMessage())
 	s := proto.CompactTextString(newTestMessage())
 	if s != compactText {
 	if s != compactText {
-		t.Errorf("Got:\n===\n%v===\nExpected:\n===\n%v===\n", s, compactText)
+		t.Errorf("Got:\n===\n%v===\nExpected:\n===\n%v\n===\n", s, compactText)
+	}
+}
+
+func TestStringEscaping(t *testing.T) {
+	testCases := []struct {
+		in  *pb.Strings
+		out string
+	}{
+		{
+			// Test data from C++ test (TextFormatTest.StringEscape).
+			// Single divergence: we don't escape apostrophes.
+			&pb.Strings{StringField: proto.String("\"A string with ' characters \n and \r newlines and \t tabs and \001 slashes \\ and  multiple   spaces")},
+			"string_field: \"\\\"A string with ' characters \\n and \\r newlines and \\t tabs and \\001 slashes \\\\ and  multiple   spaces\"\n",
+		},
+		{
+			// Test data from the same C++ test.
+			&pb.Strings{StringField: proto.String("\350\260\267\346\255\214")},
+			"string_field: \"\\350\\260\\267\\346\\255\\214\"\n",
+		},
+	}
+
+	for i, tc := range testCases {
+		var buf bytes.Buffer
+		proto.MarshalText(&buf, tc.in)
+		if s := buf.String(); s != tc.out {
+			t.Errorf("#%d: Got:\n%s\nExpected:\n%s\n", i, s, tc.out)
+		}
 	}
 	}
 }
 }