13 سال پیش · fa94a1e2bc
--- a/proto/text_parser.go
+++ b/proto/text_parser.go
@@ -35,10 +35,12 @@ package proto
 
				 // TODO: message sets.
			
 
				 
			
 
				 import (
			
 
				+	"errors"
			
 
				 	"fmt"
			
 
				 	"reflect"
			
 
				 	"strconv"
			
 
				 	"strings"
			
 
				+	"unicode/utf8"
			
 
				 )
			
 
				 
			
 
				 type ParseError struct {
			
@@ -169,7 +171,7 @@ func (p *textParser) advance() {
 
				 			p.errorf("unmatched quote")
			
 
				 			return
			
 
				 		}
			
 
				-		unq, err := unquoteC(p.s[0 : i+1])
			
 
				+		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
			
 
				 		if err != nil {
			
 
				 			p.errorf("invalid quoted string %v", p.s[0:i+1])
			
 
				 			return
			
@@ -190,35 +192,131 @@ func (p *textParser) advance() {
 
				 	p.offset += len(p.cur.value)
			
 
				 }
			
 
				 
			
 
				-// quoteSwap returns a single quote for a double quote, and vice versa.
			
 
				-// It is intended to be used with strings.Map.
			
 
				-func quoteSwap(r rune) rune {
			
 
				-	switch r {
			
 
				-	case '\'':
			
 
				-		return '"'
			
 
				-	case '"':
			
 
				-		return '\''
			
 
				var (
					// errBadUTF8 reports a malformed UTF-8 sequence met while decoding a
					// quoted string.
					errBadUTF8 = errors.New("bad UTF-8")
					// errBadHex reports a non-hexadecimal digit inside a \u or \U escape.
					errBadHex = errors.New("bad hexadecimal")
				)

				// unquoteC decodes s, the contents of a quoted text-format string with the
				// surrounding quote characters already stripped, resolving backslash
				// escapes via unescape. quote is the delimiter the string was written with;
				// it is only used to decide whether the allocation-free fast path applies.
				//
				// This is based on C++'s tokenizer.cc. Despite its name, this is *not*
				// parsing C syntax. For instance, "\0" is an invalid quoted string.
				//
				// It returns the decoded string, or an error if an escape is malformed or
				// (on the slow path only) s contains invalid UTF-8.
				func unquoteC(s string, quote rune) (string, error) {
					// Avoid allocation in trivial cases: with no backslash and no quote
					// character there is nothing to rewrite, so s is returned untouched.
					// Invalid UTF-8 decodes to utf8.RuneError here, which matches neither
					// character, so raw bytes pass through unchanged on this path.
					simple := true
					for _, r := range s {
						if r == '\\' || r == quote {
							simple = false
							break
						}
					}
					if simple {
						return s, nil
					}

					buf := make([]byte, 0, 3*len(s)/2)
					for len(s) > 0 {
						r, n := utf8.DecodeRuneInString(s)
						if r == utf8.RuneError && n == 1 {
							return "", errBadUTF8
						}
						if r != '\\' {
							// A successfully decoded rune re-encodes to exactly the bytes
							// it was read from, so copy them straight from the input.
							buf = append(buf, s[:n]...)
							s = s[n:]
							continue
						}

						// Hand everything after the backslash to unescape, which reports
						// the decoded bytes and whatever input is left.
						ch, tail, err := unescape(s[n:])
						if err != nil {
							return "", err
						}
						buf = append(buf, ch...)
						s = tail
					}
					return string(buf), nil
				}

				// unescape decodes a single escape sequence. s must start immediately after
				// the backslash. It returns the decoded bytes as ch and the unconsumed
				// remainder of s as tail.
				//
				// Supported escapes:
				//   - \a \b \f \n \r \t \v \? \' \" \\
				//   - \NNN (exactly three octal digits) and \xHH / \XHH (exactly two hex
				//     digits), each producing one raw byte
				//   - \uHHHH and \UHHHHHHHH, producing the UTF-8 encoding of the code point
				func unescape(s string) (ch string, tail string, err error) {
					if len(s) == 0 {
						return "", "", errors.New("trailing backslash")
					}
					r, n := utf8.DecodeRuneInString(s)
					if r == utf8.RuneError && n == 1 {
						return "", "", errBadUTF8
					}
					s = s[n:]
					switch r {
					case 'a':
						return "\a", s, nil
					case 'b':
						return "\b", s, nil
					case 'f':
						return "\f", s, nil
					case 'n':
						return "\n", s, nil
					case 'r':
						return "\r", s, nil
					case 't':
						return "\t", s, nil
					case 'v':
						return "\v", s, nil
					case '?':
						return "?", s, nil // trigraph workaround
					case '\'', '"', '\\':
						return string(r), s, nil
					case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
						if len(s) < 2 {
							return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
						}
						base := 8
						ss := s[:2]
						s = s[2:]
						if r == 'x' || r == 'X' {
							base = 16
						} else {
							// For octal the introducing digit is part of the value.
							ss = string(r) + ss
						}
						// A bit size of 8 makes ParseUint reject values above 0xFF
						// (e.g. \400).
						i, err := strconv.ParseUint(ss, base, 8)
						if err != nil {
							return "", "", err
						}
						return string([]byte{byte(i)}), s, nil
					case 'u', 'U':
						n := 4
						if r == 'U' {
							n = 8
						}
						if len(s) < n {
							return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
						}

						// Accumulate the digits into one code point. uint32 is wide
						// enough for eight hex digits without overflow.
						var cp uint32
						for i := 0; i < n; i++ {
							v, ok := unhex(s[i])
							if !ok {
								return "", "", errBadHex
							}
							cp = cp<<4 | uint32(v)
						}
						// Reject values beyond U+10FFFF and surrogate halves; neither
						// has a UTF-8 encoding.
						if cp > utf8.MaxRune || !utf8.ValidRune(rune(cp)) {
							return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:n])
						}
						return string(rune(cp)), s[n:], nil
					}
					return "", "", fmt.Errorf(`unknown escape \%c`, r)
				}

				// unhex converts one ASCII hexadecimal digit to its value. ok is false if b
				// is not a hexadecimal digit.
				//
				// Adapted from src/pkg/strconv/quote.go.
				func unhex(b byte) (v byte, ok bool) {
					switch {
					case '0' <= b && b <= '9':
						return b - '0', true
					case 'a' <= b && b <= 'f':
						return b - 'a' + 10, true
					case 'A' <= b && b <= 'F':
						return b - 'A' + 10, true
					}
					return 0, false
				}
			
 
				 
			
 
				 // Back off the parser by one token. Can only be done between calls to next().
			
--- a/proto/text_parser_test.go
+++ b/proto/text_parser_test.go
@@ -120,6 +120,33 @@ var unMarshalTextTests = []UnmarshalTextTest{
 
				 		},
			
 
				 	},
			
 
				 
			
 
				+	// Quoted string with all the accepted special characters from the C++ test
			
 
				+	{
			
 
				+		in: `count:42 name: ` + "\"\\\"A string with \\' characters \\n and \\r newlines and \\t tabs and \\001 slashes \\\\ and  multiple   spaces\"",
			
 
				+		out: &MyMessage{
			
 
				+			Count: Int32(42),
			
 
				+			Name:  String("\"A string with ' characters \n and \r newlines and \t tabs and \001 slashes \\ and  multiple   spaces"),
			
 
				+		},
			
 
				+	},
			
 
				+
			
 
				+	// Quoted string with quoted backslash
			
 
				+	{
			
 
				+		in: `count:42 name: "\\'xyz"`,
			
 
				+		out: &MyMessage{
			
 
				+			Count: Int32(42),
			
 
				+			Name:  String(`\'xyz`),
			
 
				+		},
			
 
				+	},
			
 
				+
			
 
				+	// Quoted string with UTF-8 bytes.
			
 
				+	{
			
 
				+		in: "count:42 name: '\303\277\302\201\xAB'",
			
 
				+		out: &MyMessage{
			
 
				+			Count: Int32(42),
			
 
				+			Name:  String("\303\277\302\201\xAB"),
			
 
				+		},
			
 
				+	},
			
 
				+
			
 
				 	// Bad quoted string
			
 
				 	{
			
 
				 		in:  `inner: < host: "\0" >` + "\n",
			
--- a/proto/text_test.go
+++ b/proto/text_test.go
@@ -218,13 +218,30 @@ func TestStringEscaping(t *testing.T) {
 
				 			&pb.Strings{StringField: proto.String("\350\260\267\346\255\214")},
			
 
				 			"string_field: \"\\350\\260\\267\\346\\255\\214\"\n",
			
 
				 		},
			
 
				+		{
			
 
				+			// Some UTF-8.
			
 
				+			&pb.Strings{StringField: proto.String("\x00\x01\xff\x81")},
			
 
				+			`string_field: "\000\001\377\201"` + "\n",
			
 
				+		},
			
 
				 	}
			
 
				 
			
 
				 	for i, tc := range testCases {
			
 
				 		var buf bytes.Buffer
			
 
				 		proto.MarshalText(&buf, tc.in)
			
 
				-		if s := buf.String(); s != tc.out {
			
 
				+		s := buf.String()
			
 
				+		if s != tc.out {
			
 
				 			t.Errorf("#%d: Got:\n%s\nExpected:\n%s\n", i, s, tc.out)
			
 
				+			continue
			
 
				+		}
			
 
				+
			
 
				+		// Check round-trip.
			
 
				+		pb := new(pb.Strings)
			
 
				+		if err := proto.UnmarshalText(s, pb); err != nil {
			
 
				+			t.Errorf("#%d: UnmarshalText: %v", i, err)
			
 
				+			continue
			
 
				+		}
			
 
				+		if !proto.Equal(pb, tc.in) {
			
 
				+			t.Errorf("#%d: Round-trip failed:\nstart: %v\n  end: %v", i, tc.in, pb)
			
 
				 		}
			
 
				 	}
			
 
				 }