Parcourir la source

encoding/prototext: add UnmarshalOptions.DiscardUnknown

This CL adds support for discarding unknown fields from the input.
We add support for parsing and resolving field numbers, so that
the DiscardUnknown option can ignore all unresolvable fields.
We continue to reject known fields identified by field number
since there are a number of edge cases that are difficult to resolve.

Change-Id: I5c88b7bae8656ce20e85e4b5c92d8564a5ff8bb6
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/195779
Reviewed-by: Herbie Ong <herbie@google.com>
Joe Tsai il y a 6 ans
Parent
commit
8689fa59f4
2 fichiers modifiés avec 68 ajouts et 23 suppressions
  1. 46 23
      encoding/prototext/decode.go
  2. 22 0
      encoding/prototext/decode_test.go

+ 46 - 23
encoding/prototext/decode.go

@@ -35,6 +35,12 @@ type UnmarshalOptions struct {
 	// return error if there are any missing required fields.
 	// return error if there are any missing required fields.
 	AllowPartial bool
 	AllowPartial bool
 
 
+	// DiscardUnknown specifies whether to ignore unknown fields when parsing.
+	// An unknown field is any field whose field name or field number does not
+	// resolve to any known or extension field in the message.
+	// By default, unmarshal rejects unknown fields as an error.
+	DiscardUnknown bool
+
 	// Resolver is used for looking up types when unmarshaling
 	// Resolver is used for looking up types when unmarshaling
 	// google.protobuf.Any messages or extension fields.
 	// google.protobuf.Any messages or extension fields.
 	// If nil, this defaults to using protoregistry.GlobalTypes.
 	// If nil, this defaults to using protoregistry.GlobalTypes.
@@ -92,57 +98,74 @@ func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message)
 		tkey := tfield[0]
 		tkey := tfield[0]
 		tval := tfield[1]
 		tval := tfield[1]
 
 
-		var fd pref.FieldDescriptor
+		// Resolve the field descriptor.
 		var name pref.Name
 		var name pref.Name
+		var fd pref.FieldDescriptor
+		var xt pref.ExtensionType
+		var xtErr error
 		switch tkey.Type() {
 		switch tkey.Type() {
 		case text.Name:
 		case text.Name:
 			name, _ = tkey.Name()
 			name, _ = tkey.Name()
 			fd = fieldDescs.ByName(name)
 			fd = fieldDescs.ByName(name)
-			switch {
-			case fd == nil:
+			if fd == nil {
 				// The proto name of a group field is in all lowercase,
 				// The proto name of a group field is in all lowercase,
 				// while the textproto field name is the group message name.
 				// while the textproto field name is the group message name.
-				// Check to make sure that group name is correct.
 				gd := fieldDescs.ByName(pref.Name(strings.ToLower(string(name))))
 				gd := fieldDescs.ByName(pref.Name(strings.ToLower(string(name))))
 				if gd != nil && gd.Kind() == pref.GroupKind && gd.Message().Name() == name {
 				if gd != nil && gd.Kind() == pref.GroupKind && gd.Message().Name() == name {
 					fd = gd
 					fd = gd
 				}
 				}
-			case fd.Kind() == pref.GroupKind && fd.Message().Name() != name:
+			} else if fd.Kind() == pref.GroupKind && fd.Message().Name() != name {
 				fd = nil // reset since field name is actually the message name
 				fd = nil // reset since field name is actually the message name
-			case fd.IsWeak() && fd.Message().IsPlaceholder():
-				fd = nil // reset since the weak reference is not linked in
 			}
 			}
 		case text.String:
 		case text.String:
 			// Handle extensions only. This code path is not for Any.
 			// Handle extensions only. This code path is not for Any.
 			if messageDesc.FullName() == "google.protobuf.Any" {
 			if messageDesc.FullName() == "google.protobuf.Any" {
 				break
 				break
 			}
 			}
-			// Extensions have to be registered first in the message's
-			// ExtensionTypes before setting a value to it.
-			extName := pref.FullName(tkey.String())
-			// Check first if it is already registered. This is the case for
-			// repeated fields.
-			xt, err := o.findExtension(extName)
-			if err != nil && err != protoregistry.NotFound {
-				return errors.New("unable to resolve [%v]: %v", extName, err)
+			xt, xtErr = o.findExtension(pref.FullName(tkey.String()))
+		case text.Uint:
+			v, _ := tkey.Uint(false)
+			num := pref.FieldNumber(v)
+			if !num.IsValid() {
+				return errors.New("invalid field number: %d", num)
 			}
 			}
-			if xt != nil {
-				fd = xt.TypeDescriptor()
-				if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
-					return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
-				}
+			fd = fieldDescs.ByNumber(num)
+			if fd == nil {
+				xt, xtErr = o.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
 			}
 			}
 		}
 		}
+		if xt != nil {
+			fd = xt.TypeDescriptor()
+			if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
+				return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
+			}
+		} else if xtErr != nil && xtErr != protoregistry.NotFound {
+			return errors.New("unable to resolve: %v", xtErr)
+		}
+		if fd != nil && fd.IsWeak() && fd.Message().IsPlaceholder() {
+			fd = nil // reset since the weak reference is not linked in
+		}
 
 
+		// Handle unknown fields.
 		if fd == nil {
 		if fd == nil {
-			// Ignore reserved names.
-			if messageDesc.ReservedNames().Has(name) {
+			if o.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
 				continue
 				continue
 			}
 			}
-			// TODO: Can provide option to ignore unknown message fields.
 			return errors.New("%v contains unknown field: %v", messageDesc.FullName(), tkey)
 			return errors.New("%v contains unknown field: %v", messageDesc.FullName(), tkey)
 		}
 		}
 
 
+		// Handle fields identified by field number.
+		if tkey.Type() == text.Uint {
+			// TODO: Add an option to permit parsing field numbers.
+			//
+			// This requires careful thought as the MarshalOptions.EmitUnknown
+			// option allows formatting unknown fields as the field number
+			// and the best-effort textual representation of the field value.
+			// In that case, it may not be possible to unmarshal the value from
+			// a parser that does have information about the unknown field.
+			return errors.New("cannot specify field by number: %v", tkey)
+		}
+
 		switch {
 		switch {
 		case fd.IsList():
 		case fd.IsList():
 			// If input is not a list, turn it into a list.
 			// If input is not a list, turn it into a list.

+ 22 - 0
encoding/prototext/decode_test.go

@@ -174,6 +174,28 @@ s_string: "谷歌"
 		inputMessage: &pb3.Scalars{},
 		inputMessage: &pb3.Scalars{},
 		inputText:    "unknown_field: 456",
 		inputText:    "unknown_field: 456",
 		wantErr:      true,
 		wantErr:      true,
+	}, {
+		desc:         "proto2 message contains discarded unknown field",
+		umo:          prototext.UnmarshalOptions{DiscardUnknown: true},
+		inputMessage: &pb2.Scalars{},
+		inputText:    `unknown_field:123 1000:"hello"`,
+	}, {
+		desc:         "proto3 message contains discarded unknown field",
+		umo:          prototext.UnmarshalOptions{DiscardUnknown: true},
+		inputMessage: &pb3.Scalars{},
+		inputText:    `unknown_field:456 1000:"goodbye"`,
+	}, {
+		desc:         "proto2 message cannot parse field number",
+		umo:          prototext.UnmarshalOptions{DiscardUnknown: true},
+		inputMessage: &pb2.Scalars{},
+		inputText:    `13:"hello"`,
+		wantErr:      true,
+	}, {
+		desc:         "proto3 message cannot parse field number",
+		umo:          prototext.UnmarshalOptions{DiscardUnknown: true},
+		inputMessage: &pb3.Scalars{},
+		inputText:    `13:"goodbye"`,
+		wantErr:      true,
 	}, {
 	}, {
 		desc:         "proto2 numeric key field",
 		desc:         "proto2 numeric key field",
 		inputMessage: &pb2.Scalars{},
 		inputMessage: &pb2.Scalars{},