123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- // Copyright 2019 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package strs provides string manipulation functionality specific to protobuf.
- package strs
- import (
- "go/token"
- "strings"
- "unicode"
- "unicode/utf8"
- "google.golang.org/protobuf/internal/flags"
- "google.golang.org/protobuf/reflect/protoreflect"
- )
- // EnforceUTF8 reports whether to enforce strict UTF-8 validation.
- func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
- if flags.ProtoLegacy {
- if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
- return fd.EnforceUTF8()
- }
- }
- return fd.Syntax() == protoreflect.Proto3
- }
- // GoCamelCase camel-cases a protobuf name for use as a Go identifier.
- //
- // If there is an interior underscore followed by a lower case letter,
- // drop the underscore and convert the letter to upper case.
- func GoCamelCase(s string) string {
- // Invariant: if the next letter is lower case, it must be converted
- // to upper case.
- // That is, we process a word at a time, where words are marked by _ or
- // upper case letter. Digits are treated as words.
- var b []byte
- for i := 0; i < len(s); i++ {
- c := s[i]
- switch {
- case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '.' in ".{{lowercase}}".
- case c == '.':
- b = append(b, '_') // convert '.' to '_'
- case c == '_' && (i == 0 || s[i-1] == '.'):
- // Convert initial '_' to ensure we start with a capital letter.
- // Do the same for '_' after '.' to match historic behavior.
- b = append(b, 'X') // convert '_' to 'X'
- case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
- // Skip over '_' in "_{{lowercase}}".
- case isASCIIDigit(c):
- b = append(b, c)
- default:
- // Assume we have a letter now - if not, it's a bogus identifier.
- // The next word is a sequence of characters that must start upper case.
- if isASCIILower(c) {
- c -= 'a' - 'A' // convert lowercase to uppercase
- }
- b = append(b, c)
- // Accept lower case sequence that follows.
- for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
- b = append(b, s[i+1])
- }
- }
- }
- return string(b)
- }
// GoSanitized turns an arbitrary string into a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit is
// replaced by '_'. If the result is a Go keyword, or does not begin with a
// letter (which includes the empty string), a '_' is prepended.
func GoSanitized(s string) string {
	keep := func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
			return '_'
		}
		return r
	}
	ident := strings.Map(keep, s)

	// The identifier can be used as is only when it starts with a letter and
	// does not collide with a keyword.
	first, _ := utf8.DecodeRuneInString(ident)
	if unicode.IsLetter(first) && !token.Lookup(ident).IsKeyword() {
		return ident
	}
	return "_" + ident
}
- // JSONCamelCase converts a snake_case identifier to a camelCase identifier,
- // according to the protobuf JSON specification.
- func JSONCamelCase(s string) string {
- var b []byte
- var wasUnderscore bool
- for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
- c := s[i]
- if c != '_' {
- if wasUnderscore && isASCIILower(c) {
- c -= 'a' - 'A' // convert to uppercase
- }
- b = append(b, c)
- }
- wasUnderscore = c == '_'
- }
- return string(b)
- }
- // JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
- // according to the protobuf JSON specification.
- func JSONSnakeCase(s string) string {
- var b []byte
- for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
- c := s[i]
- if isASCIIUpper(c) {
- b = append(b, '_')
- c += 'a' - 'A' // convert to lowercase
- }
- b = append(b, c)
- }
- return string(b)
- }
- // MapEntryName derives the name of the map entry message given the field name.
- // See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
- func MapEntryName(s string) string {
- var b []byte
- upperNext := true
- for _, c := range s {
- switch {
- case c == '_':
- upperNext = true
- case upperNext:
- b = append(b, byte(unicode.ToUpper(c)))
- upperNext = false
- default:
- b = append(b, byte(c))
- }
- }
- b = append(b, "Entry"...)
- return string(b)
- }
- // EnumValueName derives the camel-cased enum value name.
- // See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
- func EnumValueName(s string) string {
- var b []byte
- upperNext := true
- for _, c := range s {
- switch {
- case c == '_':
- upperNext = true
- case upperNext:
- b = append(b, byte(unicode.ToUpper(c)))
- upperNext = false
- default:
- b = append(b, byte(unicode.ToLower(c)))
- upperNext = false
- }
- }
- return string(b)
- }
// TrimEnumPrefix strips the enum name prefix from an enum value name.
// The prefix is expected to be all lower case and free of underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Matching ignores case and skips underscores in s. Underscores left at the
// front after the prefix has been consumed are removed as well. The original
// s is returned unchanged when the prefix does not match, or when trimming
// would leave nothing.
func TrimEnumPrefix(s, prefix string) string {
	rest, want := s, prefix
	for rest != "" && want != "" {
		c := rest[0]
		rest = rest[1:]
		if c == '_' {
			continue // underscores in the value name never count
		}
		if unicode.ToLower(rune(c)) != rune(want[0]) {
			return s // no prefix match
		}
		want = want[1:]
	}
	if want != "" {
		return s // value name ended before the prefix did
	}
	if trimmed := strings.TrimLeft(rest, "_"); trimmed != "" {
		return trimmed
	}
	return s // avoid returning empty string
}
// isASCIILower reports whether c is an ASCII lower case letter ('a'..'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
// isASCIIUpper reports whether c is an ASCII upper case letter ('A'..'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'..'9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}
|