string.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "io"
  7. "math/bits"
  8. "strconv"
  9. "unicode"
  10. "unicode/utf16"
  11. "unicode/utf8"
  12. "github.com/golang/protobuf/v2/internal/errors"
  13. )
  14. func (p *encoder) marshalString(v Value) error {
  15. var err error
  16. p.out, err = appendString(p.out, v)
  17. return err
  18. }
  19. func appendString(out []byte, v Value) ([]byte, error) {
  20. if v.Type() != String {
  21. return nil, errors.New("invalid type %v, expected string", v.Type())
  22. }
  23. if len(v.raw) > 0 {
  24. return append(out, v.raw...), nil
  25. }
  26. in := v.String()
  27. var nerr errors.NonFatal
  28. out = append(out, '"')
  29. i := indexNeedEscape(in)
  30. in, out = in[i:], append(out, in[:i]...)
  31. for len(in) > 0 {
  32. switch r, n := utf8.DecodeRuneInString(in); {
  33. case r == utf8.RuneError && n == 1:
  34. nerr.AppendInvalidUTF8("")
  35. in, out = in[1:], append(out, in[0]) // preserve invalid byte
  36. case r < ' ' || r == '"' || r == '\\':
  37. out = append(out, '\\')
  38. switch r {
  39. case '"', '\\':
  40. out = append(out, byte(r))
  41. case '\b':
  42. out = append(out, 'b')
  43. case '\f':
  44. out = append(out, 'f')
  45. case '\n':
  46. out = append(out, 'n')
  47. case '\r':
  48. out = append(out, 'r')
  49. case '\t':
  50. out = append(out, 't')
  51. default:
  52. out = append(out, 'u')
  53. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  54. out = strconv.AppendUint(out, uint64(r), 16)
  55. }
  56. in = in[n:]
  57. default:
  58. i := indexNeedEscape(in[n:])
  59. in, out = in[n+i:], append(out, in[:n+i]...)
  60. }
  61. }
  62. out = append(out, '"')
  63. return out, nerr.E
  64. }
  65. func (p *decoder) unmarshalString() (Value, error) {
  66. v, n, err := consumeString(p.in)
  67. p.consume(n)
  68. return v, err
  69. }
  70. func consumeString(in []byte) (Value, int, error) {
  71. var nerr errors.NonFatal
  72. in0 := in
  73. if len(in) == 0 {
  74. return Value{}, 0, io.ErrUnexpectedEOF
  75. }
  76. if in[0] != '"' {
  77. return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
  78. }
  79. in = in[1:]
  80. i := indexNeedEscape(string(in))
  81. in, out := in[i:], in[:i:i] // set cap to prevent mutations
  82. for len(in) > 0 {
  83. switch r, n := utf8.DecodeRune(in); {
  84. case r == utf8.RuneError && n == 1:
  85. nerr.AppendInvalidUTF8("")
  86. in, out = in[1:], append(out, in[0]) // preserve invalid byte
  87. case r < ' ':
  88. return Value{}, 0, newSyntaxError("invalid character %q in string", r)
  89. case r == '"':
  90. in = in[1:]
  91. n := len(in0) - len(in)
  92. v := rawValueOf(string(out), in0[:n:n])
  93. return v, n, nerr.E
  94. case r == '\\':
  95. if len(in) < 2 {
  96. return Value{}, 0, io.ErrUnexpectedEOF
  97. }
  98. switch r := in[1]; r {
  99. case '"', '\\', '/':
  100. in, out = in[2:], append(out, r)
  101. case 'b':
  102. in, out = in[2:], append(out, '\b')
  103. case 'f':
  104. in, out = in[2:], append(out, '\f')
  105. case 'n':
  106. in, out = in[2:], append(out, '\n')
  107. case 'r':
  108. in, out = in[2:], append(out, '\r')
  109. case 't':
  110. in, out = in[2:], append(out, '\t')
  111. case 'u':
  112. if len(in) < 6 {
  113. return Value{}, 0, io.ErrUnexpectedEOF
  114. }
  115. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  116. if err != nil {
  117. return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
  118. }
  119. in = in[6:]
  120. r := rune(v)
  121. if utf16.IsSurrogate(r) {
  122. if len(in) < 6 {
  123. return Value{}, 0, io.ErrUnexpectedEOF
  124. }
  125. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  126. r = utf16.DecodeRune(r, rune(v))
  127. if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
  128. return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
  129. }
  130. in = in[6:]
  131. }
  132. out = append(out, string(r)...)
  133. default:
  134. return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
  135. }
  136. default:
  137. i := indexNeedEscape(string(in[n:]))
  138. in, out = in[n+i:], append(out, in[:n+i]...)
  139. }
  140. }
  141. return Value{}, 0, io.ErrUnexpectedEOF
  142. }
  143. // indexNeedEscape returns the index of the next character that needs escaping.
  144. // If no characters need escaping, this returns the input length.
  145. func indexNeedEscape(s string) int {
  146. for i, r := range s {
  147. if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
  148. return i
  149. }
  150. }
  151. return len(s)
  152. }