string.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "io"
  7. "math/bits"
  8. "strconv"
  9. "unicode"
  10. "unicode/utf16"
  11. "unicode/utf8"
  12. "github.com/golang/protobuf/v2/internal/errors"
  13. )
  14. func appendString(out []byte, in string) ([]byte, error) {
  15. var nerr errors.NonFatal
  16. out = append(out, '"')
  17. i := indexNeedEscapeInString(in)
  18. in, out = in[i:], append(out, in[:i]...)
  19. for len(in) > 0 {
  20. switch r, n := utf8.DecodeRuneInString(in); {
  21. case r == utf8.RuneError && n == 1:
  22. nerr.AppendInvalidUTF8("")
  23. in, out = in[1:], append(out, in[0]) // preserve invalid byte
  24. case r < ' ' || r == '"' || r == '\\':
  25. out = append(out, '\\')
  26. switch r {
  27. case '"', '\\':
  28. out = append(out, byte(r))
  29. case '\b':
  30. out = append(out, 'b')
  31. case '\f':
  32. out = append(out, 'f')
  33. case '\n':
  34. out = append(out, 'n')
  35. case '\r':
  36. out = append(out, 'r')
  37. case '\t':
  38. out = append(out, 't')
  39. default:
  40. out = append(out, 'u')
  41. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  42. out = strconv.AppendUint(out, uint64(r), 16)
  43. }
  44. in = in[n:]
  45. default:
  46. i := indexNeedEscapeInString(in[n:])
  47. in, out = in[n+i:], append(out, in[:n+i]...)
  48. }
  49. }
  50. out = append(out, '"')
  51. return out, nerr.E
  52. }
  53. func (d *Decoder) parseString(in []byte) (string, int, error) {
  54. var nerr errors.NonFatal
  55. in0 := in
  56. if len(in) == 0 {
  57. return "", 0, io.ErrUnexpectedEOF
  58. }
  59. if in[0] != '"' {
  60. return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
  61. }
  62. in = in[1:]
  63. i := indexNeedEscapeInBytes(in)
  64. in, out := in[i:], in[:i:i] // set cap to prevent mutations
  65. for len(in) > 0 {
  66. switch r, n := utf8.DecodeRune(in); {
  67. case r == utf8.RuneError && n == 1:
  68. nerr.AppendInvalidUTF8("")
  69. in, out = in[1:], append(out, in[0]) // preserve invalid byte
  70. case r < ' ':
  71. return "", 0, d.newSyntaxError("invalid character %q in string", r)
  72. case r == '"':
  73. in = in[1:]
  74. n := len(in0) - len(in)
  75. return string(out), n, nerr.E
  76. case r == '\\':
  77. if len(in) < 2 {
  78. return "", 0, io.ErrUnexpectedEOF
  79. }
  80. switch r := in[1]; r {
  81. case '"', '\\', '/':
  82. in, out = in[2:], append(out, r)
  83. case 'b':
  84. in, out = in[2:], append(out, '\b')
  85. case 'f':
  86. in, out = in[2:], append(out, '\f')
  87. case 'n':
  88. in, out = in[2:], append(out, '\n')
  89. case 'r':
  90. in, out = in[2:], append(out, '\r')
  91. case 't':
  92. in, out = in[2:], append(out, '\t')
  93. case 'u':
  94. if len(in) < 6 {
  95. return "", 0, io.ErrUnexpectedEOF
  96. }
  97. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  98. if err != nil {
  99. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  100. }
  101. in = in[6:]
  102. r := rune(v)
  103. if utf16.IsSurrogate(r) {
  104. if len(in) < 6 {
  105. return "", 0, io.ErrUnexpectedEOF
  106. }
  107. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  108. r = utf16.DecodeRune(r, rune(v))
  109. if in[0] != '\\' || in[1] != 'u' ||
  110. r == unicode.ReplacementChar || err != nil {
  111. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  112. }
  113. in = in[6:]
  114. }
  115. out = append(out, string(r)...)
  116. default:
  117. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
  118. }
  119. default:
  120. i := indexNeedEscapeInBytes(in[n:])
  121. in, out = in[n+i:], append(out, in[:n+i]...)
  122. }
  123. }
  124. return "", 0, io.ErrUnexpectedEOF
  125. }
  126. // indexNeedEscapeInString returns the index of the character that needs
  127. // escaping. If no characters need escaping, this returns the input length.
  128. func indexNeedEscapeInString(s string) int {
  129. for i, r := range s {
  130. if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
  131. return i
  132. }
  133. }
  134. return len(s)
  135. }
  136. // indexNeedEscapeInBytes returns the index of the character that needs
  137. // escaping. If no characters need escaping, this returns the input length.
  138. // TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
  139. // resolved.
  140. func indexNeedEscapeInBytes(b []byte) int {
  141. for i := 0; i < len(b); {
  142. r, n := utf8.DecodeRune(b[i:])
  143. if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
  144. return i
  145. }
  146. i += n
  147. }
  148. return len(b)
  149. }