string.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "io"
  7. "math/bits"
  8. "strconv"
  9. "unicode"
  10. "unicode/utf16"
  11. "unicode/utf8"
  12. "google.golang.org/protobuf/internal/errors"
  13. )
  14. func appendString(out []byte, in string) ([]byte, error) {
  15. out = append(out, '"')
  16. i := indexNeedEscapeInString(in)
  17. in, out = in[i:], append(out, in[:i]...)
  18. for len(in) > 0 {
  19. switch r, n := utf8.DecodeRuneInString(in); {
  20. case r == utf8.RuneError && n == 1:
  21. return out, errors.InvalidUTF8("")
  22. case r < ' ' || r == '"' || r == '\\':
  23. out = append(out, '\\')
  24. switch r {
  25. case '"', '\\':
  26. out = append(out, byte(r))
  27. case '\b':
  28. out = append(out, 'b')
  29. case '\f':
  30. out = append(out, 'f')
  31. case '\n':
  32. out = append(out, 'n')
  33. case '\r':
  34. out = append(out, 'r')
  35. case '\t':
  36. out = append(out, 't')
  37. default:
  38. out = append(out, 'u')
  39. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  40. out = strconv.AppendUint(out, uint64(r), 16)
  41. }
  42. in = in[n:]
  43. default:
  44. i := indexNeedEscapeInString(in[n:])
  45. in, out = in[n+i:], append(out, in[:n+i]...)
  46. }
  47. }
  48. out = append(out, '"')
  49. return out, nil
  50. }
  51. func (d *Decoder) parseString(in []byte) (string, int, error) {
  52. in0 := in
  53. if len(in) == 0 {
  54. return "", 0, io.ErrUnexpectedEOF
  55. }
  56. if in[0] != '"' {
  57. return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
  58. }
  59. in = in[1:]
  60. i := indexNeedEscapeInBytes(in)
  61. in, out := in[i:], in[:i:i] // set cap to prevent mutations
  62. for len(in) > 0 {
  63. switch r, n := utf8.DecodeRune(in); {
  64. case r == utf8.RuneError && n == 1:
  65. return "", 0, d.newSyntaxError("invalid UTF-8 in string")
  66. case r < ' ':
  67. return "", 0, d.newSyntaxError("invalid character %q in string", r)
  68. case r == '"':
  69. in = in[1:]
  70. n := len(in0) - len(in)
  71. return string(out), n, nil
  72. case r == '\\':
  73. if len(in) < 2 {
  74. return "", 0, io.ErrUnexpectedEOF
  75. }
  76. switch r := in[1]; r {
  77. case '"', '\\', '/':
  78. in, out = in[2:], append(out, r)
  79. case 'b':
  80. in, out = in[2:], append(out, '\b')
  81. case 'f':
  82. in, out = in[2:], append(out, '\f')
  83. case 'n':
  84. in, out = in[2:], append(out, '\n')
  85. case 'r':
  86. in, out = in[2:], append(out, '\r')
  87. case 't':
  88. in, out = in[2:], append(out, '\t')
  89. case 'u':
  90. if len(in) < 6 {
  91. return "", 0, io.ErrUnexpectedEOF
  92. }
  93. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  94. if err != nil {
  95. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  96. }
  97. in = in[6:]
  98. r := rune(v)
  99. if utf16.IsSurrogate(r) {
  100. if len(in) < 6 {
  101. return "", 0, io.ErrUnexpectedEOF
  102. }
  103. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  104. r = utf16.DecodeRune(r, rune(v))
  105. if in[0] != '\\' || in[1] != 'u' ||
  106. r == unicode.ReplacementChar || err != nil {
  107. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  108. }
  109. in = in[6:]
  110. }
  111. out = append(out, string(r)...)
  112. default:
  113. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
  114. }
  115. default:
  116. i := indexNeedEscapeInBytes(in[n:])
  117. in, out = in[n+i:], append(out, in[:n+i]...)
  118. }
  119. }
  120. return "", 0, io.ErrUnexpectedEOF
  121. }
  // indexNeedEscapeInString reports the byte offset of the first rune in s that
  // cannot be copied verbatim into a JSON string: a rune below U+0020, a
  // backslash, a double quote, or utf8.RuneError (which is what both invalid
  // UTF-8 and a literal U+FFFD decode to). If s contains no such rune, the
  // result is len(s).
  func indexNeedEscapeInString(s string) int {
  	for pos := 0; pos < len(s); {
  		c, width := utf8.DecodeRuneInString(s[pos:])
  		switch {
  		case c < ' ', c == '\\', c == '"', c == utf8.RuneError:
  			return pos
  		}
  		pos += width
  	}
  	return len(s)
  }
  // indexNeedEscapeInBytes reports the byte offset of the first rune in b that
  // cannot be copied verbatim into a JSON string: a rune below U+0020, a
  // backslash, a double quote, or utf8.RuneError (which is what both invalid
  // UTF-8 and a literal U+FFFD decode to). If b contains no such rune, the
  // result is len(b).
  //
  // TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
  // resolved.
  func indexNeedEscapeInBytes(b []byte) int {
  	rest := b
  	for len(rest) > 0 {
  		c, width := utf8.DecodeRune(rest)
  		if c == utf8.RuneError || c == '"' || c == '\\' || c < ' ' {
  			// Offset is however much of b has already been stepped over.
  			return len(b) - len(rest)
  		}
  		rest = rest[width:]
  	}
  	return len(b)
  }