string.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "io"
  7. "math/bits"
  8. "strconv"
  9. "unicode"
  10. "unicode/utf16"
  11. "unicode/utf8"
  12. "google.golang.org/protobuf/internal/errors"
  13. "google.golang.org/protobuf/internal/strs"
  14. )
  15. func appendString(out []byte, in string) ([]byte, error) {
  16. out = append(out, '"')
  17. i := indexNeedEscapeInString(in)
  18. in, out = in[i:], append(out, in[:i]...)
  19. for len(in) > 0 {
  20. switch r, n := utf8.DecodeRuneInString(in); {
  21. case r == utf8.RuneError && n == 1:
  22. return out, errors.InvalidUTF8("")
  23. case r < ' ' || r == '"' || r == '\\':
  24. out = append(out, '\\')
  25. switch r {
  26. case '"', '\\':
  27. out = append(out, byte(r))
  28. case '\b':
  29. out = append(out, 'b')
  30. case '\f':
  31. out = append(out, 'f')
  32. case '\n':
  33. out = append(out, 'n')
  34. case '\r':
  35. out = append(out, 'r')
  36. case '\t':
  37. out = append(out, 't')
  38. default:
  39. out = append(out, 'u')
  40. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  41. out = strconv.AppendUint(out, uint64(r), 16)
  42. }
  43. in = in[n:]
  44. default:
  45. i := indexNeedEscapeInString(in[n:])
  46. in, out = in[n+i:], append(out, in[:n+i]...)
  47. }
  48. }
  49. out = append(out, '"')
  50. return out, nil
  51. }
  52. func (d *Decoder) parseString(in []byte) (string, int, error) {
  53. in0 := in
  54. if len(in) == 0 {
  55. return "", 0, io.ErrUnexpectedEOF
  56. }
  57. if in[0] != '"' {
  58. return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
  59. }
  60. in = in[1:]
  61. i := indexNeedEscapeInBytes(in)
  62. in, out := in[i:], in[:i:i] // set cap to prevent mutations
  63. for len(in) > 0 {
  64. switch r, n := utf8.DecodeRune(in); {
  65. case r == utf8.RuneError && n == 1:
  66. return "", 0, d.newSyntaxError("invalid UTF-8 in string")
  67. case r < ' ':
  68. return "", 0, d.newSyntaxError("invalid character %q in string", r)
  69. case r == '"':
  70. in = in[1:]
  71. n := len(in0) - len(in)
  72. return string(out), n, nil
  73. case r == '\\':
  74. if len(in) < 2 {
  75. return "", 0, io.ErrUnexpectedEOF
  76. }
  77. switch r := in[1]; r {
  78. case '"', '\\', '/':
  79. in, out = in[2:], append(out, r)
  80. case 'b':
  81. in, out = in[2:], append(out, '\b')
  82. case 'f':
  83. in, out = in[2:], append(out, '\f')
  84. case 'n':
  85. in, out = in[2:], append(out, '\n')
  86. case 'r':
  87. in, out = in[2:], append(out, '\r')
  88. case 't':
  89. in, out = in[2:], append(out, '\t')
  90. case 'u':
  91. if len(in) < 6 {
  92. return "", 0, io.ErrUnexpectedEOF
  93. }
  94. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  95. if err != nil {
  96. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  97. }
  98. in = in[6:]
  99. r := rune(v)
  100. if utf16.IsSurrogate(r) {
  101. if len(in) < 6 {
  102. return "", 0, io.ErrUnexpectedEOF
  103. }
  104. v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  105. r = utf16.DecodeRune(r, rune(v))
  106. if in[0] != '\\' || in[1] != 'u' ||
  107. r == unicode.ReplacementChar || err != nil {
  108. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
  109. }
  110. in = in[6:]
  111. }
  112. out = append(out, string(r)...)
  113. default:
  114. return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
  115. }
  116. default:
  117. i := indexNeedEscapeInBytes(in[n:])
  118. in, out = in[n+i:], append(out, in[:n+i]...)
  119. }
  120. }
  121. return "", 0, io.ErrUnexpectedEOF
  122. }
  123. // indexNeedEscapeInString returns the index of the character that needs
  124. // escaping. If no characters need escaping, this returns the input length.
  125. func indexNeedEscapeInString(s string) int {
  126. for i, r := range s {
  127. if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
  128. return i
  129. }
  130. }
  131. return len(s)
  132. }
  133. func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }