feature_stream_string.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. package jsoniter
  2. import "unicode/utf8"
  3. // htmlSafeSet holds the value true if the ASCII character with the given
  4. // array position can be safely represented inside a JSON string, embedded
  5. // inside of HTML <script> tags, without any additional escaping.
  6. //
  7. // All values are true except for the ASCII control characters (0-31), the
  8. // double quote ("), the backslash character ("\"), HTML opening and closing
  9. // tags ("<" and ">"), and the ampersand ("&").
  10. var htmlSafeSet = [utf8.RuneSelf]bool{
  11. ' ': true,
  12. '!': true,
  13. '"': false,
  14. '#': true,
  15. '$': true,
  16. '%': true,
  17. '&': false,
  18. '\'': true,
  19. '(': true,
  20. ')': true,
  21. '*': true,
  22. '+': true,
  23. ',': true,
  24. '-': true,
  25. '.': true,
  26. '/': true,
  27. '0': true,
  28. '1': true,
  29. '2': true,
  30. '3': true,
  31. '4': true,
  32. '5': true,
  33. '6': true,
  34. '7': true,
  35. '8': true,
  36. '9': true,
  37. ':': true,
  38. ';': true,
  39. '<': false,
  40. '=': true,
  41. '>': false,
  42. '?': true,
  43. '@': true,
  44. 'A': true,
  45. 'B': true,
  46. 'C': true,
  47. 'D': true,
  48. 'E': true,
  49. 'F': true,
  50. 'G': true,
  51. 'H': true,
  52. 'I': true,
  53. 'J': true,
  54. 'K': true,
  55. 'L': true,
  56. 'M': true,
  57. 'N': true,
  58. 'O': true,
  59. 'P': true,
  60. 'Q': true,
  61. 'R': true,
  62. 'S': true,
  63. 'T': true,
  64. 'U': true,
  65. 'V': true,
  66. 'W': true,
  67. 'X': true,
  68. 'Y': true,
  69. 'Z': true,
  70. '[': true,
  71. '\\': false,
  72. ']': true,
  73. '^': true,
  74. '_': true,
  75. '`': true,
  76. 'a': true,
  77. 'b': true,
  78. 'c': true,
  79. 'd': true,
  80. 'e': true,
  81. 'f': true,
  82. 'g': true,
  83. 'h': true,
  84. 'i': true,
  85. 'j': true,
  86. 'k': true,
  87. 'l': true,
  88. 'm': true,
  89. 'n': true,
  90. 'o': true,
  91. 'p': true,
  92. 'q': true,
  93. 'r': true,
  94. 's': true,
  95. 't': true,
  96. 'u': true,
  97. 'v': true,
  98. 'w': true,
  99. 'x': true,
  100. 'y': true,
  101. 'z': true,
  102. '{': true,
  103. '|': true,
  104. '}': true,
  105. '~': true,
  106. '\u007f': true,
  107. }
  108. // safeSet holds the value true if the ASCII character with the given array
  109. // position can be represented inside a JSON string without any further
  110. // escaping.
  111. //
  112. // All values are true except for the ASCII control characters (0-31), the
  113. // double quote ("), and the backslash character ("\").
  114. var safeSet = [utf8.RuneSelf]bool{
  115. ' ': true,
  116. '!': true,
  117. '"': false,
  118. '#': true,
  119. '$': true,
  120. '%': true,
  121. '&': true,
  122. '\'': true,
  123. '(': true,
  124. ')': true,
  125. '*': true,
  126. '+': true,
  127. ',': true,
  128. '-': true,
  129. '.': true,
  130. '/': true,
  131. '0': true,
  132. '1': true,
  133. '2': true,
  134. '3': true,
  135. '4': true,
  136. '5': true,
  137. '6': true,
  138. '7': true,
  139. '8': true,
  140. '9': true,
  141. ':': true,
  142. ';': true,
  143. '<': true,
  144. '=': true,
  145. '>': true,
  146. '?': true,
  147. '@': true,
  148. 'A': true,
  149. 'B': true,
  150. 'C': true,
  151. 'D': true,
  152. 'E': true,
  153. 'F': true,
  154. 'G': true,
  155. 'H': true,
  156. 'I': true,
  157. 'J': true,
  158. 'K': true,
  159. 'L': true,
  160. 'M': true,
  161. 'N': true,
  162. 'O': true,
  163. 'P': true,
  164. 'Q': true,
  165. 'R': true,
  166. 'S': true,
  167. 'T': true,
  168. 'U': true,
  169. 'V': true,
  170. 'W': true,
  171. 'X': true,
  172. 'Y': true,
  173. 'Z': true,
  174. '[': true,
  175. '\\': false,
  176. ']': true,
  177. '^': true,
  178. '_': true,
  179. '`': true,
  180. 'a': true,
  181. 'b': true,
  182. 'c': true,
  183. 'd': true,
  184. 'e': true,
  185. 'f': true,
  186. 'g': true,
  187. 'h': true,
  188. 'i': true,
  189. 'j': true,
  190. 'k': true,
  191. 'l': true,
  192. 'm': true,
  193. 'n': true,
  194. 'o': true,
  195. 'p': true,
  196. 'q': true,
  197. 'r': true,
  198. 's': true,
  199. 't': true,
  200. 'u': true,
  201. 'v': true,
  202. 'w': true,
  203. 'x': true,
  204. 'y': true,
  205. 'z': true,
  206. '{': true,
  207. '|': true,
  208. '}': true,
  209. '~': true,
  210. '\u007f': true,
  211. }
  212. var hex = "0123456789abcdef"
  213. func (stream *Stream) WriteStringWithHtmlEscaped(s string) {
  214. stream.ensure(32)
  215. valLen := len(s)
  216. toWriteLen := valLen
  217. bufLengthMinusTwo := len(stream.buf) - 2 // make room for the quotes
  218. if stream.n+toWriteLen > bufLengthMinusTwo {
  219. toWriteLen = bufLengthMinusTwo - stream.n
  220. }
  221. n := stream.n
  222. stream.buf[n] = '"'
  223. n++
  224. // write string, the fast path, without utf8 and escape support
  225. i := 0
  226. for ; i < toWriteLen; i++ {
  227. c := s[i]
  228. if c > 31 && htmlSafeSet[c] {
  229. stream.buf[n] = c
  230. n++
  231. } else {
  232. break
  233. }
  234. }
  235. if i == valLen {
  236. stream.buf[n] = '"'
  237. n++
  238. stream.n = n
  239. return
  240. }
  241. stream.n = n
  242. start := 0
  243. // for the remaining parts, we process them char by char
  244. for ; i < valLen; i++ {
  245. if b := s[i]; b < utf8.RuneSelf {
  246. if htmlSafeSet[b] {
  247. i++
  248. continue
  249. }
  250. if start < i {
  251. stream.WriteRaw(s[start:i])
  252. }
  253. switch b {
  254. case '\\', '"':
  255. stream.writeTwoBytes('\\', b)
  256. case '\n':
  257. stream.writeTwoBytes('\\', 'n')
  258. case '\r':
  259. stream.writeTwoBytes('\\', 'r')
  260. case '\t':
  261. stream.writeTwoBytes('\\', 't')
  262. default:
  263. // This encodes bytes < 0x20 except for \t, \n and \r.
  264. // If escapeHTML is set, it also escapes <, >, and &
  265. // because they can lead to security holes when
  266. // user-controlled strings are rendered into JSON
  267. // and served to some browsers.
  268. stream.WriteRaw(`\u00`)
  269. stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
  270. }
  271. i++
  272. start = i
  273. continue
  274. }
  275. c, size := utf8.DecodeRuneInString(s[i:])
  276. if c == utf8.RuneError && size == 1 {
  277. if start < i {
  278. stream.WriteRaw(s[start:i])
  279. }
  280. stream.WriteRaw(`\ufffd`)
  281. i += size
  282. start = i
  283. continue
  284. }
  285. // U+2028 is LINE SEPARATOR.
  286. // U+2029 is PARAGRAPH SEPARATOR.
  287. // They are both technically valid characters in JSON strings,
  288. // but don't work in JSONP, which has to be evaluated as JavaScript,
  289. // and can lead to security holes there. It is valid JSON to
  290. // escape them, so we do so unconditionally.
  291. // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
  292. if c == '\u2028' || c == '\u2029' {
  293. if start < i {
  294. stream.WriteRaw(s[start:i])
  295. }
  296. stream.WriteRaw(`\u202`)
  297. stream.writeByte(hex[c&0xF])
  298. i += size
  299. start = i
  300. continue
  301. }
  302. i += size
  303. }
  304. if start < len(s) {
  305. stream.WriteRaw(s[start:])
  306. }
  307. stream.writeByte('"')
  308. }
  309. func (stream *Stream) WriteString(s string) {
  310. stream.ensure(32)
  311. valLen := len(s)
  312. toWriteLen := valLen
  313. bufLengthMinusTwo := len(stream.buf) - 2 // make room for the quotes
  314. if stream.n+toWriteLen > bufLengthMinusTwo {
  315. toWriteLen = bufLengthMinusTwo - stream.n
  316. }
  317. n := stream.n
  318. stream.buf[n] = '"'
  319. n++
  320. // write string, the fast path, without utf8 and escape support
  321. i := 0
  322. for ; i < toWriteLen; i++ {
  323. c := s[i]
  324. if c > 31 && c != '"' && c != '\\' {
  325. stream.buf[n] = c
  326. n++
  327. } else {
  328. break
  329. }
  330. }
  331. if i == valLen {
  332. stream.buf[n] = '"'
  333. n++
  334. stream.n = n
  335. return
  336. }
  337. stream.n = n
  338. // for the remaining parts, we process them char by char
  339. for ; i < valLen; i++ {
  340. c := s[i]
  341. switch c {
  342. case '"':
  343. stream.writeTwoBytes('\\', '"')
  344. case '\\':
  345. stream.writeTwoBytes('\\', '\\')
  346. case '\b':
  347. stream.writeTwoBytes('\\', 'b')
  348. case '\f':
  349. stream.writeTwoBytes('\\', 'f')
  350. case '\n':
  351. stream.writeTwoBytes('\\', 'n')
  352. case '\r':
  353. stream.writeTwoBytes('\\', 'r')
  354. case '\t':
  355. stream.writeTwoBytes('\\', 't')
  356. default:
  357. stream.writeByte(c)
  358. }
  359. }
  360. stream.writeByte('"')
  361. }