httplex.go 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package httplex contains rules around lexical matters of various
  5. // HTTP-related specifications.
  6. //
  7. // This package is shared by the standard library (which vendors it)
  8. // and x/net/http2. It comes with no API stability promise.
  9. package httplex
  10. import (
  11. "net"
  12. "strings"
  13. "unicode/utf8"
  14. "golang.org/x/net/idna"
  15. )
  16. var isTokenTable = [127]bool{
  17. '!': true,
  18. '#': true,
  19. '$': true,
  20. '%': true,
  21. '&': true,
  22. '\'': true,
  23. '*': true,
  24. '+': true,
  25. '-': true,
  26. '.': true,
  27. '0': true,
  28. '1': true,
  29. '2': true,
  30. '3': true,
  31. '4': true,
  32. '5': true,
  33. '6': true,
  34. '7': true,
  35. '8': true,
  36. '9': true,
  37. 'A': true,
  38. 'B': true,
  39. 'C': true,
  40. 'D': true,
  41. 'E': true,
  42. 'F': true,
  43. 'G': true,
  44. 'H': true,
  45. 'I': true,
  46. 'J': true,
  47. 'K': true,
  48. 'L': true,
  49. 'M': true,
  50. 'N': true,
  51. 'O': true,
  52. 'P': true,
  53. 'Q': true,
  54. 'R': true,
  55. 'S': true,
  56. 'T': true,
  57. 'U': true,
  58. 'W': true,
  59. 'V': true,
  60. 'X': true,
  61. 'Y': true,
  62. 'Z': true,
  63. '^': true,
  64. '_': true,
  65. '`': true,
  66. 'a': true,
  67. 'b': true,
  68. 'c': true,
  69. 'd': true,
  70. 'e': true,
  71. 'f': true,
  72. 'g': true,
  73. 'h': true,
  74. 'i': true,
  75. 'j': true,
  76. 'k': true,
  77. 'l': true,
  78. 'm': true,
  79. 'n': true,
  80. 'o': true,
  81. 'p': true,
  82. 'q': true,
  83. 'r': true,
  84. 's': true,
  85. 't': true,
  86. 'u': true,
  87. 'v': true,
  88. 'w': true,
  89. 'x': true,
  90. 'y': true,
  91. 'z': true,
  92. '|': true,
  93. '~': true,
  94. }
  95. func IsTokenRune(r rune) bool {
  96. i := int(r)
  97. return i < len(isTokenTable) && isTokenTable[i]
  98. }
  99. func isNotToken(r rune) bool {
  100. return !IsTokenRune(r)
  101. }
  102. // HeaderValuesContainsToken reports whether any string in values
  103. // contains the provided token, ASCII case-insensitively.
  104. func HeaderValuesContainsToken(values []string, token string) bool {
  105. for _, v := range values {
  106. if headerValueContainsToken(v, token) {
  107. return true
  108. }
  109. }
  110. return false
  111. }
  112. // isOWS reports whether b is an optional whitespace byte, as defined
  113. // by RFC 7230 section 3.2.3.
  114. func isOWS(b byte) bool { return b == ' ' || b == '\t' }
  115. // trimOWS returns x with all optional whitespace removes from the
  116. // beginning and end.
  117. func trimOWS(x string) string {
  118. // TODO: consider using strings.Trim(x, " \t") instead,
  119. // if and when it's fast enough. See issue 10292.
  120. // But this ASCII-only code will probably always beat UTF-8
  121. // aware code.
  122. for len(x) > 0 && isOWS(x[0]) {
  123. x = x[1:]
  124. }
  125. for len(x) > 0 && isOWS(x[len(x)-1]) {
  126. x = x[:len(x)-1]
  127. }
  128. return x
  129. }
  130. // headerValueContainsToken reports whether v (assumed to be a
  131. // 0#element, in the ABNF extension described in RFC 7230 section 7)
  132. // contains token amongst its comma-separated tokens, ASCII
  133. // case-insensitively.
  134. func headerValueContainsToken(v string, token string) bool {
  135. v = trimOWS(v)
  136. if comma := strings.IndexByte(v, ','); comma != -1 {
  137. return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
  138. }
  139. return tokenEqual(v, token)
  140. }
  141. // lowerASCII returns the ASCII lowercase version of b.
  142. func lowerASCII(b byte) byte {
  143. if 'A' <= b && b <= 'Z' {
  144. return b + ('a' - 'A')
  145. }
  146. return b
  147. }
  148. // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
  149. func tokenEqual(t1, t2 string) bool {
  150. if len(t1) != len(t2) {
  151. return false
  152. }
  153. for i, b := range t1 {
  154. if b >= utf8.RuneSelf {
  155. // No UTF-8 or non-ASCII allowed in tokens.
  156. return false
  157. }
  158. if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
  159. return false
  160. }
  161. }
  162. return true
  163. }
  164. // isLWS reports whether b is linear white space, according
  165. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  166. // LWS = [CRLF] 1*( SP | HT )
  167. func isLWS(b byte) bool { return b == ' ' || b == '\t' }
  168. // isCTL reports whether b is a control byte, according
  169. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  170. // CTL = <any US-ASCII control character
  171. // (octets 0 - 31) and DEL (127)>
  172. func isCTL(b byte) bool {
  173. const del = 0x7f // a CTL
  174. return b < ' ' || b == del
  175. }
  176. // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
  177. // HTTP/2 imposes the additional restriction that uppercase ASCII
  178. // letters are not allowed.
  179. //
  180. // RFC 7230 says:
  181. // header-field = field-name ":" OWS field-value OWS
  182. // field-name = token
  183. // token = 1*tchar
  184. // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  185. // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  186. func ValidHeaderFieldName(v string) bool {
  187. if len(v) == 0 {
  188. return false
  189. }
  190. for _, r := range v {
  191. if !IsTokenRune(r) {
  192. return false
  193. }
  194. }
  195. return true
  196. }
  197. // ValidHostHeader reports whether h is a valid host header.
  198. func ValidHostHeader(h string) bool {
  199. // The latest spec is actually this:
  200. //
  201. // http://tools.ietf.org/html/rfc7230#section-5.4
  202. // Host = uri-host [ ":" port ]
  203. //
  204. // Where uri-host is:
  205. // http://tools.ietf.org/html/rfc3986#section-3.2.2
  206. //
  207. // But we're going to be much more lenient for now and just
  208. // search for any byte that's not a valid byte in any of those
  209. // expressions.
  210. for i := 0; i < len(h); i++ {
  211. if !validHostByte[h[i]] {
  212. return false
  213. }
  214. }
  215. return true
  216. }
  217. // See the validHostHeader comment.
  218. var validHostByte = [256]bool{
  219. '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
  220. '8': true, '9': true,
  221. 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
  222. 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
  223. 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  224. 'y': true, 'z': true,
  225. 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
  226. 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
  227. 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  228. 'Y': true, 'Z': true,
  229. '!': true, // sub-delims
  230. '$': true, // sub-delims
  231. '%': true, // pct-encoded (and used in IPv6 zones)
  232. '&': true, // sub-delims
  233. '(': true, // sub-delims
  234. ')': true, // sub-delims
  235. '*': true, // sub-delims
  236. '+': true, // sub-delims
  237. ',': true, // sub-delims
  238. '-': true, // unreserved
  239. '.': true, // unreserved
  240. ':': true, // IPv6address + Host expression's optional port
  241. ';': true, // sub-delims
  242. '=': true, // sub-delims
  243. '[': true,
  244. '\'': true, // sub-delims
  245. ']': true,
  246. '_': true, // unreserved
  247. '~': true, // unreserved
  248. }
  249. // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
  250. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
  251. //
  252. // message-header = field-name ":" [ field-value ]
  253. // field-value = *( field-content | LWS )
  254. // field-content = <the OCTETs making up the field-value
  255. // and consisting of either *TEXT or combinations
  256. // of token, separators, and quoted-string>
  257. //
  258. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
  259. //
  260. // TEXT = <any OCTET except CTLs,
  261. // but including LWS>
  262. // LWS = [CRLF] 1*( SP | HT )
  263. // CTL = <any US-ASCII control character
  264. // (octets 0 - 31) and DEL (127)>
  265. //
  266. // RFC 7230 says:
  267. // field-value = *( field-content / obs-fold )
  268. // obj-fold = N/A to http2, and deprecated
  269. // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  270. // field-vchar = VCHAR / obs-text
  271. // obs-text = %x80-FF
  272. // VCHAR = "any visible [USASCII] character"
  273. //
  274. // http2 further says: "Similarly, HTTP/2 allows header field values
  275. // that are not valid. While most of the values that can be encoded
  276. // will not alter header field parsing, carriage return (CR, ASCII
  277. // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
  278. // 0x0) might be exploited by an attacker if they are translated
  279. // verbatim. Any request or response that contains a character not
  280. // permitted in a header field value MUST be treated as malformed
  281. // (Section 8.1.2.6). Valid characters are defined by the
  282. // field-content ABNF rule in Section 3.2 of [RFC7230]."
  283. //
  284. // This function does not (yet?) properly handle the rejection of
  285. // strings that begin or end with SP or HTAB.
  286. func ValidHeaderFieldValue(v string) bool {
  287. for i := 0; i < len(v); i++ {
  288. b := v[i]
  289. if isCTL(b) && !isLWS(b) {
  290. return false
  291. }
  292. }
  293. return true
  294. }
  295. func isASCII(s string) bool {
  296. for i := 0; i < len(s); i++ {
  297. if s[i] >= utf8.RuneSelf {
  298. return false
  299. }
  300. }
  301. return true
  302. }
  303. // PunycodeHostPort returns the IDNA Punycode version
  304. // of the provided "host" or "host:port" string.
  305. func PunycodeHostPort(v string) (string, error) {
  306. if isASCII(v) {
  307. return v, nil
  308. }
  309. host, port, err := net.SplitHostPort(v)
  310. if err != nil {
  311. // The input 'v' argument was just a "host" argument,
  312. // without a port. This error should not be returned
  313. // to the caller.
  314. host = v
  315. port = ""
  316. }
  317. host, err = idna.ToASCII(host)
  318. if err != nil {
  319. // Non-UTF-8? Not representable in Punycode, in any
  320. // case.
  321. return "", err
  322. }
  323. if port == "" {
  324. return host, nil
  325. }
  326. return net.JoinHostPort(host, port), nil
  327. }