123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package httplex contains rules around lexical matters of various
- // HTTP-related specifications.
- //
- // This package is shared by the standard library (which vendors it)
- // and x/net/http2. It comes with no API stability promise.
- package httplex
- import (
- "net"
- "strings"
- "unicode/utf8"
- "golang.org/x/net/idna"
- )
// isTokenTable is indexed by ASCII code point and is true for every
// character that RFC 7230 allows in a token ("tchar"): the ASCII
// letters and digits plus the punctuation !#$%&'*+-.^_`|~.
// The table stops short of DEL (127), so callers must bounds-check
// before indexing; IsTokenRune does this.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true,
	'*': true, '+': true, '-': true, '.': true, '^': true, '_': true,
	'`': true, '|': true, '~': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, upper case.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// ALPHA, lower case.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,
}

// IsTokenRune reports whether r is a character permitted in an HTTP
// token (an RFC 7230 "tchar"). Every rune that falls outside
// isTokenTable, including negative values and all non-ASCII runes, is
// reported as not being a token character.
func IsTokenRune(r rune) bool {
	// The lower-bound check is required: a negative rune satisfies the
	// upper-bound comparison and would otherwise panic when used as an
	// array index.
	return r >= 0 && int(r) < len(isTokenTable) && isTokenTable[r]
}
- func isNotToken(r rune) bool {
- return !IsTokenRune(r)
- }
- // HeaderValuesContainsToken reports whether any string in values
- // contains the provided token, ASCII case-insensitively.
- func HeaderValuesContainsToken(values []string, token string) bool {
- for _, v := range values {
- if headerValueContainsToken(v, token) {
- return true
- }
- }
- return false
- }
// isOWS reports whether b is an optional whitespace byte (space or
// horizontal tab), as defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
- // trimOWS returns x with all optional whitespace removes from the
- // beginning and end.
- func trimOWS(x string) string {
- // TODO: consider using strings.Trim(x, " \t") instead,
- // if and when it's fast enough. See issue 10292.
- // But this ASCII-only code will probably always beat UTF-8
- // aware code.
- for len(x) > 0 && isOWS(x[0]) {
- x = x[1:]
- }
- for len(x) > 0 && isOWS(x[len(x)-1]) {
- x = x[:len(x)-1]
- }
- return x
- }
- // headerValueContainsToken reports whether v (assumed to be a
- // 0#element, in the ABNF extension described in RFC 7230 section 7)
- // contains token amongst its comma-separated tokens, ASCII
- // case-insensitively.
- func headerValueContainsToken(v string, token string) bool {
- v = trimOWS(v)
- if comma := strings.IndexByte(v, ','); comma != -1 {
- return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
- }
- return tokenEqual(v, token)
- }
// lowerASCII maps an ASCII upper-case letter to its lower-case
// counterpart and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	if b < 'A' || b > 'Z' {
		return b
	}
	const caseOffset = 'a' - 'A'
	return b + caseOffset
}
- // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
- func tokenEqual(t1, t2 string) bool {
- if len(t1) != len(t2) {
- return false
- }
- for i, b := range t1 {
- if b >= utf8.RuneSelf {
- // No UTF-8 or non-ASCII allowed in tokens.
- return false
- }
- if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
- return false
- }
- }
- return true
- }
// isLWS reports whether b is a linear white space byte (SP or HT),
// according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
//
// Only the SP and HT bytes are recognized; CR and LF are not.
func isLWS(b byte) bool {
	return b == '\t' || b == ' '
}
// isCTL reports whether b is a control byte, according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		firstPrintable = ' '  // octet 32; everything below it is a CTL
		del            = 0x7f // DEL is also a CTL
	)
	if b == del {
		return true
	}
	return b < firstPrintable
}
- // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
- // HTTP/2 imposes the additional restriction that uppercase ASCII
- // letters are not allowed.
- //
- // RFC 7230 says:
- // header-field = field-name ":" OWS field-value OWS
- // field-name = token
- // token = 1*tchar
- // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
- // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
- func ValidHeaderFieldName(v string) bool {
- if len(v) == 0 {
- return false
- }
- for _, r := range v {
- if !IsTokenRune(r) {
- return false
- }
- }
- return true
- }
- // ValidHostHeader reports whether h is a valid host header.
- func ValidHostHeader(h string) bool {
- // The latest spec is actually this:
- //
- // http://tools.ietf.org/html/rfc7230#section-5.4
- // Host = uri-host [ ":" port ]
- //
- // Where uri-host is:
- // http://tools.ietf.org/html/rfc3986#section-3.2.2
- //
- // But we're going to be much more lenient for now and just
- // search for any byte that's not a valid byte in any of those
- // expressions.
- for i := 0; i < len(h); i++ {
- if !validHostByte[h[i]] {
- return false
- }
- }
- return true
- }
// validHostByte is indexed by byte value and is true for each byte
// that ValidHostHeader accepts in a Host header. See the comment in
// ValidHostHeader for the (deliberately lenient) rationale.
var validHostByte = [256]bool{
	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, lower case
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// ALPHA, upper case
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	'!':  true, // sub-delims
	'$':  true, // sub-delims
	'%':  true, // pct-encoded (and used in IPv6 zones)
	'&':  true, // sub-delims
	'\'': true, // sub-delims
	'(':  true, // sub-delims
	')':  true, // sub-delims
	'*':  true, // sub-delims
	'+':  true, // sub-delims
	',':  true, // sub-delims
	'-':  true, // unreserved
	'.':  true, // unreserved
	':':  true, // IPv6address + Host expression's optional port
	';':  true, // sub-delims
	'=':  true, // sub-delims
	'[':  true, // opening bracket, e.g. of a bracketed IPv6 literal
	']':  true, // closing bracket, e.g. of a bracketed IPv6 literal
	'_':  true, // unreserved
	'~':  true, // unreserved
}
- // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
- // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
- //
- // message-header = field-name ":" [ field-value ]
- // field-value = *( field-content | LWS )
- // field-content = <the OCTETs making up the field-value
- // and consisting of either *TEXT or combinations
- // of token, separators, and quoted-string>
- //
- // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
- //
- // TEXT = <any OCTET except CTLs,
- // but including LWS>
- // LWS = [CRLF] 1*( SP | HT )
- // CTL = <any US-ASCII control character
- // (octets 0 - 31) and DEL (127)>
- //
- // RFC 7230 says:
- // field-value = *( field-content / obs-fold )
- // obj-fold = N/A to http2, and deprecated
- // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
- // field-vchar = VCHAR / obs-text
- // obs-text = %x80-FF
- // VCHAR = "any visible [USASCII] character"
- //
- // http2 further says: "Similarly, HTTP/2 allows header field values
- // that are not valid. While most of the values that can be encoded
- // will not alter header field parsing, carriage return (CR, ASCII
- // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
- // 0x0) might be exploited by an attacker if they are translated
- // verbatim. Any request or response that contains a character not
- // permitted in a header field value MUST be treated as malformed
- // (Section 8.1.2.6). Valid characters are defined by the
- // field-content ABNF rule in Section 3.2 of [RFC7230]."
- //
- // This function does not (yet?) properly handle the rejection of
- // strings that begin or end with SP or HTAB.
- func ValidHeaderFieldValue(v string) bool {
- for i := 0; i < len(v); i++ {
- b := v[i]
- if isCTL(b) && !isLWS(b) {
- return false
- }
- }
- return true
- }
// isASCII reports whether s consists solely of ASCII bytes, i.e.
// contains no byte >= utf8.RuneSelf. The empty string is ASCII.
func isASCII(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
- // PunycodeHostPort returns the IDNA Punycode version
- // of the provided "host" or "host:port" string.
- func PunycodeHostPort(v string) (string, error) {
- if isASCII(v) {
- return v, nil
- }
- host, port, err := net.SplitHostPort(v)
- if err != nil {
- // The input 'v' argument was just a "host" argument,
- // without a port. This error should not be returned
- // to the caller.
- host = v
- port = ""
- }
- host, err = idna.ToASCII(host)
- if err != nil {
- // Non-UTF-8? Not representable in Punycode, in any
- // case.
- return "", err
- }
- if port == "" {
- return host, nil
- }
- return net.JoinHostPort(host, port), nil
- }
|