浏览代码

http/httpproxy: new package

This factors out the HTTP proxy functionality from net/http,
with a view to vendoring it into net/http later.

See discussion in https://go-review.googlesource.com/c/go/+/68091

Change-Id: I8df8a92a13bca03504edd24b71a9a184f290b87d
Reviewed-on: https://go-review.googlesource.com/76910
Reviewed-by: roger peppe <rogpeppe@gmail.com>
Roger Peppe 8 年之前
父节点
当前提交
c7086645de
共有 3 个文件被更改,包括 544 次插入0 次删除
  1. 7 0
      http/httpproxy/export_test.go
  2. 239 0
      http/httpproxy/proxy.go
  3. 298 0
      http/httpproxy/proxy_test.go

+ 7 - 0
http/httpproxy/export_test.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package httpproxy
+
+var ExportUseProxy = (*Config).useProxy

+ 239 - 0
http/httpproxy/proxy.go

@@ -0,0 +1,239 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package httpproxy provides support for HTTP proxy determination
+// based on environment variables, as provided by net/http's
+// ProxyFromEnvironment function.
+//
+// The API is not subject to the Go 1 compatibility promise and may change at
+// any time.
+package httpproxy
+
+import (
+	"errors"
+	"fmt"
+	"net"
+	"net/url"
+	"os"
+	"strings"
+	"unicode/utf8"
+
+	"golang.org/x/net/idna"
+)
+
+// Config holds configuration for HTTP proxy settings. See
+// FromEnvironment for details.
+type Config struct {
+	// HTTPProxy represents the value of the HTTP_PROXY or
+	// http_proxy environment variable. It will be used as the proxy
+	// URL for HTTP requests and HTTPS requests unless overridden by
+	// HTTPSProxy or NoProxy.
+	HTTPProxy string
+
+	// HTTPSProxy represents the HTTPS_PROXY or https_proxy
+	// environment variable. It will be used as the proxy URL for
+	// HTTPS requests unless overridden by NoProxy.
+	HTTPSProxy string
+
+	// NoProxy represents the NO_PROXY or no_proxy environment
+	// variable. It specifies URLs that should be excluded from
+	// proxying as a comma-separated list of domain names or a
+	// single asterisk (*) to indicate that no proxying should be
+	// done. A domain name matches that name and all subdomains. A
+	// domain name with a leading "." matches subdomains only. For
+	// example "foo.com" matches "foo.com" and "bar.foo.com";
+	// ".y.com" matches "x.y.com" but not "y.com".
+	NoProxy string
+
+	// CGI holds whether the current process is running
+	// as a CGI handler (FromEnvironment infers this from the
+	// presence of a REQUEST_METHOD environment variable).
+	// When this is set, ProxyForURL will return an error
+	// when HTTPProxy applies, because a client could be
+	// setting HTTP_PROXY maliciously. See https://golang.org/s/cgihttpproxy.
+	CGI bool
+}
+
+// FromEnvironment returns a Config instance populated from the
+// environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
+// lowercase versions thereof). HTTPS_PROXY takes precedence over
+// HTTP_PROXY for https requests.
+//
+// The environment values may be either a complete URL or a
+// "host[:port]", in which case the "http" scheme is assumed. An error
+// is returned if the value is a different form.
+func FromEnvironment() *Config {
+	return &Config{
+		HTTPProxy:  getEnvAny("HTTP_PROXY", "http_proxy"),
+		HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy"),
+		NoProxy:    getEnvAny("NO_PROXY", "no_proxy"),
+		CGI:        os.Getenv("REQUEST_METHOD") != "",
+	}
+}
+
+func getEnvAny(names ...string) string {
+	for _, n := range names {
+		if val := os.Getenv(n); val != "" {
+			return val
+		}
+	}
+	return ""
+}
+
+// ProxyFunc returns a function that determines the proxy URL to use for
+// a given request URL. Changing the contents of cfg will not affect
+// proxy functions created earlier.
+//
+// A nil URL and nil error are returned if no proxy is defined in the
+// environment, or a proxy should not be used for the given request, as
+// defined by NO_PROXY.
+//
+// As a special case, if req.URL.Host is "localhost" (with or without a
+// port number), then a nil URL and nil error will be returned.
+func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
+	// Prevent Config changes from affecting the function calculation.
+	// TODO Preprocess proxy settings for more efficient evaluation.
+	cfg1 := *cfg
+	return cfg1.proxyForURL
+}
+
+func (cfg *Config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
+	var proxy string
+	if reqURL.Scheme == "https" {
+		proxy = cfg.HTTPSProxy
+	}
+	if proxy == "" {
+		proxy = cfg.HTTPProxy
+		if proxy != "" && cfg.CGI {
+			return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
+		}
+	}
+	if proxy == "" {
+		return nil, nil
+	}
+	if !cfg.useProxy(canonicalAddr(reqURL)) {
+		return nil, nil
+	}
+	proxyURL, err := url.Parse(proxy)
+	if err != nil ||
+		(proxyURL.Scheme != "http" &&
+			proxyURL.Scheme != "https" &&
+			proxyURL.Scheme != "socks5") {
+		// proxy was bogus. Try prepending "http://" to it and
+		// see if that parses correctly. If not, we fall
+		// through and complain about the original one.
+		if proxyURL, err := url.Parse("http://" + proxy); err == nil {
+			return proxyURL, nil
+		}
+	}
+	if err != nil {
+		return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err)
+	}
+	return proxyURL, nil
+}
+
+// useProxy reports whether requests to addr should use a proxy,
+// according to the NO_PROXY or no_proxy environment variable.
+// addr is always a canonicalAddr with a host and port.
+func (cfg *Config) useProxy(addr string) bool {
+	if len(addr) == 0 {
+		return true
+	}
+	host, _, err := net.SplitHostPort(addr)
+	if err != nil {
+		return false
+	}
+	if host == "localhost" {
+		return false
+	}
+	if ip := net.ParseIP(host); ip != nil {
+		if ip.IsLoopback() {
+			return false
+		}
+	}
+
+	noProxy := cfg.NoProxy
+	if noProxy == "*" {
+		return false
+	}
+
+	addr = strings.ToLower(strings.TrimSpace(addr))
+	if hasPort(addr) {
+		addr = addr[:strings.LastIndex(addr, ":")]
+	}
+
+	for _, p := range strings.Split(noProxy, ",") {
+		p = strings.ToLower(strings.TrimSpace(p))
+		if len(p) == 0 {
+			continue
+		}
+		if hasPort(p) {
+			p = p[:strings.LastIndex(p, ":")]
+		}
+		if addr == p {
+			return false
+		}
+		if len(p) == 0 {
+			// There is no host part, likely the entry is malformed; ignore.
+			continue
+		}
+		if p[0] == '.' && (strings.HasSuffix(addr, p) || addr == p[1:]) {
+			// no_proxy ".foo.com" matches "bar.foo.com" or "foo.com"
+			return false
+		}
+		if p[0] != '.' && strings.HasSuffix(addr, p) && addr[len(addr)-len(p)-1] == '.' {
+			// no_proxy "foo.com" matches "bar.foo.com"
+			return false
+		}
+	}
+	return true
+}
+
+var portMap = map[string]string{
+	"http":   "80",
+	"https":  "443",
+	"socks5": "1080",
+}
+
+// canonicalAddr returns url.Host but always with a ":port" suffix
+func canonicalAddr(url *url.URL) string {
+	addr := url.Hostname()
+	if v, err := idnaASCII(addr); err == nil {
+		addr = v
+	}
+	port := url.Port()
+	if port == "" {
+		port = portMap[url.Scheme]
+	}
+	return net.JoinHostPort(addr, port)
+}
+
+// Given a string of the form "host", "host:port", or "[ipv6::address]:port",
+// return true if the string includes a port.
+func hasPort(s string) bool { return strings.LastIndex(s, ":") > strings.LastIndex(s, "]") }
+
+func idnaASCII(v string) (string, error) {
+	// TODO: Consider removing this check after verifying performance is okay.
+	// Right now punycode verification, length checks, context checks, and the
+	// permissible character tests are all omitted. It also prevents the ToASCII
+	// call from salvaging an invalid IDN, when possible. As a result it may be
+	// possible to have two IDNs that appear identical to the user where the
+	// ASCII-only version causes an error downstream whereas the non-ASCII
+	// version does not.
+	// Note that for correct ASCII IDNs ToASCII will only do considerably more
+	// work, but it will not cause an allocation.
+	if isASCII(v) {
+		return v, nil
+	}
+	return idna.Lookup.ToASCII(v)
+}
+
+func isASCII(s string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] >= utf8.RuneSelf {
+			return false
+		}
+	}
+	return true
+}

+ 298 - 0
http/httpproxy/proxy_test.go

@@ -0,0 +1,298 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package httpproxy_test
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"net/url"
+	"os"
+	"strings"
+	"testing"
+
+	"golang.org/x/net/http/httpproxy"
+)
+
+type proxyForURLTest struct {
+	cfg     httpproxy.Config
+	req     string // URL to fetch; blank means "http://example.com"
+	want    string
+	wanterr error
+}
+
+func (t proxyForURLTest) String() string {
+	var buf bytes.Buffer
+	space := func() {
+		if buf.Len() > 0 {
+			buf.WriteByte(' ')
+		}
+	}
+	if t.cfg.HTTPProxy != "" {
+		fmt.Fprintf(&buf, "http_proxy=%q", t.cfg.HTTPProxy)
+	}
+	if t.cfg.HTTPSProxy != "" {
+		space()
+		fmt.Fprintf(&buf, "https_proxy=%q", t.cfg.HTTPSProxy)
+	}
+	if t.cfg.NoProxy != "" {
+		space()
+		fmt.Fprintf(&buf, "no_proxy=%q", t.cfg.NoProxy)
+	}
+	req := "http://example.com"
+	if t.req != "" {
+		req = t.req
+	}
+	space()
+	fmt.Fprintf(&buf, "req=%q", req)
+	return strings.TrimSpace(buf.String())
+}
+
+var proxyForURLTests = []proxyForURLTest{{
+	cfg: httpproxy.Config{
+		HTTPProxy: "127.0.0.1:8080",
+	},
+	want: "http://127.0.0.1:8080",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "cache.corp.example.com:1234",
+	},
+	want: "http://cache.corp.example.com:1234",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "cache.corp.example.com",
+	},
+	want: "http://cache.corp.example.com",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "https://cache.corp.example.com",
+	},
+	want: "https://cache.corp.example.com",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "http://127.0.0.1:8080",
+	},
+	want: "http://127.0.0.1:8080",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "https://127.0.0.1:8080",
+	},
+	want: "https://127.0.0.1:8080",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy: "socks5://127.0.0.1",
+	},
+	want: "socks5://127.0.0.1",
+}, {
+	// Don't use secure for http
+	cfg: httpproxy.Config{
+		HTTPProxy:  "http.proxy.tld",
+		HTTPSProxy: "secure.proxy.tld",
+	},
+	req:  "http://insecure.tld/",
+	want: "http://http.proxy.tld",
+}, {
+	// Use secure for https.
+	cfg: httpproxy.Config{
+		HTTPProxy:  "http.proxy.tld",
+		HTTPSProxy: "secure.proxy.tld",
+	},
+	req:  "https://secure.tld/",
+	want: "http://secure.proxy.tld",
+}, {
+	cfg: httpproxy.Config{
+		HTTPProxy:  "http.proxy.tld",
+		HTTPSProxy: "https://secure.proxy.tld",
+	},
+	req:  "https://secure.tld/",
+	want: "https://secure.proxy.tld",
+}, {
+	// Issue 16405: don't use HTTP_PROXY in a CGI environment,
+	// where HTTP_PROXY can be attacker-controlled.
+	cfg: httpproxy.Config{
+		HTTPProxy: "http://10.1.2.3:8080",
+		CGI:       true,
+	},
+	want:    "<nil>",
+	wanterr: errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy"),
+}, {
+	// HTTPS proxy is still used even in CGI environment.
+	// (perhaps dubious but it's the historical behaviour).
+	cfg: httpproxy.Config{
+		HTTPSProxy: "https://secure.proxy.tld",
+		CGI:        true,
+	},
+	req:  "https://secure.tld/",
+	want: "https://secure.proxy.tld",
+}, {
+	want: "<nil>",
+}, {
+	cfg: httpproxy.Config{
+		NoProxy:   "example.com",
+		HTTPProxy: "proxy",
+	},
+	req:  "http://example.com/",
+	want: "<nil>",
+}, {
+	cfg: httpproxy.Config{
+		NoProxy:   ".example.com",
+		HTTPProxy: "proxy",
+	},
+	req:  "http://example.com/",
+	want: "<nil>",
+}, {
+	cfg: httpproxy.Config{
+		NoProxy:   "ample.com",
+		HTTPProxy: "proxy",
+	},
+	req:  "http://example.com/",
+	want: "http://proxy",
+}, {
+	cfg: httpproxy.Config{
+		NoProxy:   "example.com",
+		HTTPProxy: "proxy",
+	},
+	req:  "http://foo.example.com/",
+	want: "<nil>",
+}, {
+	cfg: httpproxy.Config{
+		NoProxy:   ".foo.com",
+		HTTPProxy: "proxy",
+	},
+	req:  "http://example.com/",
+	want: "http://proxy",
+}}
+
+func testProxyForURL(t *testing.T, tt proxyForURLTest) {
+	t.Helper()
+	reqURLStr := tt.req
+	if reqURLStr == "" {
+		reqURLStr = "http://example.com"
+	}
+	reqURL, err := url.Parse(reqURLStr)
+	if err != nil {
+		t.Errorf("invalid URL %q", reqURLStr)
+		return
+	}
+	cfg := tt.cfg
+	proxyForURL := cfg.ProxyFunc()
+	url, err := proxyForURL(reqURL)
+	if g, e := fmt.Sprintf("%v", err), fmt.Sprintf("%v", tt.wanterr); g != e {
+		t.Errorf("%v: got error = %q, want %q", tt, g, e)
+		return
+	}
+	if got := fmt.Sprintf("%s", url); got != tt.want {
+		t.Errorf("%v: got URL = %q, want %q", tt, url, tt.want)
+	}
+
+	// Check that changing the Config doesn't change the results
+	// of the functuon.
+	cfg = httpproxy.Config{}
+	url, err = proxyForURL(reqURL)
+	if g, e := fmt.Sprintf("%v", err), fmt.Sprintf("%v", tt.wanterr); g != e {
+		t.Errorf("(after mutating config) %v: got error = %q, want %q", tt, g, e)
+		return
+	}
+	if got := fmt.Sprintf("%s", url); got != tt.want {
+		t.Errorf("(after mutating config) %v: got URL = %q, want %q", tt, url, tt.want)
+	}
+}
+
+func TestProxyForURL(t *testing.T) {
+	for _, tt := range proxyForURLTests {
+		testProxyForURL(t, tt)
+	}
+}
+
+func TestFromEnvironment(t *testing.T) {
+	os.Setenv("HTTP_PROXY", "httpproxy")
+	os.Setenv("HTTPS_PROXY", "httpsproxy")
+	os.Setenv("NO_PROXY", "noproxy")
+	os.Setenv("REQUEST_METHOD", "")
+	got := httpproxy.FromEnvironment()
+	want := httpproxy.Config{
+		HTTPProxy:  "httpproxy",
+		HTTPSProxy: "httpsproxy",
+		NoProxy:    "noproxy",
+	}
+	if *got != want {
+		t.Errorf("unexpected proxy config, got %#v want %#v", got, want)
+	}
+}
+
+func TestFromEnvironmentWithRequestMethod(t *testing.T) {
+	os.Setenv("HTTP_PROXY", "httpproxy")
+	os.Setenv("HTTPS_PROXY", "httpsproxy")
+	os.Setenv("NO_PROXY", "noproxy")
+	os.Setenv("REQUEST_METHOD", "PUT")
+	got := httpproxy.FromEnvironment()
+	want := httpproxy.Config{
+		HTTPProxy:  "httpproxy",
+		HTTPSProxy: "httpsproxy",
+		NoProxy:    "noproxy",
+		CGI:        true,
+	}
+	if *got != want {
+		t.Errorf("unexpected proxy config, got %#v want %#v", got, want)
+	}
+}
+
+func TestFromEnvironmentLowerCase(t *testing.T) {
+	os.Setenv("http_proxy", "httpproxy")
+	os.Setenv("https_proxy", "httpsproxy")
+	os.Setenv("no_proxy", "noproxy")
+	os.Setenv("REQUEST_METHOD", "")
+	got := httpproxy.FromEnvironment()
+	want := httpproxy.Config{
+		HTTPProxy:  "httpproxy",
+		HTTPSProxy: "httpsproxy",
+		NoProxy:    "noproxy",
+	}
+	if *got != want {
+		t.Errorf("unexpected proxy config, got %#v want %#v", got, want)
+	}
+}
+
+var UseProxyTests = []struct {
+	host  string
+	match bool
+}{
+	// Never proxy localhost:
+	{"localhost", false},
+	{"127.0.0.1", false},
+	{"127.0.0.2", false},
+	{"[::1]", false},
+	{"[::2]", true}, // not a loopback address
+
+	{"barbaz.net", false},     // match as .barbaz.net
+	{"foobar.com", false},     // have a port but match
+	{"foofoobar.com", true},   // not match as a part of foobar.com
+	{"baz.com", true},         // not match as a part of barbaz.com
+	{"localhost.net", true},   // not match as suffix of address
+	{"local.localhost", true}, // not match as prefix as address
+	{"barbarbaz.net", true},   // not match because NO_PROXY have a '.'
+	{"www.foobar.com", false}, // match because NO_PROXY includes "foobar.com"
+}
+
+func TestUseProxy(t *testing.T) {
+	cfg := &httpproxy.Config{
+		NoProxy: "foobar.com, .barbaz.net",
+	}
+	for _, test := range UseProxyTests {
+		if httpproxy.ExportUseProxy(cfg, test.host+":80") != test.match {
+			t.Errorf("useProxy(%v) = %v, want %v", test.host, !test.match, test.match)
+		}
+	}
+}
+
+func TestInvalidNoProxy(t *testing.T) {
+	cfg := &httpproxy.Config{
+		NoProxy: ":1",
+	}
+	ok := httpproxy.ExportUseProxy(cfg, "example.com:80") // should not panic
+	if !ok {
+		t.Errorf("useProxy unexpected return; got false; want true")
+	}
+}