Browse Source

Merge pull request #8690 from tavish-stripe/tavish/svc-flag

pkg/srv, embed, etcdmain: Support multiple clusters in the same DNS domain
Sam Batschelet 8 years ago
parent
commit
7dd9c30ae7

+ 7 - 0
Documentation/op-guide/clustering.md

@@ -359,6 +359,13 @@ If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communi
 
 If etcd is using TLS without a custom certificate authority, the discovery domain (e.g., example.com) must match the SRV record domain (e.g., infra1.example.com). This is to mitigate attacks that forge SRV records to point to a different domain; the domain would have a valid certificate under PKI but be controlled by an unknown third party.
 
+The `-discovery-srv-name` flag additionally configures a suffix to the SRV name that is queried during discovery.
+Use this flag to differentiate between multiple etcd clusters under the same domain.
+For example, if `-discovery-srv=example.com` and `-discovery-srv-name=foo` are set, the following DNS SRV queries are made:
+
+* _etcd-server-ssl-foo._tcp.example.com
+* _etcd-server-foo._tcp.example.com
+
 #### Create DNS SRV records
 
 ```

+ 5 - 0
Documentation/op-guide/configuration.md

@@ -155,6 +155,11 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + default: ""
 + env variable: ETCD_DISCOVERY_SRV
 
+### --discovery-srv-name
++ Suffix to the DNS SRV name queried when bootstrapping using DNS.
++ default: ""
++ env variable: ETCD_DISCOVERY_SRV_NAME
+
 ### --discovery-fallback
 + Expected behavior ("exit" or "proxy") when discovery services fails. "proxy" supports v2 API only.
 + default: "proxy"

+ 7 - 0
Documentation/v2/clustering.md

@@ -292,6 +292,13 @@ To help clients discover the etcd cluster, the following DNS SRV records are loo
 
 If `_etcd-client-ssl._tcp.example.com` is found, clients will attempt to communicate with the etcd cluster over SSL.
 
+The `-discovery-srv-name` flag additionally configures a suffix to the SRV name that is queried during discovery.
+Use this flag to differentiate between multiple etcd clusters under the same domain.
+For example, if `-discovery-srv=example.com` and `-discovery-srv-name=foo` are set, the following DNS SRV queries are made:
+
+* _etcd-server-ssl-foo._tcp.example.com
+* _etcd-server-foo._tcp.example.com
+
 #### Create DNS SRV records
 
 ```

+ 5 - 0
Documentation/v2/configuration.md

@@ -127,6 +127,11 @@ To start etcd automatically using custom settings at startup in Linux, using a [
 + default: none
 + env variable: ETCD_DISCOVERY_SRV
 
+### --discovery-srv-name
++ Suffix to the DNS SRV name queried when bootstrapping using DNS.
++ default: none
++ env variable: ETCD_DISCOVERY_SRV_NAME
+
 ### --discovery-fallback
 + Expected behavior ("exit" or "proxy") when discovery services fails.
 + default: "proxy"

+ 34 - 10
embed/config.go

@@ -129,15 +129,16 @@ type Config struct {
 
 	// clustering
 
-	APUrls, ACUrls      []url.URL
-	ClusterState        string `json:"initial-cluster-state"`
-	DNSCluster          string `json:"discovery-srv"`
-	Dproxy              string `json:"discovery-proxy"`
-	Durl                string `json:"discovery"`
-	InitialCluster      string `json:"initial-cluster"`
-	InitialClusterToken string `json:"initial-cluster-token"`
-	StrictReconfigCheck bool   `json:"strict-reconfig-check"`
-	EnableV2            bool   `json:"enable-v2"`
+	APUrls, ACUrls        []url.URL
+	ClusterState          string `json:"initial-cluster-state"`
+	DNSCluster            string `json:"discovery-srv"`
+	DNSClusterServiceName string `json:"discovery-srv-name"`
+	Dproxy                string `json:"discovery-proxy"`
+	Durl                  string `json:"discovery"`
+	InitialCluster        string `json:"initial-cluster"`
+	InitialClusterToken   string `json:"initial-cluster-token"`
+	StrictReconfigCheck   bool   `json:"strict-reconfig-check"`
+	EnableV2              bool   `json:"enable-v2"`
 
 	// security
 
@@ -463,7 +464,8 @@ func (cfg *Config) PeerURLsMapAndToken(which string) (urlsmap types.URLsMap, tok
 		urlsmap[cfg.Name] = cfg.APUrls
 		token = cfg.Durl
 	case cfg.DNSCluster != "":
-		clusterStrs, cerr := srv.GetCluster("etcd-server", cfg.Name, cfg.DNSCluster, cfg.APUrls)
+		clusterStrs, cerr := cfg.GetDNSClusterNames()
+
 		if cerr != nil {
 			plog.Errorf("couldn't resolve during SRV discovery (%v)", cerr)
 			return nil, "", cerr
@@ -490,6 +492,28 @@ func (cfg *Config) PeerURLsMapAndToken(which string) (urlsmap types.URLsMap, tok
 	return urlsmap, token, err
 }
 
+// GetDNSClusterNames uses DNS SRV records to get a list of initial nodes for cluster bootstrapping.
+//
+// It queries both the "etcd-server-ssl" (https) and the "etcd-server" (http) SRV services, each
+// with an optional "-<DNSClusterServiceName>" suffix, and returns the combined entries of every
+// lookup that succeeded. Discovery fails only when both lookups fail; in that case the error from
+// the https lookup is returned.
+func (cfg *Config) GetDNSClusterNames() ([]string, error) {
+	var serviceNameSuffix string
+	if cfg.DNSClusterServiceName != "" {
+		serviceNameSuffix = "-" + cfg.DNSClusterServiceName
+	}
+
+	// Use both etcd-server-ssl and etcd-server for discovery. Combine the results if both are available.
+	httpsClusterStrs, cerr := srv.GetCluster("https", "etcd-server-ssl"+serviceNameSuffix, cfg.Name, cfg.DNSCluster, cfg.APUrls)
+	httpClusterStrs, httpCerr := srv.GetCluster("http", "etcd-server"+serviceNameSuffix, cfg.Name, cfg.DNSCluster, cfg.APUrls)
+
+	// A single failed lookup is expected when only one of the two SRV services is published,
+	// so it is not treated as a discovery failure.
+	if cerr != nil && httpCerr != nil {
+		return nil, cerr
+	}
+
+	clusterStrs := make([]string, 0, len(httpsClusterStrs)+len(httpClusterStrs))
+	if cerr == nil {
+		clusterStrs = append(clusterStrs, httpsClusterStrs...)
+	}
+	// Append the http entries only when that lookup succeeded.
+	if httpCerr == nil {
+		clusterStrs = append(clusterStrs, httpClusterStrs...)
+	}
+	return clusterStrs, nil
+}
+
 func (cfg Config) InitialClusterFromName(name string) (ret string) {
 	if len(cfg.APUrls) == 0 {
 		return ""

+ 2 - 1
etcdmain/config.go

@@ -155,6 +155,7 @@ func newConfig() *config {
 
 	fs.StringVar(&cfg.ec.Dproxy, "discovery-proxy", cfg.ec.Dproxy, "HTTP proxy to use for traffic to discovery service.")
 	fs.StringVar(&cfg.ec.DNSCluster, "discovery-srv", cfg.ec.DNSCluster, "DNS domain used to bootstrap initial cluster.")
+	fs.StringVar(&cfg.ec.DNSClusterServiceName, "discovery-srv-name", cfg.ec.DNSClusterServiceName, "Service name to query when using DNS discovery.")
 	fs.StringVar(&cfg.ec.InitialCluster, "initial-cluster", cfg.ec.InitialCluster, "Initial cluster configuration for bootstrapping.")
 	fs.StringVar(&cfg.ec.InitialClusterToken, "initial-cluster-token", cfg.ec.InitialClusterToken, "Initial cluster token for the etcd cluster during bootstrap.")
 	fs.Var(cfg.cf.clusterState, "initial-cluster-state", "Initial cluster state ('new' or 'existing').")
@@ -285,7 +286,7 @@ func (cfg *config) configFromCmdLine() error {
 	}
 
 	// disable default initial-cluster if discovery is set
-	if (cfg.ec.Durl != "" || cfg.ec.DNSCluster != "") && !flags.IsSet(cfg.cf.flagSet, "initial-cluster") {
+	if (cfg.ec.Durl != "" || cfg.ec.DNSCluster != "" || cfg.ec.DNSClusterServiceName != "") && !flags.IsSet(cfg.cf.flagSet, "initial-cluster") {
 		cfg.ec.InitialCluster = ""
 	}
 

+ 3 - 14
pkg/srv/srv.go

@@ -32,7 +32,7 @@ var (
 
 // GetCluster gets the cluster information via DNS discovery.
 // Also sees each entry as a separate instance.
-func GetCluster(service, name, dns string, apurls types.URLs) ([]string, error) {
+func GetCluster(serviceScheme, service, name, dns string, apurls types.URLs) ([]string, error) {
 	tempName := int(0)
 	tcp2ap := make(map[string]url.URL)
 
@@ -83,20 +83,9 @@ func GetCluster(service, name, dns string, apurls types.URLs) ([]string, error)
 		return nil
 	}
 
-	failCount := 0
-	err := updateNodeMap(service+"-ssl", "https")
-	srvErr := make([]string, 2)
+	err := updateNodeMap(service, serviceScheme)
 	if err != nil {
-		srvErr[0] = fmt.Sprintf("error querying DNS SRV records for _%s-ssl %s", service, err)
-		failCount++
-	}
-	err = updateNodeMap(service, "http")
-	if err != nil {
-		srvErr[1] = fmt.Sprintf("error querying DNS SRV records for _%s %s", service, err)
-		failCount++
-	}
-	if failCount == 2 {
-		return nil, fmt.Errorf("srv: too many errors querying DNS SRV records (%q, %q)", srvErr[0], srvErr[1])
+		return nil, fmt.Errorf("error querying DNS SRV records for _%s %s", service, err)
 	}
 	return stringParts, nil
 }

+ 13 - 19
pkg/srv/srv_test.go

@@ -44,51 +44,51 @@ func TestSRVGetCluster(t *testing.T) {
 	}
 
 	tests := []struct {
-		withSSL    []*net.SRV
-		withoutSSL []*net.SRV
-		urls       []string
+		scheme  string
+		records []*net.SRV
+		urls    []string
 
 		expected string
 	}{
 		{
-			[]*net.SRV{},
+			"https",
 			[]*net.SRV{},
 			nil,
 
 			"",
 		},
 		{
+			"https",
 			srvAll,
-			[]*net.SRV{},
 			nil,
 
 			"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480",
 		},
 		{
+			"http",
 			srvAll,
-			[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
 			nil,
 
-			"0=https://1.example.com:2480,1=https://2.example.com:2480,2=https://3.example.com:2480,3=http://4.example.com:2380",
+			"0=http://1.example.com:2480,1=http://2.example.com:2480,2=http://3.example.com:2480",
 		},
 		{
+			"https",
 			srvAll,
-			[]*net.SRV{{Target: "4.example.com.", Port: 2380}},
 			[]string{"https://10.0.0.1:2480"},
 
-			"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480,2=http://4.example.com:2380",
+			"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480",
 		},
 		// matching local member with resolved addr and return unresolved hostnames
 		{
+			"https",
 			srvAll,
-			nil,
 			[]string{"https://10.0.0.1:2480"},
 
 			"dnsClusterTest=https://1.example.com:2480,0=https://2.example.com:2480,1=https://3.example.com:2480",
 		},
 		// reject if apurls are TLS but SRV is only http
 		{
-			nil,
+			"http",
 			srvAll,
 			[]string{"https://10.0.0.1:2480"},
 
@@ -109,16 +109,10 @@ func TestSRVGetCluster(t *testing.T) {
 
 	for i, tt := range tests {
 		lookupSRV = func(service string, proto string, domain string) (string, []*net.SRV, error) {
-			if service == "etcd-server-ssl" {
-				return "", tt.withSSL, nil
-			}
-			if service == "etcd-server" {
-				return "", tt.withoutSSL, nil
-			}
-			return "", nil, errors.New("Unknown service in mock")
+			return "", tt.records, nil
 		}
 		urls := testutil.MustNewURLs(t, tt.urls)
-		str, err := GetCluster("etcd-server", name, "example.com", urls)
+		str, err := GetCluster(tt.scheme, "etcd-server", name, "example.com", urls)
 		if err != nil {
 			t.Fatalf("%d: err: %#v", i, err)
 		}