Преглед изворни кода

Merge pull request #7008 from heyitsanthony/fix-dns

retry on resolution failure for advertised peer DNS check
Anthony Romano пре 9 година
родитељ
комит
17873f7be8

+ 24 - 21
etcdserver/config.go

@@ -21,6 +21,8 @@ import (
 	"strings"
 	"time"
 
+	"golang.org/x/net/context"
+
 	"github.com/coreos/etcd/pkg/netutil"
 	"github.com/coreos/etcd/pkg/transport"
 	"github.com/coreos/etcd/pkg/types"
@@ -62,7 +64,10 @@ type ServerConfig struct {
 // VerifyBootstrap sanity-checks the initial config for bootstrap case
 // and returns an error for things that should never happen.
 func (c *ServerConfig) VerifyBootstrap() error {
-	if err := c.verifyLocalMember(true); err != nil {
+	if err := c.hasLocalMember(); err != nil {
+		return err
+	}
+	if err := c.advertiseMatchesCluster(); err != nil {
 		return err
 	}
 	if checkDuplicateURL(c.InitialPeerURLsMap) {
@@ -77,10 +82,9 @@ func (c *ServerConfig) VerifyBootstrap() error {
 // VerifyJoinExisting sanity-checks the initial config for join existing cluster
 // case and returns an error for things that should never happen.
 func (c *ServerConfig) VerifyJoinExisting() error {
-	// no need for strict checking since the member have announced its
-	// peer urls to the cluster before starting and do not have to set
-	// it in the configuration again.
-	if err := c.verifyLocalMember(false); err != nil {
+	// The member has announced its peer urls to the cluster before starting; no need to
+	// set the configuration again.
+	if err := c.hasLocalMember(); err != nil {
 		return err
 	}
 	if checkDuplicateURL(c.InitialPeerURLsMap) {
@@ -92,25 +96,24 @@ func (c *ServerConfig) VerifyJoinExisting() error {
 	return nil
 }
 
-// verifyLocalMember verifies the configured member is in configured
-// cluster. If strict is set, it also verifies the configured member
-// has the same peer urls as configured advertised peer urls.
-func (c *ServerConfig) verifyLocalMember(strict bool) error {
-	urls := c.InitialPeerURLsMap[c.Name]
-	// Make sure the cluster at least contains the local server.
-	if urls == nil {
+// hasLocalMember checks that the cluster at least contains the local server.
+func (c *ServerConfig) hasLocalMember() error {
+	if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
 		return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
 	}
+	return nil
+}
 
-	if strict {
-		// Advertised peer URLs must match those in the cluster peer list
-		apurls := c.PeerURLs.StringSlice()
-		sort.Strings(apurls)
-		urls.Sort()
-		if !netutil.URLStringsEqual(apurls, urls.StringSlice()) {
-			umap := map[string]types.URLs{c.Name: c.PeerURLs}
-			return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
-		}
+// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
+func (c *ServerConfig) advertiseMatchesCluster() error {
+	urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
+	urls.Sort()
+	sort.Strings(apurls)
+	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
+	defer cancel()
+	if !netutil.URLStringsEqual(ctx, apurls, urls.StringSlice()) {
+		umap := map[string]types.URLs{c.Name: c.PeerURLs}
+		return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
 	}
 	return nil
 }

+ 3 - 1
etcdserver/config_test.go

@@ -137,7 +137,9 @@ func TestConfigVerifyLocalMember(t *testing.T) {
 		if tt.apurls != nil {
 			cfg.PeerURLs = mustNewURLs(t, tt.apurls)
 		}
-		err = cfg.verifyLocalMember(tt.strict)
+		if err = cfg.hasLocalMember(); err == nil && tt.strict {
+			err = cfg.advertiseMatchesCluster()
+		}
 		if (err == nil) && tt.shouldError {
 			t.Errorf("#%d: Got no error where one was expected", i)
 		}

+ 6 - 1
etcdserver/membership/cluster.go

@@ -24,6 +24,9 @@ import (
 	"sort"
 	"strings"
 	"sync"
+	"time"
+
+	"golang.org/x/net/context"
 
 	"github.com/coreos/etcd/mvcc/backend"
 	"github.com/coreos/etcd/pkg/netutil"
@@ -484,8 +487,10 @@ func ValidateClusterAndAssignIDs(local *RaftCluster, existing *RaftCluster) erro
 	sort.Sort(MembersByPeerURLs(ems))
 	sort.Sort(MembersByPeerURLs(lms))
 
+	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
+	defer cancel()
 	for i := range ems {
-		if !netutil.URLStringsEqual(ems[i].PeerURLs, lms[i].PeerURLs) {
+		if !netutil.URLStringsEqual(ctx, ems[i].PeerURLs, lms[i].PeerURLs) {
 			return fmt.Errorf("unmatched member while checking PeerURLs")
 		}
 		lms[i].ID = ems[i].ID

+ 39 - 19
pkg/netutil/netutil.go

@@ -20,6 +20,9 @@ import (
 	"net/url"
 	"reflect"
 	"sort"
+	"time"
+
+	"golang.org/x/net/context"
 
 	"github.com/coreos/etcd/pkg/types"
 	"github.com/coreos/pkg/capnslog"
@@ -32,10 +35,12 @@ var (
 	resolveTCPAddr = net.ResolveTCPAddr
 )
 
+const retryInterval = time.Second
+
 // resolveTCPAddrs is a convenience wrapper for net.ResolveTCPAddr.
 // resolveTCPAddrs return a new set of url.URLs, in which all DNS hostnames
 // are resolved.
-func resolveTCPAddrs(urls [][]url.URL) ([][]url.URL, error) {
+func resolveTCPAddrs(ctx context.Context, urls [][]url.URL) ([][]url.URL, error) {
 	newurls := make([][]url.URL, 0)
 	for _, us := range urls {
 		nus := make([]url.URL, len(us))
@@ -47,37 +52,52 @@ func resolveTCPAddrs(urls [][]url.URL) ([][]url.URL, error) {
 			nus[i] = *nu
 		}
 		for i, u := range nus {
-			host, _, err := net.SplitHostPort(u.Host)
+			h, err := resolveURL(ctx, u)
 			if err != nil {
-				plog.Errorf("could not parse url %s during tcp resolving", u.Host)
 				return nil, err
 			}
-			if host == "localhost" {
-				continue
-			}
-			if net.ParseIP(host) != nil {
-				continue
-			}
-			tcpAddr, err := resolveTCPAddr("tcp", u.Host)
-			if err != nil {
-				plog.Errorf("could not resolve host %s", u.Host)
-				return nil, err
+			if h != "" {
+				nus[i].Host = h
 			}
-			plog.Infof("resolving %s to %s", u.Host, tcpAddr.String())
-			nus[i].Host = tcpAddr.String()
 		}
 		newurls = append(newurls, nus)
 	}
 	return newurls, nil
 }
 
+func resolveURL(ctx context.Context, u url.URL) (string, error) {
+	for ctx.Err() == nil {
+		host, _, err := net.SplitHostPort(u.Host)
+		if err != nil {
+			plog.Errorf("could not parse url %s during tcp resolving", u.Host)
+			return "", err
+		}
+		if host == "localhost" || net.ParseIP(host) != nil {
+			return "", nil
+		}
+		tcpAddr, err := resolveTCPAddr("tcp", u.Host)
+		if err == nil {
+			plog.Infof("resolving %s to %s", u.Host, tcpAddr.String())
+			return tcpAddr.String(), nil
+		}
+		plog.Warningf("failed resolving host %s (%v); retrying in %v", u.Host, err, retryInterval)
+		select {
+		case <-ctx.Done():
+			plog.Errorf("could not resolve host %s", u.Host)
+			return "", err
+		case <-time.After(retryInterval):
+		}
+	}
+	return "", ctx.Err()
+}
+
 // urlsEqual checks equality of url.URLS between two arrays.
 // This check pass even if an URL is in hostname and opposite is in IP address.
-func urlsEqual(a []url.URL, b []url.URL) bool {
+func urlsEqual(ctx context.Context, a []url.URL, b []url.URL) bool {
 	if len(a) != len(b) {
 		return false
 	}
-	urls, err := resolveTCPAddrs([][]url.URL{a, b})
+	urls, err := resolveTCPAddrs(ctx, [][]url.URL{a, b})
 	if err != nil {
 		return false
 	}
@@ -93,7 +113,7 @@ func urlsEqual(a []url.URL, b []url.URL) bool {
 	return true
 }
 
-func URLStringsEqual(a []string, b []string) bool {
+func URLStringsEqual(ctx context.Context, a []string, b []string) bool {
 	if len(a) != len(b) {
 		return false
 	}
@@ -114,7 +134,7 @@ func URLStringsEqual(a []string, b []string) bool {
 		urlsB = append(urlsB, *u)
 	}
 
-	return urlsEqual(urlsA, urlsB)
+	return urlsEqual(ctx, urlsA, urlsB)
 }
 
 func IsNetworkTimeoutError(err error) bool {

+ 8 - 3
pkg/netutil/netutil_test.go

@@ -21,6 +21,9 @@ import (
 	"reflect"
 	"strconv"
 	"testing"
+	"time"
+
+	"golang.org/x/net/context"
 )
 
 func TestResolveTCPAddrs(t *testing.T) {
@@ -124,7 +127,9 @@ func TestResolveTCPAddrs(t *testing.T) {
 			}
 			return &net.TCPAddr{IP: net.ParseIP(tt.hostMap[host]), Port: i, Zone: ""}, nil
 		}
-		urls, err := resolveTCPAddrs(tt.urls)
+		ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
+		urls, err := resolveTCPAddrs(ctx, tt.urls)
+		cancel()
 		if tt.hasError {
 			if err == nil {
 				t.Errorf("expected error")
@@ -244,14 +249,14 @@ func TestURLsEqual(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		result := urlsEqual(test.a, test.b)
+		result := urlsEqual(context.TODO(), test.a, test.b)
 		if result != test.expect {
 			t.Errorf("a:%v b:%v, expected %v but %v", test.a, test.b, test.expect, result)
 		}
 	}
 }
 func TestURLStringsEqual(t *testing.T) {
-	result := URLStringsEqual([]string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"})
+	result := URLStringsEqual(context.TODO(), []string{"http://127.0.0.1:8080"}, []string{"http://127.0.0.1:8080"})
 	if !result {
 		t.Errorf("unexpected result %v", result)
 	}