Procházet zdrojové kódy

fix retry to not retry on createSelf()

Barak Michener před 11 roky
rodič
revize
f3870598b9
1 změnil soubory, kde provedl 20 přidání a 15 odebrání
  1. 20 15
      discovery/discovery.go

+ 20 - 15
discovery/discovery.go

@@ -23,7 +23,9 @@ var (
 	ErrDuplicateID    = errors.New("discovery: found duplicate id")
 	ErrFullCluster    = errors.New("discovery: cluster is full")
 	ErrTooManyRetries = errors.New("discovery: too many retries")
+)
 
+const (
 	// Number of retries discovery will attempt before giving up and erroring out.
 	nRetries = uint(3)
 )
@@ -73,12 +75,9 @@ func (d *discovery) Discover() (string, error) {
 	}
 
 	if err := d.createSelf(); err != nil {
-		if err == client.ErrTimeout {
-			if d.retries < nRetries {
-				d.logAndBackoffForRetry("registering self")
-				return d.Discover()
-			}
-		}
+		// Fails, even on a timeout, if createSelf times out.
+		// TODO(barakmich): Retrying the same node might want to succeed here
+		// (ie, createSelf should be idempotent for discovery).
 		return "", err
 	}
 
@@ -89,15 +88,6 @@ func (d *discovery) Discover() (string, error) {
 
 	all, err := d.waitNodes(nodes, size)
 	if err != nil {
-		if err == client.ErrTimeout {
-			// Our actual connection timed out (nodes can take awhile, but the discovery
-			// server stopped responding) increment our retry counter and we have to
-			// start from scratch. Calling createSelf() again should be idempotent.
-			if d.retries < nRetries {
-				d.logAndBackoffForRetry("waiting for other nodes")
-				return d.Discover()
-			}
-		}
 		return "", err
 	}
 
@@ -179,6 +169,18 @@ func (d *discovery) checkClusterRetry() (client.Nodes, int, error) {
 	return nil, 0, ErrTooManyRetries
 }
 
+func (d *discovery) waitNodesRetry() (client.Nodes, error) {
+	if d.retries < nRetries {
+		d.logAndBackoffForRetry("waiting for other nodes")
+		nodes, n, err := d.checkCluster()
+		if err != nil {
+			return nil, err
+		}
+		return d.waitNodes(nodes, n)
+	}
+	return nil, ErrTooManyRetries
+}
+
 func (d *discovery) waitNodes(nodes client.Nodes, size int) (client.Nodes, error) {
 	if len(nodes) > size {
 		nodes = nodes[:size]
@@ -190,6 +192,9 @@ func (d *discovery) waitNodes(nodes client.Nodes, size int) (client.Nodes, error
 	for len(all) < size {
 		resp, err := w.Next()
 		if err != nil {
+			if err == client.ErrTimeout {
+				return d.waitNodesRetry()
+			}
 			return nil, err
 		}
 		all = append(all, resp.Node)