Browse Source

Merge pull request #8070 from heyitsanthony/etcdctl-cluster-health

ctlv2: report unhealthy in cluster-health if any node is unavailable
Anthony Romano 8 years ago
parent
commit
933aa09b73
2 changed files with 27 additions and 12 deletions
  1. +18 -4
      e2e/ctl_v2_test.go
  2. +9 -8
      etcdctl/ctlv2/command/cluster_health.go

+ 18 - 4
e2e/ctl_v2_test.go

@@ -321,17 +321,31 @@ func TestCtlV2ClusterHealth(t *testing.T) {
 		}
 	}()
 
-	// has quorum
+	// all members available
 	if err := etcdctlClusterHealth(epc, "cluster is healthy"); err != nil {
 		t.Fatalf("cluster-health expected to be healthy (%v)", err)
 	}
 
-	// cut quorum
+	// missing members, has quorum
 	epc.procs[0].Stop()
+
+	for i := 0; i < 3; i++ {
+		err := etcdctlClusterHealth(epc, "cluster is degraded")
+		if err == nil {
+			break
+		} else if i == 2 {
+			t.Fatalf("cluster-health expected to be degraded (%v)", err)
+		}
+		// possibly no leader yet; retry
+		time.Sleep(time.Second)
+	}
+
+	// no quorum
 	epc.procs[1].Stop()
-	if err := etcdctlClusterHealth(epc, "cluster is unhealthy"); err != nil {
-		t.Fatalf("cluster-health expected to be unhealthy (%v)", err)
+	if err := etcdctlClusterHealth(epc, "cluster is unavailable"); err != nil {
+		t.Fatalf("cluster-health expected to be unavailable (%v)", err)
 	}
+
 	epc.procs[0], epc.procs[1] = nil, nil
 }
 

+ 9 - 8
etcdctl/ctlv2/command/cluster_health.go

@@ -70,7 +70,7 @@ func handleClusterHealth(c *cli.Context) error {
 	}
 
 	for {
-		health := false
+		healthyMembers := 0
 		for _, m := range ms {
 			if len(m.ClientURLs) == 0 {
 				fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
@@ -105,8 +105,8 @@ func handleClusterHealth(c *cli.Context) error {
 
 				checked = true
 				if result.Health == "true" || nresult.Health {
-					health = true
 					fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
+					healthyMembers++
 				} else {
 					fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
 				}
@@ -116,19 +116,20 @@ func handleClusterHealth(c *cli.Context) error {
 				fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
 			}
 		}
-		if health {
+		switch healthyMembers {
+		case len(ms):
 			fmt.Println("cluster is healthy")
-		} else {
-			fmt.Println("cluster is unhealthy")
+		case 0:
+			fmt.Println("cluster is unavailable")
+		default:
+			fmt.Println("cluster is degraded")
 		}
 
 		if !forever {
-			if health {
+			if healthyMembers == len(ms) {
 				os.Exit(ExitSuccess)
-				return nil
 			}
 			os.Exit(ExitClusterNotHealthy)
-			return nil
 		}
 
 		fmt.Printf("\nnext check after 10 second...\n\n")