Browse Source

Merge pull request #6689 from fanminshi/function-tester-ensure-etcd-fullly-restarted

functional-tester: add logic to ensure etcd node is alive after fault recovery returns
Xiang Li 9 years ago
parent
commit
a2cfb56581

+ 3 - 3
tools/functional-tester/etcd-tester/checks.go

@@ -46,9 +46,8 @@ func (hc *hashChecker) Check() (err error) {
 		hashes map[string]int64
 		ok     bool
 	)
-	for i := 0; i < 7; i++ {
-		time.Sleep(time.Second)
-
+	// retry in case of transient failure
+	for i := 0; i < 3; i++ {
 		revs, hashes, err = hc.hrg.getRevisionHash()
 		if err != nil {
 			plog.Printf("#%d failed to get current revisions (%v)", i, err)
@@ -59,6 +58,7 @@ func (hc *hashChecker) Check() (err error) {
 		}
 
 		plog.Printf("#%d inconsistent current revisions %+v", i, revs)
+		time.Sleep(time.Second)
 	}
 	if !ok || err != nil {
 		return fmt.Errorf("checking current revisions failed [err: %v, revisions: %v]", err, revs)

+ 5 - 0
tools/functional-tester/etcd-tester/tester.go

@@ -104,6 +104,11 @@ func (tt *tester) doRound(round int) (bool, error) {
 			plog.Printf("%s recovery error: %v", tt.logPrefix(), err)
 			return false, nil
 		}
+		plog.Printf("%s wait until cluster is healthy", tt.logPrefix())
+		if err := tt.cluster.WaitHealth(); err != nil {
+			plog.Printf("%s wait full health error: %v", tt.logPrefix(), err)
+			return false, nil
+		}
 		plog.Printf("%s recovered failure", tt.logPrefix())
 
 		if err := tt.checkConsistency(); err != nil {