瀏覽代碼

functional-tester: add logic to ensure etcd node is alive after fault recovery returns

failure recovery needs to wait for the etcd node to become alive before returning

FIX #6654
fanmin shi 9 年之前
父節點
當前提交
94ea82c00d
共有 2 個文件被更改,包括 8 次插入和 3 次删除
  1. 3 3
      tools/functional-tester/etcd-tester/checks.go
  2. 5 0
      tools/functional-tester/etcd-tester/tester.go

+ 3 - 3
tools/functional-tester/etcd-tester/checks.go

@@ -46,9 +46,8 @@ func (hc *hashChecker) Check() (err error) {
 		hashes map[string]int64
 		ok     bool
 	)
-	for i := 0; i < 7; i++ {
-		time.Sleep(time.Second)
-
+	// retry in case of transient failure
+	for i := 0; i < 3; i++ {
 		revs, hashes, err = hc.hrg.getRevisionHash()
 		if err != nil {
 			plog.Printf("#%d failed to get current revisions (%v)", i, err)
@@ -59,6 +58,7 @@ func (hc *hashChecker) Check() (err error) {
 		}
 
 		plog.Printf("#%d inconsistent current revisions %+v", i, revs)
+		time.Sleep(time.Second)
 	}
 	if !ok || err != nil {
 		return fmt.Errorf("checking current revisions failed [err: %v, revisions: %v]", err, revs)

+ 5 - 0
tools/functional-tester/etcd-tester/tester.go

@@ -104,6 +104,11 @@ func (tt *tester) doRound(round int) (bool, error) {
 			plog.Printf("%s recovery error: %v", tt.logPrefix(), err)
 			return false, nil
 		}
+		plog.Printf("%s wait until cluster is healthy", tt.logPrefix())
+		if err := tt.cluster.WaitHealth(); err != nil {
+			plog.Printf("%s wait full health error: %v", tt.logPrefix(), err)
+			return false, nil
+		}
 		plog.Printf("%s recovered failure", tt.logPrefix())
 
 		if err := tt.checkConsistency(); err != nil {