Browse Source

functional-tester/tester: implement liveness mode failure case

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
Gyuho Lee 7 years ago
parent
commit
a43bd84631

+ 11 - 4
tools/functional-tester/tester/cluster.go

@@ -294,10 +294,9 @@ func (clus *Cluster) updateFailures() {
 			}
 			clus.failures = append(clus.failures, fpFailures...)
 		case "NO_FAIL_WITH_STRESS":
-			clus.failures = append(clus.failures, newFailureNoFailWithStress())
+			clus.failures = append(clus.failures, newFailureNoFailWithStress(clus))
 		case "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS":
-			// TODO
-			clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness())
+			clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness(clus))
 		case "EXTERNAL":
 			clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath))
 		}
@@ -762,4 +761,12 @@ func (clus *Cluster) defrag() error {
 	return nil
 }
 
-func (clus *Cluster) Report() int64 { return clus.stresser.ModifiedKeys() }
+// GetFailureDelayDuration computes failure delay duration.
+func (clus *Cluster) GetFailureDelayDuration() time.Duration {
+	return time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond
+}
+
+// Report reports the number of modified keys.
+func (clus *Cluster) Report() int64 {
+	return clus.stresser.ModifiedKeys()
+}

+ 10 - 4
tools/functional-tester/tester/cluster_tester.go

@@ -19,6 +19,8 @@ import (
 	"os"
 	"time"
 
+	"github.com/coreos/etcd/tools/functional-tester/rpcpb"
+
 	"go.uber.org/zap"
 )
 
@@ -127,8 +129,10 @@ func (clus *Cluster) doRound() error {
 			return fmt.Errorf("wait full health error: %v", err)
 		}
 
-		// TODO: "NO_FAIL_WITH_STRESS"
-		// TODO: "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS"
+		if fa.FailureCase() == rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS {
+			clus.lg.Info("pausing stresser after before injecting failures")
+			clus.pauseStresser()
+		}
 
 		clus.lg.Info(
 			"injecting failure",
@@ -165,8 +169,10 @@ func (clus *Cluster) doRound() error {
 			zap.String("desc", fa.Desc()),
 		)
 
-		clus.lg.Info("pausing stresser after failure recovery, before wait health")
-		clus.pauseStresser()
+		if fa.FailureCase() != rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS {
+			clus.lg.Info("pausing stresser after failure recovery, before wait health")
+			clus.pauseStresser()
+		}
 
 		clus.lg.Info("wait health after recovering failures")
 		if err := clus.WaitHealth(); err != nil {

+ 1 - 1
tools/functional-tester/tester/failure.go

@@ -223,7 +223,7 @@ func (f *failureUntilSnapshot) Desc() string {
 	if f.desc.Desc() != "" {
 		return f.desc.Desc()
 	}
-	return f.failureCase.String() + " (to trigger snapshot)"
+	return f.failureCase.String()
 }
 
 func (f *failureUntilSnapshot) FailureCase() rpcpb.FailureCase {

+ 1 - 2
tools/functional-tester/tester/failure_case_failpoints.go

@@ -20,7 +20,6 @@ import (
 	"net/http"
 	"strings"
 	"sync"
-	"time"
 
 	"github.com/coreos/etcd/tools/functional-tester/rpcpb"
 )
@@ -59,7 +58,7 @@ func failpointFailures(clus *Cluster) (ret []Failure, err error) {
 			} else {
 				fpFails[i] = &failureDelay{
 					Failure:       fpf,
-					delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+					delayDuration: clus.GetFailureDelayDuration(),
 				}
 			}
 		}

+ 3 - 5
tools/functional-tester/tester/failure_case_network_blackhole.go

@@ -15,8 +15,6 @@
 package tester
 
 import (
-	"time"
-
 	"github.com/coreos/etcd/tools/functional-tester/rpcpb"
 )
 
@@ -37,7 +35,7 @@ func newFailureBlackholePeerPortTxRxOneFollower(clus *Cluster) Failure {
 	f := &failureFollower{ff, -1, -1}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }
 
@@ -50,7 +48,7 @@ func newFailureBlackholePeerPortTxRxLeader(clus *Cluster) Failure {
 	f := &failureLeader{ff, -1, -1}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }
 
@@ -62,6 +60,6 @@ func newFailureBlackholePeerPortTxRxAll(clus *Cluster) Failure {
 	}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }

+ 3 - 3
tools/functional-tester/tester/failure_case_network_slow.go

@@ -47,7 +47,7 @@ func newFailureDelayPeerPortTxRxOneFollower(clus *Cluster) Failure {
 	f := &failureFollower{ff, -1, -1}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }
 
@@ -60,7 +60,7 @@ func newFailureDelayPeerPortTxRxLeader(clus *Cluster) Failure {
 	f := &failureLeader{ff, -1, -1}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }
 
@@ -72,6 +72,6 @@ func newFailureDelayPeerPortTxRxAll(clus *Cluster) Failure {
 	}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }

+ 42 - 10
tools/functional-tester/tester/failure_case_no_op.go → tools/functional-tester/tester/failure_case_no_fail.go

@@ -18,36 +18,68 @@ import (
 	"time"
 
 	"github.com/coreos/etcd/tools/functional-tester/rpcpb"
+
+	"go.uber.org/zap"
 )
 
 type failureNoFailWithStress failureByFunc
 
-func (f *failureNoFailWithStress) Inject(clus *Cluster) error     { return nil }
-func (f *failureNoFailWithStress) Recover(clus *Cluster) error    { return nil }
-func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase { return f.failureCase }
+func (f *failureNoFailWithStress) Inject(clus *Cluster) error {
+	return nil
+}
+
+func (f *failureNoFailWithStress) Recover(clus *Cluster) error {
+	return nil
+}
+
+func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase {
+	return f.failureCase
+}
 
-func newFailureNoFailWithStress() Failure {
+func newFailureNoFailWithStress(clus *Cluster) Failure {
 	f := &failureNoFailWithStress{
 		failureCase: rpcpb.FailureCase_NO_FAIL_WITH_STRESS,
 	}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: 5 * time.Second,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }
 
 type failureNoFailWithNoStressForLiveness failureByFunc
 
-func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error     { return nil }
-func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error    { return nil }
-func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase { return f.failureCase }
+func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error {
+	clus.lg.Info(
+		"extra delay for liveness mode with no stresser",
+		zap.Int("round", clus.rd),
+		zap.Int("case", clus.cs),
+		zap.String("desc", f.Desc()),
+	)
+	time.Sleep(clus.GetFailureDelayDuration())
+
+	clus.lg.Info(
+		"wait health in liveness mode",
+		zap.Int("round", clus.rd),
+		zap.Int("case", clus.cs),
+		zap.String("desc", f.Desc()),
+	)
+	return clus.WaitHealth()
+}
+
+func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error {
+	return nil
+}
+
+func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase {
+	return f.failureCase
+}
 
-func newFailureNoFailWithNoStressForLiveness() Failure {
+func newFailureNoFailWithNoStressForLiveness(clus *Cluster) Failure {
 	f := &failureNoFailWithNoStressForLiveness{
 		failureCase: rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS,
 	}
 	return &failureDelay{
 		Failure:       f,
-		delayDuration: 7 * time.Second,
+		delayDuration: clus.GetFailureDelayDuration(),
 	}
 }