Browse Source

functional-tester/tester: shuffle failure cases with coprime

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
Gyuho Lee 7 years ago
parent
commit
2b9c810fa4

+ 67 - 21
tools/functional-tester/tester/cluster.go

@@ -19,6 +19,7 @@ import (
 	"errors"
 	"fmt"
 	"io/ioutil"
+	"math/rand"
 	"net/http"
 	"path/filepath"
 	"strings"
@@ -234,6 +235,33 @@ func NewCluster(logger *zap.Logger, fpath string) (*Cluster, error) {
 	}
 	go clus.serveTesterServer()
 
+	clus.updateFailures()
+
+	clus.rateLimiter = rate.NewLimiter(
+		rate.Limit(int(clus.Tester.StressQPS)),
+		int(clus.Tester.StressQPS),
+	)
+	clus.updateStresserChecker()
+	return clus, nil
+}
+
+func (clus *Cluster) serveTesterServer() { // calls ListenAndServe on the tester HTTP server and logs the outcome
+	clus.logger.Info(
+		"started tester HTTP server",
+		zap.String("tester-address", clus.Tester.TesterAddr),
+	)
+	err := clus.testerHTTPServer.ListenAndServe() // presumably blocks until the server stops; confirm against the testerHTTPServer type
+	clus.logger.Info(
+		"tester HTTP server returned",
+		zap.String("tester-address", clus.Tester.TesterAddr),
+		zap.Error(err),
+	)
+	if err != nil && err != http.ErrServerClosed { // http.ErrServerClosed is not treated as a failure
+		clus.logger.Fatal("tester HTTP errored", zap.Error(err)) // any other error is logged at Fatal level
+	}
+}
+
+func (clus *Cluster) updateFailures() {
 	for _, cs := range clus.Tester.FailureCases {
 		switch cs {
 		case "KILL_ONE_FOLLOWER":
@@ -270,33 +298,51 @@ func NewCluster(logger *zap.Logger, fpath string) (*Cluster, error) {
 			clus.failures = append(clus.failures, newFailureNoOp())
 		case "EXTERNAL":
 			clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath))
-		default:
-			return nil, fmt.Errorf("unknown failure %q", cs)
 		}
 	}
+}
 
-	clus.rateLimiter = rate.NewLimiter(
-		rate.Limit(int(clus.Tester.StressQPS)),
-		int(clus.Tester.StressQPS),
-	)
-	clus.updateStresserChecker()
-	return clus, nil
+func (clus *Cluster) shuffleFailures() { // replaces clus.failures with a reordered copy of itself
+	rand.Seed(time.Now().UnixNano()) // reseeds the global math/rand source on every call
+	offset := rand.Intn(1000) // random offset in [0, 1000); reduced modulo n below
+	n := len(clus.failures)
+	cp := coprime(n) // multiplier chosen so that gcd(cp, n) == 1
+
+	clus.logger.Info("shuffling test failure cases", zap.Int("total", n))
+	fs := make([]Failure, n)
+	for i := 0; i < n; i++ {
+		fs[i] = clus.failures[(cp*i+offset)%n] // slot i takes the element at index (cp*i+offset) mod n
+	}
+	clus.failures = fs // the original slice is left untouched; only the field is repointed
+	clus.logger.Info("shuffled test failure cases", zap.Int("total", n))
 }
 
-func (clus *Cluster) serveTesterServer() {
-	clus.logger.Info(
-		"started tester HTTP server",
-		zap.String("tester-address", clus.Tester.TesterAddr),
-	)
-	err := clus.testerHTTPServer.ListenAndServe()
-	clus.logger.Info(
-		"tester HTTP server returned",
-		zap.String("tester-address", clus.Tester.TesterAddr),
-		zap.Error(err),
-	)
-	if err != nil && err != http.ErrServerClosed {
-		clus.logger.Fatal("tester HTTP errored", zap.Error(err))
+/*
+coprime returns the smallest i in [n/2, n) with gcd(i, n) == 1.
+It returns 1 when the loop finds no such i (n <= 0), and 0 when
+n == 1, because gcd(0, 1) == 1.
+
+Two integers x and y are coprime when gcd(x, y) == 1.
+
+With c = coprime(n), an index idx is mapped to
+
+x = ( c * idx + offset ) % n
+
+For two indexes idx1 and idx2:
+
+x1 = ( c * idx1 + offset ) % n
+x2 = ( c * idx2 + offset ) % n
+(x2 - x1) % n = ( c * (idx2 - idx1) ) % n
+
+Because gcd(c, n) == 1, the right-hand side is 0 only when
+(idx2 - idx1) is a multiple of n. Distinct indexes in [0, n)
+therefore map to distinct x's, so the mapping is a permutation
+of [0, n), not merely distinct for consecutive indexes.
+*/
+func coprime(n int) int {
+	coprime := 1 // fallback when the loop below does not run
+	for i := n / 2; i < n; i++ { // scan upward from n/2 and stop at the first match
+		if gcd(i, n) == 1 {
+			coprime = i
+			break
+		}
+	}
+	return coprime
+}
+
+func gcd(x, y int) int { // greatest common divisor of x and y, computed with the recursive Euclidean algorithm
+	if y == 0 {
+		return x // base case: gcd(x, 0) is x
 	}
+	return gcd(y, x%y) // gcd(x, y) equals gcd(y, x mod y)
 }
 
 func (clus *Cluster) updateStresserChecker() {

+ 27 - 0
tools/functional-tester/tester/cluster_test.go

@@ -131,6 +131,7 @@ func Test_newCluster(t *testing.T) {
 				"DELAY_PEER_PORT_TX_RX_LEADER",
 				"DELAY_PEER_PORT_TX_RX_ALL",
 			},
+			FailureShuffle:          true,
 			FailpointCommands:       []string{`panic("etcd-tester")`},
 			RunnerExecPath:          "/etcd-runner",
 			ExternalExecPath:        "",
@@ -159,4 +160,30 @@ func Test_newCluster(t *testing.T) {
 	if !reflect.DeepEqual(exp, cfg) {
 		t.Fatalf("expected %+v, got %+v", exp, cfg)
 	}
+
+	cfg.logger = logger
+
+	cfg.updateFailures()
+	fs1 := make([]string, len(cfg.failures))
+	for i := range cfg.failures {
+		fs1[i] = cfg.failures[i].Desc()
+	}
+
+	cfg.shuffleFailures()
+	fs2 := make([]string, len(cfg.failures))
+	for i := range cfg.failures {
+		fs2[i] = cfg.failures[i].Desc()
+	}
+	if reflect.DeepEqual(fs1, fs2) {
+		t.Fatalf("expected shuffled failure cases, got %q", fs2)
+	}
+
+	cfg.shuffleFailures()
+	fs3 := make([]string, len(cfg.failures))
+	for i := range cfg.failures {
+		fs3[i] = cfg.failures[i].Desc()
+	}
+	if reflect.DeepEqual(fs2, fs3) {
+		t.Fatalf("expected reshuffled failure cases from %q, got %q", fs2, fs3)
+	}
 }

+ 3 - 0
tools/functional-tester/tester/tester.go → tools/functional-tester/tester/cluster_tester.go

@@ -106,6 +106,9 @@ func (clus *Cluster) StartTester() {
 }
 
 func (clus *Cluster) doRound(round int) error {
+	if clus.Tester.FailureShuffle {
+		clus.shuffleFailures()
+	}
 	for i, f := range clus.failures {
 		clus.cs = i
 

+ 1 - 3
tools/functional-tester/tester/local-test.yaml

@@ -98,9 +98,7 @@ tester-config:
   - DELAY_PEER_PORT_TX_RX_LEADER
   - DELAY_PEER_PORT_TX_RX_ALL
 
-  # TODO: shuffle
-  # fail-shuffle: true
-
+  failure-shuffle: true
   failpoint-commands:
   - panic("etcd-tester")
   # failpoint-commands: