Browse Source

functional/tester: clean up runner logging

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
Gyuho Lee 7 years ago
parent
commit
2922116be5

+ 2 - 5
functional/tester/cluster.go

@@ -245,11 +245,7 @@ func (clus *Cluster) updateStresserChecker() {
 		clus.checker = newNoChecker()
 	}
 
-	clus.lg.Info(
-		"updated stressers",
-		zap.Int("round", clus.rd),
-		zap.Int("case", clus.cs),
-	)
+	clus.lg.Info("updated stressers")
 }
 
 func (clus *Cluster) checkConsistency() (err error) {
@@ -646,6 +642,7 @@ func (clus *Cluster) defrag() error {
 		"defrag ALL PASS",
 		zap.Int("round", clus.rd),
 		zap.Int("case", clus.cs),
+		zap.Int("case-total", len(clus.failures)),
 	)
 	return nil
 }

+ 17 - 4
functional/tester/cluster_run.go

@@ -51,6 +51,7 @@ func (clus *Cluster) Run() {
 				"round FAIL",
 				zap.Int("round", clus.rd),
 				zap.Int("case", clus.cs),
+				zap.Int("case-total", len(clus.failures)),
 				zap.Error(err),
 			)
 			if clus.cleanup() != nil {
@@ -74,6 +75,7 @@ func (clus *Cluster) Run() {
 			"compact START",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.Duration("timeout", timeout),
 		)
 		if err := clus.compact(revToCompact, timeout); err != nil {
@@ -81,6 +83,7 @@ func (clus *Cluster) Run() {
 				"compact FAIL",
 				zap.Int("round", clus.rd),
 				zap.Int("case", clus.cs),
+				zap.Int("case-total", len(clus.failures)),
 				zap.Error(err),
 			)
 			if err = clus.cleanup(); err != nil {
@@ -88,6 +91,7 @@ func (clus *Cluster) Run() {
 					"cleanup FAIL",
 					zap.Int("round", clus.rd),
 					zap.Int("case", clus.cs),
+					zap.Int("case-total", len(clus.failures)),
 					zap.Error(err),
 				)
 				return
@@ -107,6 +111,7 @@ func (clus *Cluster) Run() {
 		"functional-tester PASS",
 		zap.Int("round", clus.rd),
 		zap.Int("case", clus.cs),
+		zap.Int("case-total", len(clus.failures)),
 	)
 }
 
@@ -119,8 +124,9 @@ func (clus *Cluster) doRound() error {
 	clus.lg.Info(
 		"round START",
 		zap.Int("round", clus.rd),
+		zap.Int("case", clus.cs),
+		zap.Int("case-total", len(clus.failures)),
 		zap.Strings("failures", clus.failureStrings()),
-		zap.Int("total-failures", len(clus.failures)),
 	)
 	for i, fa := range clus.failures {
 		clus.cs = i
@@ -133,8 +139,8 @@ func (clus *Cluster) doRound() error {
 			"case START",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.String("desc", fa.Desc()),
-			zap.Int("total-failures", len(clus.failures)),
 		)
 
 		clus.lg.Info("wait health before injecting failures")
@@ -149,6 +155,7 @@ func (clus *Cluster) doRound() error {
 				"stresser START",
 				zap.Int("round", clus.rd),
 				zap.Int("case", clus.cs),
+				zap.Int("case-total", len(clus.failures)),
 				zap.String("desc", fa.Desc()),
 			)
 			if err := clus.stresser.Stress(); err != nil {
@@ -161,6 +168,7 @@ func (clus *Cluster) doRound() error {
 			"inject START",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.String("desc", fa.Desc()),
 		)
 		if err := fa.Inject(clus); err != nil {
@@ -174,6 +182,7 @@ func (clus *Cluster) doRound() error {
 			"recover START",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.String("desc", fa.Desc()),
 		)
 		if err := fa.Recover(clus); err != nil {
@@ -218,8 +227,8 @@ func (clus *Cluster) doRound() error {
 			"case PASS",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.String("desc", fa.Desc()),
-			zap.Int("total-failures", len(clus.failures)),
 			zap.Duration("took", time.Since(caseNow)),
 		)
 	}
@@ -228,7 +237,7 @@ func (clus *Cluster) doRound() error {
 		"round ALL PASS",
 		zap.Int("round", clus.rd),
 		zap.Strings("failures", clus.failureStrings()),
-		zap.Int("total-failures", len(clus.failures)),
+		zap.Int("case-total", len(clus.failures)),
 		zap.Duration("took", time.Since(roundNow)),
 	)
 	return nil
@@ -291,6 +300,7 @@ func (clus *Cluster) failed() {
 		"functional-tester FAIL",
 		zap.Int("round", clus.rd),
 		zap.Int("case", clus.cs),
+		zap.Int("case-total", len(clus.failures)),
 	)
 	clus.DestroyEtcdAgents()
 	os.Exit(2)
@@ -310,6 +320,7 @@ func (clus *Cluster) cleanup() error {
 		"closing stressers before archiving failure data",
 		zap.Int("round", clus.rd),
 		zap.Int("case", clus.cs),
+		zap.Int("case-total", len(clus.failures)),
 	)
 	clus.stresser.Close()
 
@@ -318,6 +329,7 @@ func (clus *Cluster) cleanup() error {
 			"cleanup FAIL",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.Error(err),
 		)
 		return err
@@ -327,6 +339,7 @@ func (clus *Cluster) cleanup() error {
 			"restart FAIL",
 			zap.Int("round", clus.rd),
 			zap.Int("case", clus.cs),
+			zap.Int("case-total", len(clus.failures)),
 			zap.Error(err),
 		)
 		return err

+ 28 - 3
functional/tester/stress.go

@@ -52,6 +52,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 			// TODO: Too intensive stressing clients can panic etcd member with
 			// 'out of memory' error. Put rate limits in server side.
 			stressers[i] = &keyStresser{
+				stype:             rpcpb.StressType_KV,
 				lg:                clus.lg,
 				m:                 m,
 				keySize:           int(clus.Tester.StressKeySize),
@@ -65,6 +66,7 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 
 		case "LEASE":
 			stressers[i] = &leaseStresser{
+				stype:        rpcpb.StressType_LEASE,
 				lg:           clus.lg,
 				m:            m,
 				numLeases:    10, // TODO: configurable
@@ -84,6 +86,8 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 				"--req-rate", fmt.Sprintf("%v", reqRate),
 			}
 			stressers[i] = newRunnerStresser(
+				rpcpb.StressType_ELECTION_RUNNER,
+				clus.lg,
 				clus.Tester.RunnerExecPath,
 				args,
 				clus.rateLimiter,
@@ -102,7 +106,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 				"--rounds=0", // runs forever
 				"--req-rate", fmt.Sprintf("%v", reqRate),
 			}
-			stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, reqRate)
+			stressers[i] = newRunnerStresser(
+				rpcpb.StressType_WATCH_RUNNER,
+				clus.lg,
+				clus.Tester.RunnerExecPath,
+				args,
+				clus.rateLimiter,
+				reqRate,
+			)
 
 		case "LOCK_RACER_RUNNER":
 			reqRate := 100
@@ -114,7 +125,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 				"--rounds=0", // runs forever
 				"--req-rate", fmt.Sprintf("%v", reqRate),
 			}
-			stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, reqRate)
+			stressers[i] = newRunnerStresser(
+				rpcpb.StressType_LOCK_RACER_RUNNER,
+				clus.lg,
+				clus.Tester.RunnerExecPath,
+				args,
+				clus.rateLimiter,
+				reqRate,
+			)
 
 		case "LEASE_RUNNER":
 			args := []string{
@@ -122,7 +140,14 @@ func newStresser(clus *Cluster, m *rpcpb.Member) Stresser {
 				"--ttl=30",
 				"--endpoints", m.EtcdClientEndpoint,
 			}
-			stressers[i] = newRunnerStresser(clus.Tester.RunnerExecPath, args, clus.rateLimiter, 0)
+			stressers[i] = newRunnerStresser(
+				rpcpb.StressType_LEASE_RUNNER,
+				clus.lg,
+				clus.Tester.RunnerExecPath,
+				args,
+				clus.rateLimiter,
+				0,
+			)
 		}
 	}
 	return &compositeStresser{stressers}

+ 8 - 4
functional/tester/stress_key.go

@@ -34,7 +34,8 @@ import (
 )
 
 type keyStresser struct {
-	lg *zap.Logger
+	stype rpcpb.StressType
+	lg    *zap.Logger
 
 	m *rpcpb.Member
 
@@ -102,7 +103,8 @@ func (s *keyStresser) Stress() error {
 	}
 
 	s.lg.Info(
-		"key stresser START",
+		"stress START",
+		zap.String("stress-type", s.stype.String()),
 		zap.String("endpoint", s.m.EtcdClientEndpoint),
 	)
 	return nil
@@ -156,7 +158,8 @@ func (s *keyStresser) run() {
 			return
 		default:
 			s.lg.Warn(
-				"key stresser exited with error",
+				"stress stopped",
+				zap.String("stress-type", s.stype.String()),
 				zap.String("endpoint", s.m.EtcdClientEndpoint),
 				zap.Error(err),
 			)
@@ -188,7 +191,8 @@ func (s *keyStresser) Close() map[string]int {
 	s.emu.Unlock()
 
 	s.lg.Info(
-		"key stresser STOP",
+		"stress STOP",
+		zap.String("stress-type", s.stype.String()),
 		zap.String("endpoint", s.m.EtcdClientEndpoint),
 	)
 	return ess

+ 26 - 7
functional/tester/stress_lease.go

@@ -38,7 +38,8 @@ const (
 )
 
 type leaseStresser struct {
-	lg *zap.Logger
+	stype rpcpb.StressType
+	lg    *zap.Logger
 
 	m      *rpcpb.Member
 	cli    *clientv3.Client
@@ -121,7 +122,8 @@ func (ls *leaseStresser) setupOnce() error {
 
 func (ls *leaseStresser) Stress() error {
 	ls.lg.Info(
-		"lease stresser START",
+		"stress START",
+		zap.String("stress-type", ls.stype.String()),
 		zap.String("endpoint", ls.m.EtcdClientEndpoint),
 	)
 
@@ -159,22 +161,26 @@ func (ls *leaseStresser) run() {
 		}
 
 		ls.lg.Debug(
-			"lease stresser is creating leases",
+			"stress creating leases",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		)
 		ls.createLeases()
 		ls.lg.Debug(
-			"lease stresser created leases",
+			"stress created leases",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		)
 
 		ls.lg.Debug(
-			"lease stresser is dropped leases",
+			"stress dropped leases",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		)
 		ls.randomlyDropLeases()
 		ls.lg.Debug(
-			"lease stresser dropped leases",
+			"stress dropped leases",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		)
 	}
@@ -243,6 +249,7 @@ func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
 	if err != nil {
 		ls.lg.Debug(
 			"createLease failed",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 			zap.Error(err),
 		)
@@ -251,6 +258,7 @@ func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
 
 	ls.lg.Debug(
 		"createLease created lease",
+		zap.String("stress-type", ls.stype.String()),
 		zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 	)
@@ -284,6 +292,7 @@ func (ls *leaseStresser) randomlyDropLeases() {
 			}
 			ls.lg.Debug(
 				"randomlyDropLease dropped a lease",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 			)
@@ -313,6 +322,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		case <-ls.ctx.Done():
 			ls.lg.Debug(
 				"keepLeaseAlive context canceled",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 				zap.Error(ls.ctx.Err()),
@@ -327,6 +337,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 				ls.aliveLeases.remove(leaseID)
 				ls.lg.Debug(
 					"keepLeaseAlive lease has not been renewed, dropped it",
+					zap.String("stress-type", ls.stype.String()),
 					zap.String("endpoint", ls.m.EtcdClientEndpoint),
 					zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 				)
@@ -337,6 +348,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		if err != nil {
 			ls.lg.Debug(
 				"keepLeaseAlive lease creates stream error",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 				zap.Error(err),
@@ -350,6 +362,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		if err != nil {
 			ls.lg.Debug(
 				"keepLeaseAlive failed to receive lease keepalive response",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 				zap.Error(err),
@@ -359,6 +372,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 
 		ls.lg.Debug(
 			"keepLeaseAlive waiting on lease stream",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 			zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 		)
@@ -367,6 +381,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		if respRC == nil {
 			ls.lg.Debug(
 				"keepLeaseAlive received nil lease keepalive response",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 			)
@@ -378,6 +393,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		if respRC.TTL <= 0 {
 			ls.lg.Debug(
 				"keepLeaseAlive stream received lease keepalive response TTL <= 0",
+				zap.String("stress-type", ls.stype.String()),
 				zap.String("endpoint", ls.m.EtcdClientEndpoint),
 				zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 				zap.Int64("ttl", respRC.TTL),
@@ -388,6 +404,7 @@ func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
 		// renew lease timestamp only if lease is present
 		ls.lg.Debug(
 			"keepLeaseAlive renewed a lease",
+			zap.String("stress-type", ls.stype.String()),
 			zap.String("endpoint", ls.m.EtcdClientEndpoint),
 			zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 		)
@@ -440,6 +457,7 @@ func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
 
 	ls.lg.Debug(
 		"randomlyDropLease error",
+		zap.String("stress-type", ls.stype.String()),
 		zap.String("endpoint", ls.m.EtcdClientEndpoint),
 		zap.String("lease-id", fmt.Sprintf("%016x", leaseID)),
 		zap.Error(ls.ctx.Err()),
@@ -457,7 +475,8 @@ func (ls *leaseStresser) Close() map[string]int {
 	ls.aliveWg.Wait()
 	ls.cli.Close()
 	ls.lg.Info(
-		"lease stresser STOP",
+		"stress STOP",
+		zap.String("stress-type", ls.stype.String()),
 		zap.String("endpoint", ls.m.EtcdClientEndpoint),
 	)
 	return nil

+ 23 - 1
functional/tester/stress_runner.go

@@ -20,10 +20,16 @@ import (
 	"os/exec"
 	"syscall"
 
+	"github.com/coreos/etcd/functional/rpcpb"
+
+	"go.uber.org/zap"
 	"golang.org/x/time/rate"
 )
 
 type runnerStresser struct {
+	stype rpcpb.StressType
+	lg    *zap.Logger
+
 	cmd     *exec.Cmd
 	cmdStr  string
 	args    []string
@@ -34,9 +40,17 @@ type runnerStresser struct {
 	donec chan struct{}
 }
 
-func newRunnerStresser(cmdStr string, args []string, rl *rate.Limiter, reqRate int) *runnerStresser {
+func newRunnerStresser(
+	stype rpcpb.StressType,
+	lg *zap.Logger,
+	cmdStr string,
+	args []string,
+	rl *rate.Limiter,
+	reqRate int,
+) *runnerStresser {
 	rl.SetLimit(rl.Limit() - rate.Limit(reqRate))
 	return &runnerStresser{
+		stype:   stype,
 		cmdStr:  cmdStr,
 		args:    args,
 		rl:      rl,
@@ -71,6 +85,10 @@ func (rs *runnerStresser) setupOnce() (err error) {
 }
 
 func (rs *runnerStresser) Stress() (err error) {
+	rs.lg.Info(
+		"stress START",
+		zap.String("stress-type", rs.stype.String()),
+	)
 	if err = rs.setupOnce(); err != nil {
 		return err
 	}
@@ -78,6 +96,10 @@ func (rs *runnerStresser) Stress() (err error) {
 }
 
 func (rs *runnerStresser) Pause() map[string]int {
+	rs.lg.Info(
+		"stress STOP",
+		zap.String("stress-type", rs.stype.String()),
+	)
 	syscall.Kill(rs.cmd.Process.Pid, syscall.SIGSTOP)
 	return nil
 }