Browse Source

functional/tester: add TODO for member remove fail

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
Gyuho Lee 7 years ago
parent
commit
d7bf471199
1 changed file with 22 additions and 3 deletions
  1. 22 3
      functional/tester/failure_case_sigquit_remove.go

+ 22 - 3
functional/tester/failure_case_sigquit_remove.go

@@ -57,9 +57,15 @@ func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
 	id1 := sresp.Header.MemberId
 	id1 := sresp.Header.MemberId
 	is1 := fmt.Sprintf("%016x", id1)
 	is1 := fmt.Sprintf("%016x", id1)
 
 
+	clus.lg.Info(
+		"disastrous machine failure START",
+		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
+		zap.String("target-member-id", is1),
+		zap.Error(err),
+	)
 	err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
 	err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
 	clus.lg.Info(
 	clus.lg.Info(
-		"disastrous machine failure",
+		"disastrous machine failure END",
 		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
 		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
 		zap.String("target-member-id", is1),
 		zap.String("target-member-id", is1),
 		zap.Error(err),
 		zap.Error(err),
@@ -78,9 +84,22 @@ func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
 	}
 	}
 	defer cli2.Close()
 	defer cli2.Close()
 
 
-	_, err = cli2.MemberRemove(context.Background(), id1)
+	// FIXME(bug): this may block forever during
+	// "SIGQUIT_AND_REMOVE_LEADER_UNTIL_TRIGGER_SNAPSHOT".
+	// Is the new leader too busy with snapshotting?
+	// Is the raft proposal being dropped?
+	// Should client keepalive be enabled for failover?
+	clus.lg.Info(
+		"member remove after disaster START",
+		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
+		zap.String("target-member-id", is1),
+		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
+	)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	_, err = cli2.MemberRemove(ctx, id1)
+	cancel()
 	clus.lg.Info(
 	clus.lg.Info(
-		"member remove after disaster",
+		"member remove after disaster END",
 		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
 		zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
 		zap.String("target-member-id", is1),
 		zap.String("target-member-id", is1),
 		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
 		zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),