Browse Source

integration: test member removal which breaks active quorum is rejected

Anthony Romano 9 years ago
parent
commit
e742ff331f
2 changed files with 54 additions and 1 deletions
  1. 8 1
      integration/cluster.go
  2. 46 0
      integration/cluster_test.go

+ 8 - 1
integration/cluster.go

@@ -276,12 +276,18 @@ func (c *cluster) AddMember(t *testing.T) {
 }
 
 func (c *cluster) RemoveMember(t *testing.T, id uint64) {
+	if err := c.removeMember(t, id); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func (c *cluster) removeMember(t *testing.T, id uint64) error {
 	// send remove request to the cluster
 	cc := MustNewHTTPClient(t, c.URLs(), c.cfg.ClientTLS)
 	ma := client.NewMembersAPI(cc)
 	ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
 	if err := ma.Remove(ctx, types.ID(id).String()); err != nil {
-		t.Fatalf("unexpected remove error %v", err)
+		return err
 	}
 	cancel()
 	newMembers := make([]*member, 0)
@@ -302,6 +308,7 @@ func (c *cluster) RemoveMember(t *testing.T, id uint64) {
 	}
 	c.Members = newMembers
 	c.waitMembersMatch(t, c.HTTPMembers())
+	return nil
 }
 
 func (c *cluster) Terminate(t *testing.T) {

+ 46 - 0
integration/cluster_test.go

@@ -391,6 +391,52 @@ func TestRejectUnhealthyAdd(t *testing.T) {
 	}
 }
 
+// TestRejectUnhealthyRemove ensures an unhealthy cluster rejects removing members
+// if quorum will be lost.
+func TestRejectUnhealthyRemove(t *testing.T) {
+	defer testutil.AfterTest(t)
+	c := NewCluster(t, 5)
+	for _, m := range c.Members {
+		m.ServerConfig.StrictReconfigCheck = true
+	}
+	c.Launch(t)
+	defer c.Terminate(t)
+
+	// make cluster unhealthy and wait for downed peer; (3 up, 2 down)
+	c.Members[0].Stop(t)
+	c.Members[1].Stop(t)
+	c.WaitLeader(t)
+
+	// reject remove active member since (3,2)-(1,0) => (2,2) lacks quorum
+	err := c.removeMember(t, uint64(c.Members[2].s.ID()))
+	if err == nil {
+		t.Fatalf("should reject quorum breaking remove")
+	}
+	// TODO: client should return more descriptive error codes for internal errors
+	if !strings.Contains(err.Error(), "has no leader") {
+		t.Errorf("unexpected error (%v)", err)
+	}
+
+	// member stopped after launch; wait for missing heartbeats
+	time.Sleep(time.Duration(electionTicks * int(tickDuration)))
+
+	// permit remove dead member since (3,2) - (0,1) => (3,1) has quorum
+	if err = c.removeMember(t, uint64(c.Members[0].s.ID())); err != nil {
+		t.Fatalf("should accept removing down member")
+	}
+
+	// bring cluster to (4,1)
+	c.Members[0].Restart(t)
+
+	// restarted member must be connected for a HealthInterval before remove is accepted
+	time.Sleep((3 * etcdserver.HealthInterval) / 2)
+
+	// accept remove member since (4,1)-(1,0) => (3,1) has quorum
+	if err = c.removeMember(t, uint64(c.Members[0].s.ID())); err != nil {
+		t.Fatalf("expected to remove member, got error %v", err)
+	}
+}
+
 // clusterMustProgress ensures that cluster can make progress. It creates
 // a random key first, and check the new key could be got from all client urls
 // of the cluster.