Browse Source

raft: stop tickElection when the node is not in peer list

This prevents a bug like the following:
1. a node sends join to a cluster and succeeds
2. it starts with empty peers and waits for sync, but it has not
received anything
3. election timeout passes, and it promotes itself to leader
4. it commits some log entry
5. its log conflicts with the cluster's
Yicheng Qin 11 years ago
parent
commit
b07be74a82
2 changed files with 37 additions and 0 deletions
  1. 6 0
      raft/raft.go
  2. 31 0
      raft/raft_test.go

+ 6 - 0
raft/raft.go

@@ -265,6 +265,12 @@ func (r *raft) appendEntry(e pb.Entry) {
 
 // tickElection is ran by followers and candidates after r.electionTimeout.
 func (r *raft) tickElection() {
+	// promotable indicates whether state machine can be promoted to leader,
+	// which is true when its own id is in progress list.
+	if _, promotable := r.prs[r.id]; !promotable {
+		r.elapsed = 0
+		return
+	}
 	r.elapsed++
 	// TODO (xiangli): elctionTimeout should be randomized.
 	if r.elapsed > r.electionTimeout {

+ 31 - 0
raft/raft_test.go

@@ -1053,6 +1053,37 @@ func TestRemoveNode(t *testing.T) {
 	}
 }
 
+func TestTickElectionElapsed(t *testing.T) { // checks r.elapsed after a single tickElection call
+	electionTimeout := 10
+	tests := []struct {
+		promotable bool // whether r.id gets an entry in r.prs
+		e          int  // r.elapsed before the tick
+		we         int  // r.elapsed wanted after the tick
+	}{
+		{true, 0, 1},                                 // promotable: elapsed is incremented
+		{true, electionTimeout - 1, electionTimeout}, // reaching the timeout exactly does not exceed it
+		{true, electionTimeout, 0},                   // exceeding the timeout: test expects elapsed back at 0
+		{false, 0, 0},                                // not promotable: elapsed stays at 0
+		{false, 1, 0},                                // not promotable: elapsed is reset to 0
+	}
+	for i, tt := range tests {
+		r := &raft{
+			id:              1,
+			raftLog:         newLog(),
+			prs:             make(map[int64]*progress),
+			electionTimeout: electionTimeout,
+			elapsed:         tt.e,
+		}
+		if tt.promotable { // tickElection treats the node as promotable only if r.prs has r.id
+			r.prs[r.id] = &progress{}
+		}
+		r.tickElection()
+		if r.elapsed != tt.we {
+			t.Errorf("#%d: elapsed = %d, want %d", i, r.elapsed, tt.we)
+		}
+	}
+}
+
 func ents(terms ...int64) *raft {
 	ents := []pb.Entry{{}}
 	for _, term := range terms {