Pārlūkot izejas kodu

raft: stop tickElection when the node is not in peer list

This prevents a bug like the following:
1. a node sends join to a cluster and succeeds
2. it starts with empty peers and waits for sync, but it has not
received anything
3. election timeout passes, and it promotes itself to leader
4. it commits some log entry
5. its log conflicts with the cluster's
Yicheng Qin 11 gadi atpakaļ
vecāks
revīzija
b07be74a82
2 mainītis faili ar 37 papildinājumiem un 0 dzēšanām
  1. 6 0
      raft/raft.go
  2. 31 0
      raft/raft_test.go

+ 6 - 0
raft/raft.go

@@ -265,6 +265,12 @@ func (r *raft) appendEntry(e pb.Entry) {
 
 // tickElection is ran by followers and candidates after r.electionTimeout.
 func (r *raft) tickElection() {
+	// promotable indicates whether state machine can be promoted to leader,
+	// which is true when its own id is in progress list.
+	if _, promotable := r.prs[r.id]; !promotable {
+		r.elapsed = 0
+		return
+	}
 	r.elapsed++
 	// TODO (xiangli): elctionTimeout should be randomized.
 	if r.elapsed > r.electionTimeout {

+ 31 - 0
raft/raft_test.go

@@ -1053,6 +1053,37 @@ func TestRemoveNode(t *testing.T) {
 	}
 }
 
+func TestTickElectionElapsed(t *testing.T) {
+	electionTimeout := 10
+	tests := []struct {
+		promotable bool
+		e          int
+		we         int
+	}{
+		{true, 0, 1},
+		{true, electionTimeout - 1, electionTimeout},
+		{true, electionTimeout, 0},
+		{false, 0, 0},
+		{false, 1, 0},
+	}
+	for i, tt := range tests {
+		r := &raft{
+			id:              1,
+			raftLog:         newLog(),
+			prs:             make(map[int64]*progress),
+			electionTimeout: electionTimeout,
+			elapsed:         tt.e,
+		}
+		if tt.promotable {
+			r.prs[r.id] = &progress{}
+		}
+		r.tickElection()
+		if r.elapsed != tt.we {
+			t.Errorf("#%d: elapsed = %d, want %d", i, r.elapsed, tt.we)
+		}
+	}
+}
+
 func ents(terms ...int64) *raft {
 	ents := []pb.Entry{{}}
 	for _, term := range terms {