소스 검색

raft: stop tickElection when the node is not in peer list

This prevents a bug like the following:
1. a node sends join to a cluster and succeeds
2. it starts with empty peers and waits for sync, but it has not
received anything
3. election timeout passes, and it promotes itself to leader
4. it commits some log entry
5. its log conflicts with the cluster's
Yicheng Qin 11 년 전
부모
커밋
b07be74a82
2개의 변경된 파일37개의 추가작업 그리고 0개의 파일을 삭제
  1. 6 0
      raft/raft.go
  2. 31 0
      raft/raft_test.go

+ 6 - 0
raft/raft.go

@@ -265,6 +265,12 @@ func (r *raft) appendEntry(e pb.Entry) {
 
 // tickElection is ran by followers and candidates after r.electionTimeout.
 func (r *raft) tickElection() {
+	// promotable indicates whether state machine can be promoted to leader,
+	// which is true when its own id is in progress list.
+	if _, promotable := r.prs[r.id]; !promotable {
+		r.elapsed = 0
+		return
+	}
 	r.elapsed++
 	// TODO (xiangli): elctionTimeout should be randomized.
 	if r.elapsed > r.electionTimeout {

+ 31 - 0
raft/raft_test.go

@@ -1053,6 +1053,37 @@ func TestRemoveNode(t *testing.T) {
 	}
 }
 
+func TestTickElectionElapsed(t *testing.T) {
+	electionTimeout := 10
+	tests := []struct {
+		promotable bool
+		e          int
+		we         int
+	}{
+		{true, 0, 1},
+		{true, electionTimeout - 1, electionTimeout},
+		{true, electionTimeout, 0},
+		{false, 0, 0},
+		{false, 1, 0},
+	}
+	for i, tt := range tests {
+		r := &raft{
+			id:              1,
+			raftLog:         newLog(),
+			prs:             make(map[int64]*progress),
+			electionTimeout: electionTimeout,
+			elapsed:         tt.e,
+		}
+		if tt.promotable {
+			r.prs[r.id] = &progress{}
+		}
+		r.tickElection()
+		if r.elapsed != tt.we {
+			t.Errorf("#%d: elapsed = %d, want %d", i, r.elapsed, tt.we)
+		}
+	}
+}
+
 func ents(terms ...int64) *raft {
 	ents := []pb.Entry{{}}
 	for _, term := range terms {