Browse Source

raft: fall back to bad path when unreachable

Yicheng Qin 10 years ago
parent
commit
be0bf2a2bd
2 changed files with 17 additions and 3 deletions
  1. 12 2
      raft/raft.go
  2. 5 1
      raft/raft_test.go

+ 12 - 2
raft/raft.go

@@ -120,8 +120,18 @@ func (pr *Progress) waitSet(w int)       { pr.Wait = w }
 func (pr *Progress) waitReset()          { pr.Wait = 0 }
 func (pr *Progress) isUnreachable() bool { return pr.Unreachable }
 func (pr *Progress) reachable()          { pr.Unreachable = false }
-func (pr *Progress) unreachable()        { pr.Unreachable = true }
-func (pr *Progress) shouldWait() bool    { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 }
+
+func (pr *Progress) unreachable() {
+	pr.Unreachable = true
+	// When in the optimistic appending path, if the remote becomes unreachable,
+	// there is a high probability that it has lost MsgApp messages. Fall back
+	// to the bad path (Next = Match + 1) to recover steadily.
+	if pr.Match != 0 {
+		pr.Next = pr.Match + 1
+	}
+}
+
+func (pr *Progress) shouldWait() bool { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 }
 
 func (pr *Progress) hasPendingSnapshot() bool    { return pr.PendingSnapshot != 0 }
 func (pr *Progress) setPendingSnapshot(i uint64) { pr.PendingSnapshot = i }

+ 5 - 1
raft/raft_test.go

@@ -1300,10 +1300,14 @@ func TestUnreachable(t *testing.T) {
 
 	// set node 2 to unreachable
 	r.prs[2].Match = 3
-	r.prs[2].Next = 4
+	r.prs[2].Next = 5
 	r.prs[2].Wait = 0
 	r.prs[2].unreachable()
 
+	if wnext := r.prs[2].Match + 1; r.prs[2].Next != wnext {
+		t.Errorf("next = %d, want %d", r.prs[2].Next, wnext)
+	}
+
 	for i := 0; i < 3; i++ {
 		// node 2 is unreachable, we expect that raft will only send out one msgAPP per heartbeat timeout
 		r.Step(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}})