|
@@ -60,6 +60,15 @@ type Progress struct {
|
|
|
// Unreachable will be unset if raft starts to receive message (msgAppResp,
|
|
// Unreachable will be unset if raft starts to receive message (msgAppResp,
|
|
|
// msgHeartbeatResp) from the remote peer of the Progress.
|
|
// msgHeartbeatResp) from the remote peer of the Progress.
|
|
|
Unreachable bool
|
|
Unreachable bool
|
|
|
|
|
+ // If there is a pending snapshot, PendingSnapshot will be set to the
|
|
|
|
|
+ // index of the snapshot. If PendingSnapshot is set, the replication process of
|
|
|
|
|
+ // this Progress will be paused. raft will not resend snapshot until the pending one
|
|
|
|
|
+ // is reported to have failed.
|
|
|
|
|
+ //
|
|
|
|
|
+ // PendingSnapshot is set when raft sends out a snapshot to this Progress.
|
|
|
|
|
+ // PendingSnapshot is unset when the snapshot is reported to be successful,
|
|
|
|
|
+ // or raft updates an equal or higher Match for this Progress.
|
|
|
|
|
+ PendingSnapshot uint64
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func (pr *Progress) update(n uint64) {
|
|
func (pr *Progress) update(n uint64) {
|
|
@@ -114,6 +123,33 @@ func (pr *Progress) reachable() { pr.Unreachable = false }
|
|
|
func (pr *Progress) unreachable() { pr.Unreachable = true }
|
|
func (pr *Progress) unreachable() { pr.Unreachable = true }
|
|
|
func (pr *Progress) shouldWait() bool { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 }
|
|
func (pr *Progress) shouldWait() bool { return (pr.Unreachable || pr.Match == 0) && pr.Wait > 0 }
|
|
|
|
|
|
|
|
|
|
+func (pr *Progress) hasPendingSnapshot() bool { return pr.PendingSnapshot != 0 }
|
|
|
|
|
+func (pr *Progress) setPendingSnapshot(i uint64) { pr.PendingSnapshot = i }
|
|
|
|
|
+
|
|
|
|
|
+// snapshotFinish unsets the pending snapshot and optimistically increases Next to
|
|
|
|
|
+// the index of PendingSnapshot + 1. The next replication message is expected
|
|
|
|
|
+// to be msgApp.
|
|
|
|
|
+func (pr *Progress) snapshotFinish() {
|
|
|
|
|
+ pr.Next = pr.PendingSnapshot + 1
|
|
|
|
|
+ pr.PendingSnapshot = 0
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// snapshotFail unsets the pending snapshot. The next replication message is expected
|
|
|
|
|
+// to be another msgSnap.
|
|
|
|
|
+func (pr *Progress) snapshotFail() {
|
|
|
|
|
+ pr.PendingSnapshot = 0
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+// maybeSnapshotAbort unsets PendingSnapshot if Match is equal to or higher than
|
|
|
|
|
+// the value of PendingSnapshot.
|
|
|
|
|
+func (pr *Progress) maybeSnapshotAbort() bool {
|
|
|
|
|
+ if pr.hasPendingSnapshot() && pr.Match >= pr.PendingSnapshot {
|
|
|
|
|
+ pr.PendingSnapshot = 0
|
|
|
|
|
+ return true
|
|
|
|
|
+ }
|
|
|
|
|
+ return false
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
func (pr *Progress) String() string {
|
|
func (pr *Progress) String() string {
|
|
|
return fmt.Sprintf("next = %d, match = %d, wait = %v", pr.Next, pr.Match, pr.Wait)
|
|
return fmt.Sprintf("next = %d, match = %d, wait = %v", pr.Next, pr.Match, pr.Wait)
|
|
|
}
|
|
}
|
|
@@ -227,7 +263,7 @@ func (r *raft) send(m pb.Message) {
|
|
|
// sendAppend sends RPC, with entries to the given peer.
|
|
// sendAppend sends RPC, with entries to the given peer.
|
|
|
func (r *raft) sendAppend(to uint64) {
|
|
func (r *raft) sendAppend(to uint64) {
|
|
|
pr := r.prs[to]
|
|
pr := r.prs[to]
|
|
|
- if pr.shouldWait() {
|
|
|
|
|
|
|
+ if pr.shouldWait() || pr.hasPendingSnapshot() {
|
|
|
return
|
|
return
|
|
|
}
|
|
}
|
|
|
m := pb.Message{}
|
|
m := pb.Message{}
|
|
@@ -251,7 +287,8 @@ func (r *raft) sendAppend(to uint64) {
|
|
|
sindex, sterm := snapshot.Metadata.Index, snapshot.Metadata.Term
|
|
sindex, sterm := snapshot.Metadata.Index, snapshot.Metadata.Term
|
|
|
log.Printf("raft: %x [firstindex: %d, commit: %d] sent snapshot[index: %d, term: %d] to %x [%s]",
|
|
log.Printf("raft: %x [firstindex: %d, commit: %d] sent snapshot[index: %d, term: %d] to %x [%s]",
|
|
|
r.id, r.raftLog.firstIndex(), r.Commit, sindex, sterm, to, pr)
|
|
r.id, r.raftLog.firstIndex(), r.Commit, sindex, sterm, to, pr)
|
|
|
- pr.waitSet(r.electionTimeout)
|
|
|
|
|
|
|
+ pr.setPendingSnapshot(sindex)
|
|
|
|
|
+ log.Printf("raft: %x paused sending replication messages to %x [%s]", r.id, to, pr)
|
|
|
} else {
|
|
} else {
|
|
|
m.Type = pb.MsgApp
|
|
m.Type = pb.MsgApp
|
|
|
m.Index = pr.Next - 1
|
|
m.Index = pr.Next - 1
|
|
@@ -509,6 +546,9 @@ func stepLeader(r *raft, m pb.Message) {
|
|
|
} else {
|
|
} else {
|
|
|
oldWait := pr.shouldWait()
|
|
oldWait := pr.shouldWait()
|
|
|
pr.update(m.Index)
|
|
pr.update(m.Index)
|
|
|
|
|
+ if r.prs[m.From].maybeSnapshotAbort() {
|
|
|
|
|
+ log.Printf("raft: %x snapshot aborted, resumed sending replication messages to %x [%s]", r.id, m.From, pr)
|
|
|
|
|
+ }
|
|
|
if r.maybeCommit() {
|
|
if r.maybeCommit() {
|
|
|
r.bcastAppend()
|
|
r.bcastAppend()
|
|
|
} else if oldWait {
|
|
} else if oldWait {
|
|
@@ -526,6 +566,20 @@ func stepLeader(r *raft, m pb.Message) {
|
|
|
log.Printf("raft: %x [logterm: %d, index: %d, vote: %x] rejected vote from %x [logterm: %d, index: %d] at term %d",
|
|
log.Printf("raft: %x [logterm: %d, index: %d, vote: %x] rejected vote from %x [logterm: %d, index: %d] at term %d",
|
|
|
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.From, m.LogTerm, m.Index, r.Term)
|
|
r.id, r.raftLog.lastTerm(), r.raftLog.lastIndex(), r.Vote, m.From, m.LogTerm, m.Index, r.Term)
|
|
|
r.send(pb.Message{To: m.From, Type: pb.MsgVoteResp, Reject: true})
|
|
r.send(pb.Message{To: m.From, Type: pb.MsgVoteResp, Reject: true})
|
|
|
|
|
+ case pb.MsgSnapStatus:
|
|
|
|
|
+ if !pr.hasPendingSnapshot() {
|
|
|
|
|
+ return
|
|
|
|
|
+ }
|
|
|
|
|
+ if m.Reject {
|
|
|
|
|
+ pr.snapshotFail()
|
|
|
|
|
+ log.Printf("raft: %x snapshot failed, resumed sending replication messages to %x [%s]", r.id, m.From, pr)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ pr.snapshotFinish()
|
|
|
|
|
+ log.Printf("raft: %x snapshot succeeded resumed sending replication messages to %x [%s]", r.id, m.From, pr)
|
|
|
|
|
+ // wait for the msgAppResp from the remote node before sending
|
|
|
|
|
+ // out the next msgApp
|
|
|
|
|
+ pr.waitSet(r.electionTimeout)
|
|
|
|
|
+ }
|
|
|
case pb.MsgUnreachable:
|
|
case pb.MsgUnreachable:
|
|
|
r.prs[m.From].unreachable()
|
|
r.prs[m.From].unreachable()
|
|
|
}
|
|
}
|