ソースを参照

raft: make tick unblock and fix potential live lock

Xiang Li 9 年 前
コミット
848f539536
2 ファイル変更15 行追加4 行削除
  1. 13 4
      raft/node.go
  2. 2 0
      raft/node_test.go

+ 13 - 4
raft/node.go

@@ -200,6 +200,7 @@ func StartNode(c *Config, peers []Peer) Node {
 	}
 
 	n := newNode()
+	n.logger = c.Logger
 	go n.run(r)
 	return &n
 }
@@ -212,6 +213,7 @@ func RestartNode(c *Config) Node {
 	r := newRaft(c)
 
 	n := newNode()
+	n.logger = c.Logger
 	go n.run(r)
 	return &n
 }
@@ -228,6 +230,8 @@ type node struct {
 	done       chan struct{}
 	stop       chan struct{}
 	status     chan chan Status
+
+	logger Logger
 }
 
 func newNode() node {
@@ -238,10 +242,13 @@ func newNode() node {
 		confstatec: make(chan pb.ConfState),
 		readyc:     make(chan Ready),
 		advancec:   make(chan struct{}),
-		tickc:      make(chan struct{}),
-		done:       make(chan struct{}),
-		stop:       make(chan struct{}),
-		status:     make(chan chan Status),
+		// make tickc a buffered chan, so raft node can buffer some ticks when the node
+		// is busy processing raft messages. Raft node will resume process buffered
+		// ticks when it becomes idle.
+		tickc:  make(chan struct{}, 128),
+		done:   make(chan struct{}),
+		stop:   make(chan struct{}),
+		status: make(chan chan Status),
 	}
 }
 
@@ -381,6 +388,8 @@ func (n *node) Tick() {
 	select {
 	case n.tickc <- struct{}{}:
 	case <-n.done:
+	default:
+		n.logger.Warningf("A tick missed to fire. Node blocks too long!")
 	}
 }
 

+ 2 - 0
raft/node_test.go

@@ -226,6 +226,7 @@ func TestNodeTick(t *testing.T) {
 	go n.run(r)
 	elapsed := r.electionElapsed
 	n.Tick()
+	testutil.WaitSchedule()
 	n.Stop()
 	if r.electionElapsed != elapsed+1 {
 		t.Errorf("elapsed = %d, want %d", r.electionElapsed, elapsed+1)
@@ -247,6 +248,7 @@ func TestNodeStop(t *testing.T) {
 
 	elapsed := r.electionElapsed
 	n.Tick()
+	testutil.WaitSchedule()
 	n.Stop()
 
 	select {