@@ -55,6 +55,7 @@ const ElectionTimeoutThresholdPercent = 0.8
 var NotLeaderError = errors.New("raft.Server: Not current leader")
 var DuplicatePeerError = errors.New("raft.Server: Duplicate peer")
 var CommandTimeoutError = errors.New("raft: Command timeout")
+var StopError = errors.New("raft: Has been stopped")
 
 //------------------------------------------------------------------------------
 //
@@ -123,7 +124,7 @@ type server struct {
 	mutex      sync.RWMutex
 	syncedPeer map[string]bool
 
-	stopped           chan chan bool
+	stopped           chan bool
 	c                 chan *ev
 	electionTimeout   time.Duration
 	heartbeatInterval time.Duration
@@ -140,6 +141,8 @@ type server struct {
 	maxLogEntriesPerRequest uint64
 
 	connectionString string
+
+	routineGroup sync.WaitGroup
 }
 
 // An internal event to be processed by the server's event loop.
@@ -177,7 +180,6 @@ func NewServer(name string, path string, transporter Transporter, stateMachine S
 		state:                   Stopped,
 		peers:                   make(map[string]*Peer),
 		log:                     newLog(),
-		stopped:                 make(chan chan bool),
 		c:                       make(chan *ev, 256),
 		electionTimeout:         DefaultElectionTimeout,
 		heartbeatInterval:       DefaultHeartbeatInterval,
@@ -440,6 +442,9 @@ func (s *server) Start() error {
 		return err
 	}
 
+	// stopped needs to be allocated each time server starts
+	// because it is closed at `Stop`.
+	s.stopped = make(chan bool)
 	s.setState(Follower)
 
 	// If no log entries exist then
@@ -457,7 +462,11 @@ func (s *server) Start() error {
 
 	debugln(s.GetState())
 
-	go s.loop()
+	s.routineGroup.Add(1)
+	go func() {
+		defer s.routineGroup.Done()
+		s.loop()
+	}()
 
 	return nil
 }
@@ -507,11 +516,11 @@ func (s *server) Stop() {
 		return
 	}
 
-	stop := make(chan bool)
-	s.stopped <- stop
+	close(s.stopped)
+
+	// make sure all goroutines have stopped before we close the log
+	s.routineGroup.Wait()
 
-	// make sure the server has stopped before we close the log
-	<-stop
 	s.log.close()
 	s.setState(Stopped)
 }
@@ -605,9 +614,17 @@ func (s *server) loop() {
 // until the event is actually processed before returning.
 func (s *server) send(value interface{}) (interface{}, error) {
 	event := &ev{target: value, c: make(chan error, 1)}
-	s.c <- event
-	err := <-event.c
-	return event.returnValue, err
+	select {
+	case s.c <- event:
+	case <-s.stopped:
+		return nil, StopError
+	}
+	select {
+	case <-s.stopped:
+		return nil, StopError
+	case err := <-event.c:
+		return event.returnValue, err
+	}
 }
 
 func (s *server) sendAsync(value interface{}) {
@@ -621,8 +638,13 @@ func (s *server) sendAsync(value interface{}) {
 	default:
 	}
 
+	s.routineGroup.Add(1)
 	go func() {
-		s.c <- event
+		defer s.routineGroup.Done()
+		select {
+		case s.c <- event:
+		case <-s.stopped:
+		}
 	}()
 }
 
@@ -640,9 +662,8 @@ func (s *server) followerLoop() {
 		var err error
 		update := false
 		select {
-		case stop := <-s.stopped:
+		case <-s.stopped:
 			s.setState(Stopped)
-			stop <- true
 			return
 
 		case e := <-s.c:
@@ -717,7 +738,11 @@ func (s *server) candidateLoop() {
 			// Send RequestVote RPCs to all other servers.
 			respChan = make(chan *RequestVoteResponse, len(s.peers))
 			for _, peer := range s.peers {
-				go peer.sendVoteRequest(newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm), respChan)
+				s.routineGroup.Add(1)
+				go func(peer *Peer) {
+					defer s.routineGroup.Done()
+					peer.sendVoteRequest(newRequestVoteRequest(s.currentTerm, s.name, lastLogIndex, lastLogTerm), respChan)
+				}(peer)
 			}
 
 			// Wait for either:
@@ -740,9 +765,8 @@ func (s *server) candidateLoop() {
 
 		// Collect votes from peers.
 		select {
-		case stop := <-s.stopped:
+		case <-s.stopped:
 			s.setState(Stopped)
-			stop <- true
 			return
 
 		case resp := <-respChan:
@@ -786,19 +810,22 @@ func (s *server) leaderLoop() {
 	// "Upon election: send initial empty AppendEntries RPCs (heartbeat) to
 	// each server; repeat during idle periods to prevent election timeouts
 	// (§5.2)". The heartbeats started above do the "idle" period work.
-	go s.Do(NOPCommand{})
+	s.routineGroup.Add(1)
+	go func() {
+		defer s.routineGroup.Done()
+		s.Do(NOPCommand{})
+	}()
 
 	// Begin to collect response from followers
 	for s.State() == Leader {
 		var err error
 		select {
-		case stop := <-s.stopped:
+		case <-s.stopped:
 			// Stop all peers before stop
 			for _, peer := range s.peers {
 				peer.stopHeartbeat(false)
 			}
 			s.setState(Stopped)
-			stop <- true
 			return
 
 		case e := <-s.c:
@@ -826,9 +853,8 @@ func (s *server) snapshotLoop() {
 	for s.State() == Snapshotting {
 		var err error
 		select {
-		case stop := <-s.stopped:
+		case <-s.stopped:
 			s.setState(Stopped)
-			stop <- true
 			return
 
 		case e := <-s.c:
@@ -1109,7 +1135,11 @@ func (s *server) RemovePeer(name string) error {
 		// So we might be holding log lock and waiting for log lock,
 		// which lead to a deadlock.
 		// TODO(xiangli) refactor log lock
-		go peer.stopHeartbeat(true)
+		s.routineGroup.Add(1)
+		go func() {
+			defer s.routineGroup.Done()
+			peer.stopHeartbeat(true)
+		}()
 	}
 
 	delete(s.peers, name)