raft.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. package raft
  2. import (
  3. "errors"
  4. "sort"
  5. )
  6. const none = -1
  7. type messageType int
  8. const (
  9. msgHup messageType = iota
  10. msgBeat
  11. msgProp
  12. msgApp
  13. msgAppResp
  14. msgVote
  15. msgVoteResp
  16. )
  17. var mtmap = [...]string{
  18. msgHup: "msgHup",
  19. msgBeat: "msgBeat",
  20. msgProp: "msgProp",
  21. msgApp: "msgApp",
  22. msgAppResp: "msgAppResp",
  23. msgVote: "msgVote",
  24. msgVoteResp: "msgVoteResp",
  25. }
  26. func (mt messageType) String() string {
  27. return mtmap[int(mt)]
  28. }
  29. var errNoLeader = errors.New("no leader")
  30. const (
  31. stateFollower stateType = iota
  32. stateCandidate
  33. stateLeader
  34. )
  35. type stateType int
  36. var stmap = [...]string{
  37. stateFollower: "stateFollower",
  38. stateCandidate: "stateCandidate",
  39. stateLeader: "stateLeader",
  40. }
  41. func (st stateType) String() string {
  42. return stmap[int(st)]
  43. }
  44. type Message struct {
  45. Type messageType
  46. To int
  47. From int
  48. Term int
  49. LogTerm int
  50. Index int
  51. PrevTerm int
  52. Entries []Entry
  53. Commit int
  54. Data []byte
  55. }
  56. type index struct {
  57. match, next int
  58. }
  59. func (in *index) update(n int) {
  60. in.match = n
  61. in.next = n + 1
  62. }
  63. func (in *index) decr() {
  64. if in.next--; in.next < 1 {
  65. in.next = 1
  66. }
  67. }
  68. type stateMachine struct {
  69. // k is the number of peers
  70. k int
  71. // addr is an integer representation of our address amoungst our peers. It is 0 <= addr < k.
  72. addr int
  73. // the term we are participating in at any time
  74. term int
  75. // who we voted for in term
  76. vote int
  77. // the log
  78. log *log
  79. ins []index
  80. state stateType
  81. votes map[int]bool
  82. msgs []Message
  83. // the leader addr
  84. lead int
  85. }
  86. func newStateMachine(k, addr int) *stateMachine {
  87. sm := &stateMachine{k: k, addr: addr, log: newLog()}
  88. sm.reset()
  89. return sm
  90. }
  91. func (sm *stateMachine) canStep(m Message) bool {
  92. if m.Type == msgProp {
  93. return sm.lead != none
  94. }
  95. return true
  96. }
  97. func (sm *stateMachine) poll(addr int, v bool) (granted int) {
  98. if _, ok := sm.votes[addr]; !ok {
  99. sm.votes[addr] = v
  100. }
  101. for _, vv := range sm.votes {
  102. if vv {
  103. granted++
  104. }
  105. }
  106. return granted
  107. }
  108. // send persists state to stable storage and then sends to its mailbox.
  109. func (sm *stateMachine) send(m Message) {
  110. m.From = sm.addr
  111. m.Term = sm.term
  112. sm.msgs = append(sm.msgs, m)
  113. }
  114. // sendAppend sends RRPC, with entries to the given peer.
  115. func (sm *stateMachine) sendAppend(to int) {
  116. in := sm.ins[to]
  117. m := Message{}
  118. m.Type = msgApp
  119. m.To = to
  120. m.Index = in.next - 1
  121. m.LogTerm = sm.log.term(in.next - 1)
  122. m.Entries = sm.log.entries(in.next)
  123. m.Commit = sm.log.committed
  124. sm.send(m)
  125. }
  126. // bcastAppend sends RRPC, with entries to all peers that are not up-to-date according to sm.mis.
  127. func (sm *stateMachine) bcastAppend() {
  128. for i := 0; i < sm.k; i++ {
  129. if i == sm.addr {
  130. continue
  131. }
  132. sm.sendAppend(i)
  133. }
  134. }
  135. func (sm *stateMachine) maybeCommit() bool {
  136. // TODO(bmizerany): optimize.. Currently naive
  137. mis := make([]int, len(sm.ins))
  138. for i := range mis {
  139. mis[i] = sm.ins[i].match
  140. }
  141. sort.Sort(sort.Reverse(sort.IntSlice(mis)))
  142. mci := mis[sm.q()-1]
  143. return sm.log.maybeCommit(mci, sm.term)
  144. }
  145. // nextEnts returns the appliable entries and updates the applied index
  146. func (sm *stateMachine) nextEnts() (ents []Entry) {
  147. return sm.log.nextEnts()
  148. }
  149. func (sm *stateMachine) reset() {
  150. sm.lead = none
  151. sm.vote = none
  152. sm.votes = make(map[int]bool)
  153. sm.ins = make([]index, sm.k)
  154. for i := range sm.ins {
  155. sm.ins[i] = index{next: sm.log.lastIndex() + 1}
  156. if i == sm.addr {
  157. sm.ins[i].match = sm.log.lastIndex()
  158. }
  159. }
  160. }
  161. func (sm *stateMachine) q() int {
  162. return sm.k/2 + 1
  163. }
  164. func (sm *stateMachine) becomeFollower(term, lead int) {
  165. sm.reset()
  166. sm.term = term
  167. sm.lead = lead
  168. sm.state = stateFollower
  169. }
  170. func (sm *stateMachine) becomeCandidate() {
  171. // TODO(xiangli) remove the panic when the raft implementation is stable
  172. if sm.state == stateLeader {
  173. panic("invalid transition [leader -> candidate]")
  174. }
  175. sm.reset()
  176. sm.term++
  177. sm.vote = sm.addr
  178. sm.state = stateCandidate
  179. }
  180. func (sm *stateMachine) becomeLeader() {
  181. // TODO(xiangli) remove the panic when the raft implementation is stable
  182. if sm.state == stateFollower {
  183. panic("invalid transition [follower -> leader]")
  184. }
  185. sm.reset()
  186. sm.lead = sm.addr
  187. sm.state = stateLeader
  188. }
  189. func (sm *stateMachine) Msgs() []Message {
  190. msgs := sm.msgs
  191. sm.msgs = make([]Message, 0)
  192. return msgs
  193. }
  194. func (sm *stateMachine) Step(m Message) {
  195. switch m.Type {
  196. case msgHup:
  197. sm.becomeCandidate()
  198. if sm.q() == sm.poll(sm.addr, true) {
  199. sm.becomeLeader()
  200. return
  201. }
  202. for i := 0; i < sm.k; i++ {
  203. if i == sm.addr {
  204. continue
  205. }
  206. lasti := sm.log.lastIndex()
  207. sm.send(Message{To: i, Type: msgVote, Index: lasti, LogTerm: sm.log.term(lasti)})
  208. }
  209. return
  210. case msgBeat:
  211. if sm.state != stateLeader {
  212. return
  213. }
  214. sm.bcastAppend()
  215. case msgProp:
  216. switch sm.lead {
  217. case sm.addr:
  218. sm.log.append(sm.log.lastIndex(), Entry{Term: sm.term, Data: m.Data})
  219. sm.ins[sm.addr].update(sm.log.lastIndex())
  220. sm.log.maybeCommit(sm.log.lastIndex(), sm.term)
  221. sm.bcastAppend()
  222. case none:
  223. panic("msgProp given without leader")
  224. default:
  225. m.To = sm.lead
  226. sm.send(m)
  227. }
  228. return
  229. }
  230. switch {
  231. case m.Term > sm.term:
  232. sm.becomeFollower(m.Term, m.From)
  233. case m.Term < sm.term:
  234. // ignore
  235. return
  236. }
  237. handleAppendEntries := func() {
  238. if sm.log.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...) {
  239. sm.send(Message{To: m.From, Type: msgAppResp, Index: sm.log.lastIndex()})
  240. } else {
  241. sm.send(Message{To: m.From, Type: msgAppResp, Index: -1})
  242. }
  243. }
  244. switch sm.state {
  245. case stateLeader:
  246. switch m.Type {
  247. case msgAppResp:
  248. if m.Index < 0 {
  249. sm.ins[m.From].decr()
  250. sm.sendAppend(m.From)
  251. } else {
  252. sm.ins[m.From].update(m.Index)
  253. if sm.maybeCommit() {
  254. sm.bcastAppend()
  255. }
  256. }
  257. case msgVote:
  258. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  259. }
  260. case stateCandidate:
  261. switch m.Type {
  262. case msgApp:
  263. sm.becomeFollower(sm.term, m.From)
  264. handleAppendEntries()
  265. case msgVote:
  266. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  267. case msgVoteResp:
  268. gr := sm.poll(m.From, m.Index >= 0)
  269. switch sm.q() {
  270. case gr:
  271. sm.becomeLeader()
  272. sm.bcastAppend()
  273. case len(sm.votes) - gr:
  274. sm.becomeFollower(sm.term, none)
  275. }
  276. }
  277. case stateFollower:
  278. switch m.Type {
  279. case msgApp:
  280. handleAppendEntries()
  281. case msgVote:
  282. if (sm.vote == none || sm.vote == m.From) && sm.log.isUpToDate(m.Index, m.LogTerm) {
  283. sm.vote = m.From
  284. sm.send(Message{To: m.From, Type: msgVoteResp, Index: sm.log.lastIndex()})
  285. } else {
  286. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  287. }
  288. }
  289. }
  290. }