raft.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. package raft
  2. import (
  3. "errors"
  4. "sort"
  5. )
  6. const none = -1
  7. type messageType int
  8. const (
  9. msgHup messageType = iota
  10. msgBeat
  11. msgProp
  12. msgApp
  13. msgAppResp
  14. msgVote
  15. msgVoteResp
  16. )
  17. var mtmap = [...]string{
  18. msgHup: "msgHup",
  19. msgBeat: "msgBeat",
  20. msgProp: "msgProp",
  21. msgApp: "msgApp",
  22. msgAppResp: "msgAppResp",
  23. msgVote: "msgVote",
  24. msgVoteResp: "msgVoteResp",
  25. }
  26. func (mt messageType) String() string {
  27. return mtmap[int(mt)]
  28. }
  29. var errNoLeader = errors.New("no leader")
  30. const (
  31. stateFollower stateType = iota
  32. stateCandidate
  33. stateLeader
  34. )
  35. type stateType int
  36. var stmap = [...]string{
  37. stateFollower: "stateFollower",
  38. stateCandidate: "stateCandidate",
  39. stateLeader: "stateLeader",
  40. }
  41. func (st stateType) String() string {
  42. return stmap[int(st)]
  43. }
  44. type Message struct {
  45. Type messageType
  46. To int
  47. From int
  48. Term int
  49. LogTerm int
  50. Index int
  51. PrevTerm int
  52. Entries []Entry
  53. Commit int
  54. }
  55. type index struct {
  56. match, next int
  57. }
  58. func (in *index) update(n int) {
  59. in.match = n
  60. in.next = n + 1
  61. }
  62. func (in *index) decr() {
  63. if in.next--; in.next < 1 {
  64. in.next = 1
  65. }
  66. }
  67. type stateMachine struct {
  68. addr int
  69. // the term we are participating in at any time
  70. term int
  71. // who we voted for in term
  72. vote int
  73. // the log
  74. log *log
  75. ins map[int]*index
  76. state stateType
  77. votes map[int]bool
  78. msgs []Message
  79. // the leader addr
  80. lead int
  81. // pending reconfiguration
  82. pendingConf bool
  83. }
  84. func newStateMachine(addr int, peer []int) *stateMachine {
  85. sm := &stateMachine{addr: addr, log: newLog(), ins: make(map[int]*index)}
  86. for p := range peer {
  87. sm.ins[p] = &index{}
  88. }
  89. sm.reset()
  90. return sm
  91. }
  92. func (sm *stateMachine) canStep(m Message) bool {
  93. if m.Type == msgProp {
  94. return sm.lead != none
  95. }
  96. return true
  97. }
  98. func (sm *stateMachine) poll(addr int, v bool) (granted int) {
  99. if _, ok := sm.votes[addr]; !ok {
  100. sm.votes[addr] = v
  101. }
  102. for _, vv := range sm.votes {
  103. if vv {
  104. granted++
  105. }
  106. }
  107. return granted
  108. }
  109. // send persists state to stable storage and then sends to its mailbox.
  110. func (sm *stateMachine) send(m Message) {
  111. m.From = sm.addr
  112. m.Term = sm.term
  113. sm.msgs = append(sm.msgs, m)
  114. }
  115. // sendAppend sends RRPC, with entries to the given peer.
  116. func (sm *stateMachine) sendAppend(to int) {
  117. in := sm.ins[to]
  118. m := Message{}
  119. m.Type = msgApp
  120. m.To = to
  121. m.Index = in.next - 1
  122. m.LogTerm = sm.log.term(in.next - 1)
  123. m.Entries = sm.log.entries(in.next)
  124. m.Commit = sm.log.committed
  125. sm.send(m)
  126. }
  127. // bcastAppend sends RRPC, with entries to all peers that are not up-to-date according to sm.mis.
  128. func (sm *stateMachine) bcastAppend() {
  129. for i := range sm.ins {
  130. if i == sm.addr {
  131. continue
  132. }
  133. sm.sendAppend(i)
  134. }
  135. }
  136. func (sm *stateMachine) maybeCommit() bool {
  137. // TODO(bmizerany): optimize.. Currently naive
  138. mis := make([]int, len(sm.ins))
  139. for i := range mis {
  140. mis[i] = sm.ins[i].match
  141. }
  142. sort.Sort(sort.Reverse(sort.IntSlice(mis)))
  143. mci := mis[sm.q()-1]
  144. return sm.log.maybeCommit(mci, sm.term)
  145. }
  146. // nextEnts returns the appliable entries and updates the applied index
  147. func (sm *stateMachine) nextEnts() (ents []Entry) {
  148. return sm.log.nextEnts()
  149. }
  150. func (sm *stateMachine) reset() {
  151. sm.lead = none
  152. sm.vote = none
  153. sm.votes = make(map[int]bool)
  154. for i := range sm.ins {
  155. sm.ins[i] = &index{next: sm.log.lastIndex() + 1}
  156. if i == sm.addr {
  157. sm.ins[i].match = sm.log.lastIndex()
  158. }
  159. }
  160. }
  161. func (sm *stateMachine) q() int {
  162. return len(sm.ins)/2 + 1
  163. }
  164. func (sm *stateMachine) becomeFollower(term, lead int) {
  165. sm.reset()
  166. sm.term = term
  167. sm.lead = lead
  168. sm.state = stateFollower
  169. }
  170. func (sm *stateMachine) becomeCandidate() {
  171. // TODO(xiangli) remove the panic when the raft implementation is stable
  172. if sm.state == stateLeader {
  173. panic("invalid transition [leader -> candidate]")
  174. }
  175. sm.reset()
  176. sm.term++
  177. sm.vote = sm.addr
  178. sm.state = stateCandidate
  179. }
  180. func (sm *stateMachine) becomeLeader() {
  181. // TODO(xiangli) remove the panic when the raft implementation is stable
  182. if sm.state == stateFollower {
  183. panic("invalid transition [follower -> leader]")
  184. }
  185. sm.reset()
  186. sm.lead = sm.addr
  187. sm.state = stateLeader
  188. }
  189. func (sm *stateMachine) Msgs() []Message {
  190. msgs := sm.msgs
  191. sm.msgs = make([]Message, 0)
  192. return msgs
  193. }
  194. func (sm *stateMachine) Step(m Message) {
  195. switch m.Type {
  196. case msgHup:
  197. sm.becomeCandidate()
  198. if sm.q() == sm.poll(sm.addr, true) {
  199. sm.becomeLeader()
  200. return
  201. }
  202. for i := range sm.ins {
  203. if i == sm.addr {
  204. continue
  205. }
  206. lasti := sm.log.lastIndex()
  207. sm.send(Message{To: i, Type: msgVote, Index: lasti, LogTerm: sm.log.term(lasti)})
  208. }
  209. return
  210. case msgBeat:
  211. if sm.state != stateLeader {
  212. return
  213. }
  214. sm.bcastAppend()
  215. return
  216. case msgProp:
  217. if len(m.Entries) != 1 {
  218. panic("unexpected length(entries) of a msgProp")
  219. }
  220. switch sm.lead {
  221. case sm.addr:
  222. e := m.Entries[0]
  223. if e.Type == config {
  224. if sm.pendingConf {
  225. // todo: deny
  226. return
  227. }
  228. sm.pendingConf = true
  229. }
  230. e.Term = sm.term
  231. sm.log.append(sm.log.lastIndex(), e)
  232. sm.ins[sm.addr].update(sm.log.lastIndex())
  233. sm.maybeCommit()
  234. sm.bcastAppend()
  235. case none:
  236. panic("msgProp given without leader")
  237. default:
  238. m.To = sm.lead
  239. sm.send(m)
  240. }
  241. return
  242. }
  243. switch {
  244. case m.Term > sm.term:
  245. sm.becomeFollower(m.Term, m.From)
  246. case m.Term < sm.term:
  247. // ignore
  248. return
  249. }
  250. handleAppendEntries := func() {
  251. if sm.log.maybeAppend(m.Index, m.LogTerm, m.Commit, m.Entries...) {
  252. sm.send(Message{To: m.From, Type: msgAppResp, Index: sm.log.lastIndex()})
  253. } else {
  254. sm.send(Message{To: m.From, Type: msgAppResp, Index: -1})
  255. }
  256. }
  257. switch sm.state {
  258. case stateLeader:
  259. switch m.Type {
  260. case msgAppResp:
  261. if m.Index < 0 {
  262. sm.ins[m.From].decr()
  263. sm.sendAppend(m.From)
  264. } else {
  265. sm.ins[m.From].update(m.Index)
  266. if sm.maybeCommit() {
  267. sm.bcastAppend()
  268. }
  269. }
  270. case msgVote:
  271. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  272. }
  273. case stateCandidate:
  274. switch m.Type {
  275. case msgApp:
  276. sm.becomeFollower(sm.term, m.From)
  277. handleAppendEntries()
  278. case msgVote:
  279. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  280. case msgVoteResp:
  281. gr := sm.poll(m.From, m.Index >= 0)
  282. switch sm.q() {
  283. case gr:
  284. sm.becomeLeader()
  285. sm.bcastAppend()
  286. case len(sm.votes) - gr:
  287. sm.becomeFollower(sm.term, none)
  288. }
  289. }
  290. case stateFollower:
  291. switch m.Type {
  292. case msgApp:
  293. handleAppendEntries()
  294. case msgVote:
  295. if (sm.vote == none || sm.vote == m.From) && sm.log.isUpToDate(m.Index, m.LogTerm) {
  296. sm.vote = m.From
  297. sm.send(Message{To: m.From, Type: msgVoteResp, Index: sm.log.lastIndex()})
  298. } else {
  299. sm.send(Message{To: m.From, Type: msgVoteResp, Index: -1})
  300. }
  301. }
  302. }
  303. }