tracker.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // Copyright 2019 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package tracker
  15. import (
  16. "fmt"
  17. "sort"
  18. "strings"
  19. "go.etcd.io/etcd/raft/quorum"
  20. pb "go.etcd.io/etcd/raft/raftpb"
  21. )
  22. // Config reflects the configuration tracked in a ProgressTracker.
  23. type Config struct {
  24. Voters quorum.JointConfig
  25. // AutoLeave is true if the configuration is joint and a transition to the
  26. // incoming configuration should be carried out automatically by Raft when
  27. // this is possible. If false, the configuration will be joint until the
  28. // application initiates the transition manually.
  29. AutoLeave bool
  30. // Learners is a set of IDs corresponding to the learners active in the
  31. // current configuration.
  32. //
  33. // Invariant: Learners and Voters does not intersect, i.e. if a peer is in
  34. // either half of the joint config, it can't be a learner; if it is a
  35. // learner it can't be in either half of the joint config. This invariant
  36. // simplifies the implementation since it allows peers to have clarity about
  37. // its current role without taking into account joint consensus.
  38. Learners map[uint64]struct{}
  39. // When we turn a voter into a learner during a joint consensus transition,
  40. // we cannot add the learner directly when entering the joint state. This is
  41. // because this would violate the invariant that the intersection of
  42. // voters and learners is empty. For example, assume a Voter is removed and
  43. // immediately re-added as a learner (or in other words, it is demoted):
  44. //
  45. // Initially, the configuration will be
  46. //
  47. // voters: {1 2 3}
  48. // learners: {}
  49. //
  50. // and we want to demote 3. Entering the joint configuration, we naively get
  51. //
  52. // voters: {1 2} & {1 2 3}
  53. // learners: {3}
  54. //
  55. // but this violates the invariant (3 is both voter and learner). Instead,
  56. // we get
  57. //
  58. // voters: {1 2} & {1 2 3}
  59. // learners: {}
  60. // next_learners: {3}
  61. //
  62. // Where 3 is now still purely a voter, but we are remembering the intention
  63. // to make it a learner upon transitioning into the final configuration:
  64. //
  65. // voters: {1 2}
  66. // learners: {3}
  67. // next_learners: {}
  68. //
  69. // Note that next_learners is not used while adding a learner that is not
  70. // also a voter in the joint config. In this case, the learner is added
  71. // right away when entering the joint configuration, so that it is caught up
  72. // as soon as possible.
  73. LearnersNext map[uint64]struct{}
  74. }
  75. func (c Config) String() string {
  76. var buf strings.Builder
  77. fmt.Fprintf(&buf, "voters=%s", c.Voters)
  78. if c.Learners != nil {
  79. fmt.Fprintf(&buf, " learners=%s", quorum.MajorityConfig(c.Learners).String())
  80. }
  81. if c.LearnersNext != nil {
  82. fmt.Fprintf(&buf, " learners_next=%s", quorum.MajorityConfig(c.LearnersNext).String())
  83. }
  84. if c.AutoLeave {
  85. fmt.Fprintf(&buf, " autoleave")
  86. }
  87. return buf.String()
  88. }
  89. // Clone returns a copy of the Config that shares no memory with the original.
  90. func (c *Config) Clone() Config {
  91. clone := func(m map[uint64]struct{}) map[uint64]struct{} {
  92. if m == nil {
  93. return nil
  94. }
  95. mm := make(map[uint64]struct{}, len(m))
  96. for k := range m {
  97. mm[k] = struct{}{}
  98. }
  99. return mm
  100. }
  101. return Config{
  102. Voters: quorum.JointConfig{clone(c.Voters[0]), clone(c.Voters[1])},
  103. Learners: clone(c.Learners),
  104. LearnersNext: clone(c.LearnersNext),
  105. }
  106. }
  107. // ProgressTracker tracks the currently active configuration and the information
  108. // known about the nodes and learners in it. In particular, it tracks the match
  109. // index for each peer which in turn allows reasoning about the committed index.
  110. type ProgressTracker struct {
  111. Config
  112. Progress ProgressMap
  113. Votes map[uint64]bool
  114. MaxInflight int
  115. }
  116. // MakeProgressTracker initializes a ProgressTracker.
  117. func MakeProgressTracker(maxInflight int) ProgressTracker {
  118. p := ProgressTracker{
  119. MaxInflight: maxInflight,
  120. Config: Config{
  121. Voters: quorum.JointConfig{
  122. quorum.MajorityConfig{},
  123. nil, // only populated when used
  124. },
  125. Learners: nil, // only populated when used
  126. LearnersNext: nil, // only populated when used
  127. },
  128. Votes: map[uint64]bool{},
  129. Progress: map[uint64]*Progress{},
  130. }
  131. return p
  132. }
  133. // ConfState returns a ConfState representing the active configuration.
  134. func (p *ProgressTracker) ConfState() pb.ConfState {
  135. return pb.ConfState{
  136. Voters: p.Voters[0].Slice(),
  137. VotersOutgoing: p.Voters[1].Slice(),
  138. Learners: quorum.MajorityConfig(p.Learners).Slice(),
  139. LearnersNext: quorum.MajorityConfig(p.LearnersNext).Slice(),
  140. AutoLeave: p.AutoLeave,
  141. }
  142. }
  143. // IsSingleton returns true if (and only if) there is only one voting member
  144. // (i.e. the leader) in the current configuration.
  145. func (p *ProgressTracker) IsSingleton() bool {
  146. return len(p.Voters[0]) == 1 && len(p.Voters[1]) == 0
  147. }
  148. type matchAckIndexer map[uint64]*Progress
  149. var _ quorum.AckedIndexer = matchAckIndexer(nil)
  150. // AckedIndex implements IndexLookuper.
  151. func (l matchAckIndexer) AckedIndex(id uint64) (quorum.Index, bool) {
  152. pr, ok := l[id]
  153. if !ok {
  154. return 0, false
  155. }
  156. return quorum.Index(pr.Match), true
  157. }
  158. // Committed returns the largest log index known to be committed based on what
  159. // the voting members of the group have acknowledged.
  160. func (p *ProgressTracker) Committed() uint64 {
  161. return uint64(p.Voters.CommittedIndex(matchAckIndexer(p.Progress)))
  162. }
  163. func insertionSort(sl []uint64) {
  164. a, b := 0, len(sl)
  165. for i := a + 1; i < b; i++ {
  166. for j := i; j > a && sl[j] < sl[j-1]; j-- {
  167. sl[j], sl[j-1] = sl[j-1], sl[j]
  168. }
  169. }
  170. }
  171. // Visit invokes the supplied closure for all tracked progresses in stable order.
  172. func (p *ProgressTracker) Visit(f func(id uint64, pr *Progress)) {
  173. n := len(p.Progress)
  174. // We need to sort the IDs and don't want to allocate since this is hot code.
  175. // The optimization here mirrors that in `(MajorityConfig).CommittedIndex`,
  176. // see there for details.
  177. var sl [7]uint64
  178. ids := sl[:]
  179. if len(sl) >= n {
  180. ids = sl[:n]
  181. } else {
  182. ids = make([]uint64, n)
  183. }
  184. for id := range p.Progress {
  185. n--
  186. ids[n] = id
  187. }
  188. insertionSort(ids)
  189. for _, id := range ids {
  190. f(id, p.Progress[id])
  191. }
  192. }
  193. // QuorumActive returns true if the quorum is active from the view of the local
  194. // raft state machine. Otherwise, it returns false.
  195. func (p *ProgressTracker) QuorumActive() bool {
  196. votes := map[uint64]bool{}
  197. p.Visit(func(id uint64, pr *Progress) {
  198. if pr.IsLearner {
  199. return
  200. }
  201. votes[id] = pr.RecentActive
  202. })
  203. return p.Voters.VoteResult(votes) == quorum.VoteWon
  204. }
  205. // VoterNodes returns a sorted slice of voters.
  206. func (p *ProgressTracker) VoterNodes() []uint64 {
  207. m := p.Voters.IDs()
  208. nodes := make([]uint64, 0, len(m))
  209. for id := range m {
  210. nodes = append(nodes, id)
  211. }
  212. sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] })
  213. return nodes
  214. }
  215. // LearnerNodes returns a sorted slice of learners.
  216. func (p *ProgressTracker) LearnerNodes() []uint64 {
  217. if len(p.Learners) == 0 {
  218. return nil
  219. }
  220. nodes := make([]uint64, 0, len(p.Learners))
  221. for id := range p.Learners {
  222. nodes = append(nodes, id)
  223. }
  224. sort.Slice(nodes, func(i, j int) bool { return nodes[i] < nodes[j] })
  225. return nodes
  226. }
  227. // ResetVotes prepares for a new round of vote counting via recordVote.
  228. func (p *ProgressTracker) ResetVotes() {
  229. p.Votes = map[uint64]bool{}
  230. }
  231. // RecordVote records that the node with the given id voted for this Raft
  232. // instance if v == true (and declined it otherwise).
  233. func (p *ProgressTracker) RecordVote(id uint64, v bool) {
  234. _, ok := p.Votes[id]
  235. if !ok {
  236. p.Votes[id] = v
  237. }
  238. }
  239. // TallyVotes returns the number of granted and rejected Votes, and whether the
  240. // election outcome is known.
  241. func (p *ProgressTracker) TallyVotes() (granted int, rejected int, _ quorum.VoteResult) {
  242. // Make sure to populate granted/rejected correctly even if the Votes slice
  243. // contains members no longer part of the configuration. This doesn't really
  244. // matter in the way the numbers are used (they're informational), but might
  245. // as well get it right.
  246. for id, pr := range p.Progress {
  247. if pr.IsLearner {
  248. continue
  249. }
  250. v, voted := p.Votes[id]
  251. if !voted {
  252. continue
  253. }
  254. if v {
  255. granted++
  256. } else {
  257. rejected++
  258. }
  259. }
  260. result := p.Voters.VoteResult(p.Votes)
  261. return granted, rejected, result
  262. }