election.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. // Copyright 2016 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package concurrency
  15. import (
  16. "context"
  17. "errors"
  18. "fmt"
  19. v3 "go.etcd.io/etcd/clientv3"
  20. pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
  21. "go.etcd.io/etcd/mvcc/mvccpb"
  22. )
// Sentinel errors returned by Election methods.
var (
	// ErrElectionNotLeader is returned by Proclaim when this Election holds
	// no leader session, or when the leader key's create revision no longer
	// matches the revision recorded for it.
	ErrElectionNotLeader = errors.New("election: not leader")

	// ErrElectionNoLeader is returned by Leader when no key exists under the
	// election's key prefix.
	ErrElectionNoLeader = errors.New("election: no leader")
)
// Election holds one participant's view of a leader election that is carried
// out over keys sharing a common prefix.
type Election struct {
	// session supplies the client used for every request; Campaign also
	// attaches its lease to the candidate key.
	session *Session

	// keyPrefix is the prefix under which candidate keys are created
	// (Campaign) and queried (Leader, Observe).
	keyPrefix string

	// leaderKey is the key written by Campaign or handed to ResumeElection.
	// It is reset to "" by Resign and by a Proclaim whose compare fails.
	leaderKey string

	// leaderRev is the create revision recorded for leaderKey; Proclaim and
	// Resign use it as the guard of their transactions.
	leaderRev int64

	// leaderSession is the session whose lease is attached to leaderKey.
	// While nil, Proclaim returns ErrElectionNotLeader and Resign is a no-op.
	leaderSession *Session

	// hdr is the response header saved by the most recent successful
	// Campaign, Proclaim or Resign.
	hdr *pb.ResponseHeader
}
  35. // NewElection returns a new election on a given key prefix.
  36. func NewElection(s *Session, pfx string) *Election {
  37. return &Election{session: s, keyPrefix: pfx + "/"}
  38. }
  39. // ResumeElection initializes an election with a known leader.
  40. func ResumeElection(s *Session, pfx string, leaderKey string, leaderRev int64) *Election {
  41. return &Election{
  42. keyPrefix: pfx,
  43. session: s,
  44. leaderKey: leaderKey,
  45. leaderRev: leaderRev,
  46. leaderSession: s,
  47. }
  48. }
  49. // Campaign puts a value as eligible for the election on the prefix
  50. // key.
  51. // Multiple sessions can participate in the election for the
  52. // same prefix, but only one can be the leader at a time.
  53. //
  54. // If the context is 'context.TODO()/context.Background()', the Campaign
  55. // will continue to be blocked for other keys to be deleted, unless server
  56. // returns a non-recoverable error (e.g. ErrCompacted).
  57. // Otherwise, until the context is not cancelled or timed-out, Campaign will
  58. // continue to be blocked until it becomes the leader.
  59. func (e *Election) Campaign(ctx context.Context, val string) error {
  60. s := e.session
  61. client := e.session.Client()
  62. k := fmt.Sprintf("%s%x", e.keyPrefix, s.Lease())
  63. txn := client.Txn(ctx).If(v3.Compare(v3.CreateRevision(k), "=", 0))
  64. txn = txn.Then(v3.OpPut(k, val, v3.WithLease(s.Lease())))
  65. txn = txn.Else(v3.OpGet(k))
  66. resp, err := txn.Commit()
  67. if err != nil {
  68. return err
  69. }
  70. e.leaderKey, e.leaderRev, e.leaderSession = k, resp.Header.Revision, s
  71. if !resp.Succeeded {
  72. kv := resp.Responses[0].GetResponseRange().Kvs[0]
  73. e.leaderRev = kv.CreateRevision
  74. if string(kv.Value) != val {
  75. if err = e.Proclaim(ctx, val); err != nil {
  76. e.Resign(ctx)
  77. return err
  78. }
  79. }
  80. }
  81. _, err = waitDeletes(ctx, client, e.keyPrefix, e.leaderRev-1)
  82. if err != nil {
  83. // clean up in case of context cancel
  84. select {
  85. case <-ctx.Done():
  86. e.Resign(client.Ctx())
  87. default:
  88. e.leaderSession = nil
  89. }
  90. return err
  91. }
  92. e.hdr = resp.Header
  93. return nil
  94. }
  95. // Proclaim lets the leader announce a new value without another election.
  96. func (e *Election) Proclaim(ctx context.Context, val string) error {
  97. if e.leaderSession == nil {
  98. return ErrElectionNotLeader
  99. }
  100. client := e.session.Client()
  101. cmp := v3.Compare(v3.CreateRevision(e.leaderKey), "=", e.leaderRev)
  102. txn := client.Txn(ctx).If(cmp)
  103. txn = txn.Then(v3.OpPut(e.leaderKey, val, v3.WithLease(e.leaderSession.Lease())))
  104. tresp, terr := txn.Commit()
  105. if terr != nil {
  106. return terr
  107. }
  108. if !tresp.Succeeded {
  109. e.leaderKey = ""
  110. return ErrElectionNotLeader
  111. }
  112. e.hdr = tresp.Header
  113. return nil
  114. }
  115. // Resign lets a leader start a new election.
  116. func (e *Election) Resign(ctx context.Context) (err error) {
  117. if e.leaderSession == nil {
  118. return nil
  119. }
  120. client := e.session.Client()
  121. cmp := v3.Compare(v3.CreateRevision(e.leaderKey), "=", e.leaderRev)
  122. resp, err := client.Txn(ctx).If(cmp).Then(v3.OpDelete(e.leaderKey)).Commit()
  123. if err == nil {
  124. e.hdr = resp.Header
  125. }
  126. e.leaderKey = ""
  127. e.leaderSession = nil
  128. return err
  129. }
  130. // Leader returns the leader value for the current election.
  131. func (e *Election) Leader(ctx context.Context) (*v3.GetResponse, error) {
  132. client := e.session.Client()
  133. resp, err := client.Get(ctx, e.keyPrefix, v3.WithFirstCreate()...)
  134. if err != nil {
  135. return nil, err
  136. } else if len(resp.Kvs) == 0 {
  137. // no leader currently elected
  138. return nil, ErrElectionNoLeader
  139. }
  140. return resp, nil
  141. }
  142. // Observe returns a channel that reliably observes ordered leader proposals
  143. // as GetResponse values on every current elected leader key. It will not
  144. // necessarily fetch all historical leader updates, but will always post the
  145. // most recent leader value.
  146. //
  147. // The channel closes when the context is canceled or the underlying watcher
  148. // is otherwise disrupted.
  149. func (e *Election) Observe(ctx context.Context) <-chan v3.GetResponse {
  150. retc := make(chan v3.GetResponse)
  151. go e.observe(ctx, retc)
  152. return retc
  153. }
  154. func (e *Election) observe(ctx context.Context, ch chan<- v3.GetResponse) {
  155. client := e.session.Client()
  156. defer close(ch)
  157. for {
  158. resp, err := client.Get(ctx, e.keyPrefix, v3.WithFirstCreate()...)
  159. if err != nil {
  160. return
  161. }
  162. var kv *mvccpb.KeyValue
  163. var hdr *pb.ResponseHeader
  164. if len(resp.Kvs) == 0 {
  165. cctx, cancel := context.WithCancel(ctx)
  166. // wait for first key put on prefix
  167. opts := []v3.OpOption{v3.WithRev(resp.Header.Revision), v3.WithPrefix()}
  168. wch := client.Watch(cctx, e.keyPrefix, opts...)
  169. for kv == nil {
  170. wr, ok := <-wch
  171. if !ok || wr.Err() != nil {
  172. cancel()
  173. return
  174. }
  175. // only accept puts; a delete will make observe() spin
  176. for _, ev := range wr.Events {
  177. if ev.Type == mvccpb.PUT {
  178. hdr, kv = &wr.Header, ev.Kv
  179. // may have multiple revs; hdr.rev = the last rev
  180. // set to kv's rev in case batch has multiple Puts
  181. hdr.Revision = kv.ModRevision
  182. break
  183. }
  184. }
  185. }
  186. cancel()
  187. } else {
  188. hdr, kv = resp.Header, resp.Kvs[0]
  189. }
  190. select {
  191. case ch <- v3.GetResponse{Header: hdr, Kvs: []*mvccpb.KeyValue{kv}}:
  192. case <-ctx.Done():
  193. return
  194. }
  195. cctx, cancel := context.WithCancel(ctx)
  196. wch := client.Watch(cctx, string(kv.Key), v3.WithRev(hdr.Revision+1))
  197. keyDeleted := false
  198. for !keyDeleted {
  199. wr, ok := <-wch
  200. if !ok {
  201. cancel()
  202. return
  203. }
  204. for _, ev := range wr.Events {
  205. if ev.Type == mvccpb.DELETE {
  206. keyDeleted = true
  207. break
  208. }
  209. resp.Header = &wr.Header
  210. resp.Kvs = []*mvccpb.KeyValue{ev.Kv}
  211. select {
  212. case ch <- *resp:
  213. case <-cctx.Done():
  214. cancel()
  215. return
  216. }
  217. }
  218. }
  219. cancel()
  220. }
  221. }
  222. // Key returns the leader key if elected, empty string otherwise.
  223. func (e *Election) Key() string { return e.leaderKey }
  224. // Rev returns the leader key's creation revision, if elected.
  225. func (e *Election) Rev() int64 { return e.leaderRev }
  226. // Header is the response header from the last successful election proposal.
  227. func (e *Election) Header() *pb.ResponseHeader { return e.hdr }