cluster.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package membership
  15. import (
  16. "bytes"
  17. "context"
  18. "crypto/sha1"
  19. "encoding/binary"
  20. "encoding/json"
  21. "fmt"
  22. "path"
  23. "sort"
  24. "strings"
  25. "sync"
  26. "time"
  27. "go.etcd.io/etcd/v3/etcdserver/api/v2store"
  28. "go.etcd.io/etcd/v3/mvcc/backend"
  29. "go.etcd.io/etcd/v3/pkg/netutil"
  30. "go.etcd.io/etcd/v3/pkg/types"
  31. "go.etcd.io/etcd/v3/raft"
  32. "go.etcd.io/etcd/v3/raft/raftpb"
  33. "go.etcd.io/etcd/v3/version"
  34. "github.com/coreos/go-semver/semver"
  35. "github.com/prometheus/client_golang/prometheus"
  36. "go.uber.org/zap"
  37. )
  38. // RaftCluster is a list of Members that belong to the same raft cluster
  39. type RaftCluster struct {
  40. lg *zap.Logger
  41. localID types.ID
  42. cid types.ID
  43. token string
  44. v2store v2store.Store
  45. be backend.Backend
  46. sync.Mutex // guards the fields below
  47. version *semver.Version
  48. members map[types.ID]*Member
  49. // removed contains the ids of removed members in the cluster.
  50. // removed id cannot be reused.
  51. removed map[types.ID]bool
  52. }
  53. // ConfigChangeContext represents a context for confChange.
  54. type ConfigChangeContext struct {
  55. Member
  56. // IsPromote indicates if the config change is for promoting a learner member.
  57. // This flag is needed because both adding a new member and promoting a learner member
  58. // uses the same config change type 'ConfChangeAddNode'.
  59. IsPromote bool `json:"isPromote"`
  60. }
  61. // NewClusterFromURLsMap creates a new raft cluster using provided urls map. Currently, it does not support creating
  62. // cluster with raft learner member.
  63. func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap) (*RaftCluster, error) {
  64. c := NewCluster(lg, token)
  65. for name, urls := range urlsmap {
  66. m := NewMember(name, urls, token, nil)
  67. if _, ok := c.members[m.ID]; ok {
  68. return nil, fmt.Errorf("member exists with identical ID %v", m)
  69. }
  70. if uint64(m.ID) == raft.None {
  71. return nil, fmt.Errorf("cannot use %x as member id", raft.None)
  72. }
  73. c.members[m.ID] = m
  74. }
  75. c.genID()
  76. return c, nil
  77. }
  78. func NewClusterFromMembers(lg *zap.Logger, token string, id types.ID, membs []*Member) *RaftCluster {
  79. c := NewCluster(lg, token)
  80. c.cid = id
  81. for _, m := range membs {
  82. c.members[m.ID] = m
  83. }
  84. return c
  85. }
  86. func NewCluster(lg *zap.Logger, token string) *RaftCluster {
  87. return &RaftCluster{
  88. lg: lg,
  89. token: token,
  90. members: make(map[types.ID]*Member),
  91. removed: make(map[types.ID]bool),
  92. }
  93. }
  94. func (c *RaftCluster) ID() types.ID { return c.cid }
  95. func (c *RaftCluster) Members() []*Member {
  96. c.Lock()
  97. defer c.Unlock()
  98. var ms MembersByID
  99. for _, m := range c.members {
  100. ms = append(ms, m.Clone())
  101. }
  102. sort.Sort(ms)
  103. return []*Member(ms)
  104. }
  105. func (c *RaftCluster) Member(id types.ID) *Member {
  106. c.Lock()
  107. defer c.Unlock()
  108. return c.members[id].Clone()
  109. }
  110. // MemberByName returns a Member with the given name if exists.
  111. // If more than one member has the given name, it will panic.
  112. func (c *RaftCluster) MemberByName(name string) *Member {
  113. c.Lock()
  114. defer c.Unlock()
  115. var memb *Member
  116. for _, m := range c.members {
  117. if m.Name == name {
  118. if memb != nil {
  119. if c.lg != nil {
  120. c.lg.Panic("two member with same name found", zap.String("name", name))
  121. } else {
  122. plog.Panicf("two members with the given name %q exist", name)
  123. }
  124. }
  125. memb = m
  126. }
  127. }
  128. return memb.Clone()
  129. }
  130. func (c *RaftCluster) MemberIDs() []types.ID {
  131. c.Lock()
  132. defer c.Unlock()
  133. var ids []types.ID
  134. for _, m := range c.members {
  135. ids = append(ids, m.ID)
  136. }
  137. sort.Sort(types.IDSlice(ids))
  138. return ids
  139. }
  140. func (c *RaftCluster) IsIDRemoved(id types.ID) bool {
  141. c.Lock()
  142. defer c.Unlock()
  143. return c.removed[id]
  144. }
  145. // PeerURLs returns a list of all peer addresses.
  146. // The returned list is sorted in ascending lexicographical order.
  147. func (c *RaftCluster) PeerURLs() []string {
  148. c.Lock()
  149. defer c.Unlock()
  150. urls := make([]string, 0)
  151. for _, p := range c.members {
  152. urls = append(urls, p.PeerURLs...)
  153. }
  154. sort.Strings(urls)
  155. return urls
  156. }
  157. // ClientURLs returns a list of all client addresses.
  158. // The returned list is sorted in ascending lexicographical order.
  159. func (c *RaftCluster) ClientURLs() []string {
  160. c.Lock()
  161. defer c.Unlock()
  162. urls := make([]string, 0)
  163. for _, p := range c.members {
  164. urls = append(urls, p.ClientURLs...)
  165. }
  166. sort.Strings(urls)
  167. return urls
  168. }
  169. func (c *RaftCluster) String() string {
  170. c.Lock()
  171. defer c.Unlock()
  172. b := &bytes.Buffer{}
  173. fmt.Fprintf(b, "{ClusterID:%s ", c.cid)
  174. var ms []string
  175. for _, m := range c.members {
  176. ms = append(ms, fmt.Sprintf("%+v", m))
  177. }
  178. fmt.Fprintf(b, "Members:[%s] ", strings.Join(ms, " "))
  179. var ids []string
  180. for id := range c.removed {
  181. ids = append(ids, id.String())
  182. }
  183. fmt.Fprintf(b, "RemovedMemberIDs:[%s]}", strings.Join(ids, " "))
  184. return b.String()
  185. }
  186. func (c *RaftCluster) genID() {
  187. mIDs := c.MemberIDs()
  188. b := make([]byte, 8*len(mIDs))
  189. for i, id := range mIDs {
  190. binary.BigEndian.PutUint64(b[8*i:], uint64(id))
  191. }
  192. hash := sha1.Sum(b)
  193. c.cid = types.ID(binary.BigEndian.Uint64(hash[:8]))
  194. }
  195. func (c *RaftCluster) SetID(localID, cid types.ID) {
  196. c.localID = localID
  197. c.cid = cid
  198. }
  199. func (c *RaftCluster) SetStore(st v2store.Store) { c.v2store = st }
  200. func (c *RaftCluster) SetBackend(be backend.Backend) {
  201. c.be = be
  202. mustCreateBackendBuckets(c.be)
  203. }
  204. func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
  205. c.Lock()
  206. defer c.Unlock()
  207. c.members, c.removed = membersFromStore(c.lg, c.v2store)
  208. c.version = clusterVersionFromStore(c.lg, c.v2store)
  209. mustDetectDowngrade(c.lg, c.version)
  210. onSet(c.lg, c.version)
  211. for _, m := range c.members {
  212. if c.lg != nil {
  213. c.lg.Info(
  214. "recovered/added member from store",
  215. zap.String("cluster-id", c.cid.String()),
  216. zap.String("local-member-id", c.localID.String()),
  217. zap.String("recovered-remote-peer-id", m.ID.String()),
  218. zap.Strings("recovered-remote-peer-urls", m.PeerURLs),
  219. )
  220. } else {
  221. plog.Infof("added member %s %v to cluster %s from store", m.ID, m.PeerURLs, c.cid)
  222. }
  223. }
  224. if c.version != nil {
  225. if c.lg != nil {
  226. c.lg.Info(
  227. "set cluster version from store",
  228. zap.String("cluster-version", version.Cluster(c.version.String())),
  229. )
  230. } else {
  231. plog.Infof("set the cluster version to %v from store", version.Cluster(c.version.String()))
  232. }
  233. }
  234. }
  235. // ValidateConfigurationChange takes a proposed ConfChange and
  236. // ensures that it is still valid.
  237. func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
  238. members, removed := membersFromStore(c.lg, c.v2store)
  239. id := types.ID(cc.NodeID)
  240. if removed[id] {
  241. return ErrIDRemoved
  242. }
  243. switch cc.Type {
  244. case raftpb.ConfChangeAddNode, raftpb.ConfChangeAddLearnerNode:
  245. confChangeContext := new(ConfigChangeContext)
  246. if err := json.Unmarshal(cc.Context, confChangeContext); err != nil {
  247. if c.lg != nil {
  248. c.lg.Panic("failed to unmarshal confChangeContext", zap.Error(err))
  249. } else {
  250. plog.Panicf("unmarshal confChangeContext should never fail: %v", err)
  251. }
  252. }
  253. // A ConfChangeAddNode to a existing learner node promotes it to a voting member.
  254. if confChangeContext.IsPromote {
  255. if members[id] == nil {
  256. return ErrIDNotFound
  257. }
  258. if !members[id].IsLearner {
  259. return ErrMemberNotLearner
  260. }
  261. } else {
  262. // add a learner or a follower case
  263. if members[id] != nil {
  264. return ErrIDExists
  265. }
  266. urls := make(map[string]bool)
  267. for _, m := range members {
  268. for _, u := range m.PeerURLs {
  269. urls[u] = true
  270. }
  271. }
  272. for _, u := range confChangeContext.Member.PeerURLs {
  273. if urls[u] {
  274. return ErrPeerURLexists
  275. }
  276. }
  277. }
  278. case raftpb.ConfChangeRemoveNode:
  279. if members[id] == nil {
  280. return ErrIDNotFound
  281. }
  282. case raftpb.ConfChangeUpdateNode:
  283. if members[id] == nil {
  284. return ErrIDNotFound
  285. }
  286. urls := make(map[string]bool)
  287. for _, m := range members {
  288. if m.ID == id {
  289. continue
  290. }
  291. for _, u := range m.PeerURLs {
  292. urls[u] = true
  293. }
  294. }
  295. m := new(Member)
  296. if err := json.Unmarshal(cc.Context, m); err != nil {
  297. if c.lg != nil {
  298. c.lg.Panic("failed to unmarshal member", zap.Error(err))
  299. } else {
  300. plog.Panicf("unmarshal member should never fail: %v", err)
  301. }
  302. }
  303. for _, u := range m.PeerURLs {
  304. if urls[u] {
  305. return ErrPeerURLexists
  306. }
  307. }
  308. default:
  309. if c.lg != nil {
  310. c.lg.Panic("unknown ConfChange type", zap.String("type", cc.Type.String()))
  311. } else {
  312. plog.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
  313. }
  314. }
  315. return nil
  316. }
  317. // AddMember adds a new Member into the cluster, and saves the given member's
  318. // raftAttributes into the store. The given member should have empty attributes.
  319. // A Member with a matching id must not exist.
  320. func (c *RaftCluster) AddMember(m *Member) {
  321. c.Lock()
  322. defer c.Unlock()
  323. if c.v2store != nil {
  324. mustSaveMemberToStore(c.v2store, m)
  325. }
  326. if c.be != nil {
  327. mustSaveMemberToBackend(c.be, m)
  328. }
  329. c.members[m.ID] = m
  330. if c.lg != nil {
  331. c.lg.Info(
  332. "added member",
  333. zap.String("cluster-id", c.cid.String()),
  334. zap.String("local-member-id", c.localID.String()),
  335. zap.String("added-peer-id", m.ID.String()),
  336. zap.Strings("added-peer-peer-urls", m.PeerURLs),
  337. )
  338. } else {
  339. plog.Infof("added member %s %v to cluster %s", m.ID, m.PeerURLs, c.cid)
  340. }
  341. }
  342. // RemoveMember removes a member from the store.
  343. // The given id MUST exist, or the function panics.
  344. func (c *RaftCluster) RemoveMember(id types.ID) {
  345. c.Lock()
  346. defer c.Unlock()
  347. if c.v2store != nil {
  348. mustDeleteMemberFromStore(c.v2store, id)
  349. }
  350. if c.be != nil {
  351. mustDeleteMemberFromBackend(c.be, id)
  352. }
  353. m, ok := c.members[id]
  354. delete(c.members, id)
  355. c.removed[id] = true
  356. if c.lg != nil {
  357. if ok {
  358. c.lg.Info(
  359. "removed member",
  360. zap.String("cluster-id", c.cid.String()),
  361. zap.String("local-member-id", c.localID.String()),
  362. zap.String("removed-remote-peer-id", id.String()),
  363. zap.Strings("removed-remote-peer-urls", m.PeerURLs),
  364. )
  365. } else {
  366. c.lg.Warn(
  367. "skipped removing already removed member",
  368. zap.String("cluster-id", c.cid.String()),
  369. zap.String("local-member-id", c.localID.String()),
  370. zap.String("removed-remote-peer-id", id.String()),
  371. )
  372. }
  373. } else {
  374. plog.Infof("removed member %s from cluster %s", id, c.cid)
  375. }
  376. }
  377. func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) {
  378. c.Lock()
  379. defer c.Unlock()
  380. if m, ok := c.members[id]; ok {
  381. m.Attributes = attr
  382. if c.v2store != nil {
  383. mustUpdateMemberAttrInStore(c.v2store, m)
  384. }
  385. if c.be != nil {
  386. mustSaveMemberToBackend(c.be, m)
  387. }
  388. return
  389. }
  390. _, ok := c.removed[id]
  391. if !ok {
  392. if c.lg != nil {
  393. c.lg.Panic(
  394. "failed to update; member unknown",
  395. zap.String("cluster-id", c.cid.String()),
  396. zap.String("local-member-id", c.localID.String()),
  397. zap.String("unknown-remote-peer-id", id.String()),
  398. )
  399. } else {
  400. plog.Panicf("error updating attributes of unknown member %s", id)
  401. }
  402. }
  403. if c.lg != nil {
  404. c.lg.Warn(
  405. "skipped attributes update of removed member",
  406. zap.String("cluster-id", c.cid.String()),
  407. zap.String("local-member-id", c.localID.String()),
  408. zap.String("updated-peer-id", id.String()),
  409. )
  410. } else {
  411. plog.Warningf("skipped updating attributes of removed member %s", id)
  412. }
  413. }
  414. // PromoteMember marks the member's IsLearner RaftAttributes to false.
  415. func (c *RaftCluster) PromoteMember(id types.ID) {
  416. c.Lock()
  417. defer c.Unlock()
  418. c.members[id].RaftAttributes.IsLearner = false
  419. if c.v2store != nil {
  420. mustUpdateMemberInStore(c.v2store, c.members[id])
  421. }
  422. if c.be != nil {
  423. mustSaveMemberToBackend(c.be, c.members[id])
  424. }
  425. if c.lg != nil {
  426. c.lg.Info(
  427. "promote member",
  428. zap.String("cluster-id", c.cid.String()),
  429. zap.String("local-member-id", c.localID.String()),
  430. )
  431. } else {
  432. plog.Noticef("promote member %s in cluster %s", id, c.cid)
  433. }
  434. }
  435. func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) {
  436. c.Lock()
  437. defer c.Unlock()
  438. c.members[id].RaftAttributes = raftAttr
  439. if c.v2store != nil {
  440. mustUpdateMemberInStore(c.v2store, c.members[id])
  441. }
  442. if c.be != nil {
  443. mustSaveMemberToBackend(c.be, c.members[id])
  444. }
  445. if c.lg != nil {
  446. c.lg.Info(
  447. "updated member",
  448. zap.String("cluster-id", c.cid.String()),
  449. zap.String("local-member-id", c.localID.String()),
  450. zap.String("updated-remote-peer-id", id.String()),
  451. zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
  452. )
  453. } else {
  454. plog.Noticef("updated member %s %v in cluster %s", id, raftAttr.PeerURLs, c.cid)
  455. }
  456. }
  457. func (c *RaftCluster) Version() *semver.Version {
  458. c.Lock()
  459. defer c.Unlock()
  460. if c.version == nil {
  461. return nil
  462. }
  463. return semver.Must(semver.NewVersion(c.version.String()))
  464. }
  465. func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *semver.Version)) {
  466. c.Lock()
  467. defer c.Unlock()
  468. if c.version != nil {
  469. if c.lg != nil {
  470. c.lg.Info(
  471. "updated cluster version",
  472. zap.String("cluster-id", c.cid.String()),
  473. zap.String("local-member-id", c.localID.String()),
  474. zap.String("from", version.Cluster(c.version.String())),
  475. zap.String("from", version.Cluster(ver.String())),
  476. )
  477. } else {
  478. plog.Noticef("updated the cluster version from %v to %v", version.Cluster(c.version.String()), version.Cluster(ver.String()))
  479. }
  480. } else {
  481. if c.lg != nil {
  482. c.lg.Info(
  483. "set initial cluster version",
  484. zap.String("cluster-id", c.cid.String()),
  485. zap.String("local-member-id", c.localID.String()),
  486. zap.String("cluster-version", version.Cluster(ver.String())),
  487. )
  488. } else {
  489. plog.Noticef("set the initial cluster version to %v", version.Cluster(ver.String()))
  490. }
  491. }
  492. c.version = ver
  493. mustDetectDowngrade(c.lg, c.version)
  494. if c.v2store != nil {
  495. mustSaveClusterVersionToStore(c.v2store, ver)
  496. }
  497. if c.be != nil {
  498. mustSaveClusterVersionToBackend(c.be, ver)
  499. }
  500. ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": ver.String()}).Set(1)
  501. onSet(c.lg, ver)
  502. }
  503. func (c *RaftCluster) IsReadyToAddNewMember() bool {
  504. nmembers := 1
  505. nstarted := 0
  506. for _, member := range c.members {
  507. if member.IsStarted() {
  508. nstarted++
  509. }
  510. nmembers++
  511. }
  512. if nstarted == 1 && nmembers == 2 {
  513. // a case of adding a new node to 1-member cluster for restoring cluster data
  514. // https://github.com/etcd-io/etcd/blob/master/Documentation/v2/admin_guide.md#restoring-the-cluster
  515. if c.lg != nil {
  516. c.lg.Debug("number of started member is 1; can accept add member request")
  517. } else {
  518. plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
  519. }
  520. return true
  521. }
  522. nquorum := nmembers/2 + 1
  523. if nstarted < nquorum {
  524. if c.lg != nil {
  525. c.lg.Warn(
  526. "rejecting member add; started member will be less than quorum",
  527. zap.Int("number-of-started-member", nstarted),
  528. zap.Int("quorum", nquorum),
  529. zap.String("cluster-id", c.cid.String()),
  530. zap.String("local-member-id", c.localID.String()),
  531. )
  532. } else {
  533. plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
  534. }
  535. return false
  536. }
  537. return true
  538. }
  539. func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
  540. nmembers := 0
  541. nstarted := 0
  542. for _, member := range c.members {
  543. if uint64(member.ID) == id {
  544. continue
  545. }
  546. if member.IsStarted() {
  547. nstarted++
  548. }
  549. nmembers++
  550. }
  551. nquorum := nmembers/2 + 1
  552. if nstarted < nquorum {
  553. if c.lg != nil {
  554. c.lg.Warn(
  555. "rejecting member remove; started member will be less than quorum",
  556. zap.Int("number-of-started-member", nstarted),
  557. zap.Int("quorum", nquorum),
  558. zap.String("cluster-id", c.cid.String()),
  559. zap.String("local-member-id", c.localID.String()),
  560. )
  561. } else {
  562. plog.Warningf("Reject remove member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
  563. }
  564. return false
  565. }
  566. return true
  567. }
  568. func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
  569. members := make(map[types.ID]*Member)
  570. removed := make(map[types.ID]bool)
  571. e, err := st.Get(StoreMembersPrefix, true, true)
  572. if err != nil {
  573. if isKeyNotFound(err) {
  574. return members, removed
  575. }
  576. if lg != nil {
  577. lg.Panic("failed to get members from store", zap.String("path", StoreMembersPrefix), zap.Error(err))
  578. } else {
  579. plog.Panicf("get storeMembers should never fail: %v", err)
  580. }
  581. }
  582. for _, n := range e.Node.Nodes {
  583. var m *Member
  584. m, err = nodeToMember(n)
  585. if err != nil {
  586. if lg != nil {
  587. lg.Panic("failed to nodeToMember", zap.Error(err))
  588. } else {
  589. plog.Panicf("nodeToMember should never fail: %v", err)
  590. }
  591. }
  592. members[m.ID] = m
  593. }
  594. e, err = st.Get(storeRemovedMembersPrefix, true, true)
  595. if err != nil {
  596. if isKeyNotFound(err) {
  597. return members, removed
  598. }
  599. if lg != nil {
  600. lg.Panic(
  601. "failed to get removed members from store",
  602. zap.String("path", storeRemovedMembersPrefix),
  603. zap.Error(err),
  604. )
  605. } else {
  606. plog.Panicf("get storeRemovedMembers should never fail: %v", err)
  607. }
  608. }
  609. for _, n := range e.Node.Nodes {
  610. removed[MustParseMemberIDFromKey(n.Key)] = true
  611. }
  612. return members, removed
  613. }
  614. func clusterVersionFromStore(lg *zap.Logger, st v2store.Store) *semver.Version {
  615. e, err := st.Get(path.Join(storePrefix, "version"), false, false)
  616. if err != nil {
  617. if isKeyNotFound(err) {
  618. return nil
  619. }
  620. if lg != nil {
  621. lg.Panic(
  622. "failed to get cluster version from store",
  623. zap.String("path", path.Join(storePrefix, "version")),
  624. zap.Error(err),
  625. )
  626. } else {
  627. plog.Panicf("unexpected error (%v) when getting cluster version from store", err)
  628. }
  629. }
  630. return semver.Must(semver.NewVersion(*e.Node.Value))
  631. }
  632. // ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs
  633. // with the existing cluster. If the validation succeeds, it assigns the IDs
  634. // from the existing cluster to the local cluster.
  635. // If the validation fails, an error will be returned.
  636. func ValidateClusterAndAssignIDs(lg *zap.Logger, local *RaftCluster, existing *RaftCluster) error {
  637. ems := existing.Members()
  638. lms := local.Members()
  639. if len(ems) != len(lms) {
  640. return fmt.Errorf("member count is unequal")
  641. }
  642. sort.Sort(MembersByPeerURLs(ems))
  643. sort.Sort(MembersByPeerURLs(lms))
  644. ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
  645. defer cancel()
  646. for i := range ems {
  647. if ok, err := netutil.URLStringsEqual(ctx, lg, ems[i].PeerURLs, lms[i].PeerURLs); !ok {
  648. return fmt.Errorf("unmatched member while checking PeerURLs (%v)", err)
  649. }
  650. lms[i].ID = ems[i].ID
  651. }
  652. local.members = make(map[types.ID]*Member)
  653. for _, m := range lms {
  654. local.members[m.ID] = m
  655. }
  656. return nil
  657. }
  658. func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version) {
  659. lv := semver.Must(semver.NewVersion(version.Version))
  660. // only keep major.minor version for comparison against cluster version
  661. lv = &semver.Version{Major: lv.Major, Minor: lv.Minor}
  662. if cv != nil && lv.LessThan(*cv) {
  663. if lg != nil {
  664. lg.Fatal(
  665. "invalid downgrade; server version is lower than determined cluster version",
  666. zap.String("current-server-version", version.Version),
  667. zap.String("determined-cluster-version", version.Cluster(cv.String())),
  668. )
  669. } else {
  670. plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
  671. }
  672. }
  673. }
  674. // IsLocalMemberLearner returns if the local member is raft learner
  675. func (c *RaftCluster) IsLocalMemberLearner() bool {
  676. c.Lock()
  677. defer c.Unlock()
  678. localMember, ok := c.members[c.localID]
  679. if !ok {
  680. if c.lg != nil {
  681. c.lg.Panic(
  682. "failed to find local ID in cluster members",
  683. zap.String("cluster-id", c.cid.String()),
  684. zap.String("local-member-id", c.localID.String()),
  685. )
  686. } else {
  687. plog.Panicf("failed to find local ID %s in cluster %s", c.localID.String(), c.cid.String())
  688. }
  689. }
  690. return localMember.IsLearner
  691. }
  692. // IsMemberExist returns if the member with the given id exists in cluster.
  693. func (c *RaftCluster) IsMemberExist(id types.ID) bool {
  694. c.Lock()
  695. defer c.Unlock()
  696. _, ok := c.members[id]
  697. return ok
  698. }
  699. // VotingMemberIDs returns the ID of voting members in cluster.
  700. func (c *RaftCluster) VotingMemberIDs() []types.ID {
  701. c.Lock()
  702. defer c.Unlock()
  703. var ids []types.ID
  704. for _, m := range c.members {
  705. if !m.IsLearner {
  706. ids = append(ids, m.ID)
  707. }
  708. }
  709. sort.Sort(types.IDSlice(ids))
  710. return ids
  711. }