cluster.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package membership
  15. import (
  16. "bytes"
  17. "context"
  18. "crypto/sha1"
  19. "encoding/binary"
  20. "encoding/json"
  21. "fmt"
  22. "path"
  23. "sort"
  24. "strings"
  25. "sync"
  26. "time"
  27. "go.etcd.io/etcd/v3/etcdserver/api/v2store"
  28. "go.etcd.io/etcd/v3/mvcc/backend"
  29. "go.etcd.io/etcd/v3/pkg/netutil"
  30. "go.etcd.io/etcd/v3/pkg/types"
  31. "go.etcd.io/etcd/v3/raft"
  32. "go.etcd.io/etcd/v3/raft/raftpb"
  33. "go.etcd.io/etcd/v3/version"
  34. "github.com/coreos/go-semver/semver"
  35. "github.com/prometheus/client_golang/prometheus"
  36. "go.uber.org/zap"
  37. )
  38. // RaftCluster is a list of Members that belong to the same raft cluster
  39. type RaftCluster struct {
  40. lg *zap.Logger
  41. localID types.ID
  42. cid types.ID
  43. token string
  44. v2store v2store.Store
  45. be backend.Backend
  46. sync.Mutex // guards the fields below
  47. version *semver.Version
  48. members map[types.ID]*Member
  49. // removed contains the ids of removed members in the cluster.
  50. // removed id cannot be reused.
  51. removed map[types.ID]bool
  52. }
  53. // ConfigChangeContext represents a context for confChange.
  54. type ConfigChangeContext struct {
  55. Member
  56. IsPromote bool `json:"isPromote"`
  57. }
  58. // NewClusterFromURLsMap creates a new raft cluster using provided urls map. Currently, it does not support creating
  59. // cluster with raft learner member.
  60. func NewClusterFromURLsMap(lg *zap.Logger, token string, urlsmap types.URLsMap) (*RaftCluster, error) {
  61. c := NewCluster(lg, token)
  62. for name, urls := range urlsmap {
  63. m := NewMember(name, urls, token, nil)
  64. if _, ok := c.members[m.ID]; ok {
  65. return nil, fmt.Errorf("member exists with identical ID %v", m)
  66. }
  67. if uint64(m.ID) == raft.None {
  68. return nil, fmt.Errorf("cannot use %x as member id", raft.None)
  69. }
  70. c.members[m.ID] = m
  71. }
  72. c.genID()
  73. return c, nil
  74. }
  75. func NewClusterFromMembers(lg *zap.Logger, token string, id types.ID, membs []*Member) *RaftCluster {
  76. c := NewCluster(lg, token)
  77. c.cid = id
  78. for _, m := range membs {
  79. c.members[m.ID] = m
  80. }
  81. return c
  82. }
  83. func NewCluster(lg *zap.Logger, token string) *RaftCluster {
  84. return &RaftCluster{
  85. lg: lg,
  86. token: token,
  87. members: make(map[types.ID]*Member),
  88. removed: make(map[types.ID]bool),
  89. }
  90. }
  91. func (c *RaftCluster) ID() types.ID { return c.cid }
  92. func (c *RaftCluster) Members() []*Member {
  93. c.Lock()
  94. defer c.Unlock()
  95. var ms MembersByID
  96. for _, m := range c.members {
  97. ms = append(ms, m.Clone())
  98. }
  99. sort.Sort(ms)
  100. return []*Member(ms)
  101. }
  102. func (c *RaftCluster) Member(id types.ID) *Member {
  103. c.Lock()
  104. defer c.Unlock()
  105. return c.members[id].Clone()
  106. }
  107. // MemberByName returns a Member with the given name if exists.
  108. // If more than one member has the given name, it will panic.
  109. func (c *RaftCluster) MemberByName(name string) *Member {
  110. c.Lock()
  111. defer c.Unlock()
  112. var memb *Member
  113. for _, m := range c.members {
  114. if m.Name == name {
  115. if memb != nil {
  116. if c.lg != nil {
  117. c.lg.Panic("two member with same name found", zap.String("name", name))
  118. } else {
  119. plog.Panicf("two members with the given name %q exist", name)
  120. }
  121. }
  122. memb = m
  123. }
  124. }
  125. return memb.Clone()
  126. }
  127. func (c *RaftCluster) MemberIDs() []types.ID {
  128. c.Lock()
  129. defer c.Unlock()
  130. var ids []types.ID
  131. for _, m := range c.members {
  132. ids = append(ids, m.ID)
  133. }
  134. sort.Sort(types.IDSlice(ids))
  135. return ids
  136. }
  137. func (c *RaftCluster) IsIDRemoved(id types.ID) bool {
  138. c.Lock()
  139. defer c.Unlock()
  140. return c.removed[id]
  141. }
  142. // PeerURLs returns a list of all peer addresses.
  143. // The returned list is sorted in ascending lexicographical order.
  144. func (c *RaftCluster) PeerURLs() []string {
  145. c.Lock()
  146. defer c.Unlock()
  147. urls := make([]string, 0)
  148. for _, p := range c.members {
  149. urls = append(urls, p.PeerURLs...)
  150. }
  151. sort.Strings(urls)
  152. return urls
  153. }
  154. // ClientURLs returns a list of all client addresses.
  155. // The returned list is sorted in ascending lexicographical order.
  156. func (c *RaftCluster) ClientURLs() []string {
  157. c.Lock()
  158. defer c.Unlock()
  159. urls := make([]string, 0)
  160. for _, p := range c.members {
  161. urls = append(urls, p.ClientURLs...)
  162. }
  163. sort.Strings(urls)
  164. return urls
  165. }
  166. func (c *RaftCluster) String() string {
  167. c.Lock()
  168. defer c.Unlock()
  169. b := &bytes.Buffer{}
  170. fmt.Fprintf(b, "{ClusterID:%s ", c.cid)
  171. var ms []string
  172. for _, m := range c.members {
  173. ms = append(ms, fmt.Sprintf("%+v", m))
  174. }
  175. fmt.Fprintf(b, "Members:[%s] ", strings.Join(ms, " "))
  176. var ids []string
  177. for id := range c.removed {
  178. ids = append(ids, id.String())
  179. }
  180. fmt.Fprintf(b, "RemovedMemberIDs:[%s]}", strings.Join(ids, " "))
  181. return b.String()
  182. }
  183. func (c *RaftCluster) genID() {
  184. mIDs := c.MemberIDs()
  185. b := make([]byte, 8*len(mIDs))
  186. for i, id := range mIDs {
  187. binary.BigEndian.PutUint64(b[8*i:], uint64(id))
  188. }
  189. hash := sha1.Sum(b)
  190. c.cid = types.ID(binary.BigEndian.Uint64(hash[:8]))
  191. }
  192. func (c *RaftCluster) SetID(localID, cid types.ID) {
  193. c.localID = localID
  194. c.cid = cid
  195. }
  196. func (c *RaftCluster) SetStore(st v2store.Store) { c.v2store = st }
  197. func (c *RaftCluster) SetBackend(be backend.Backend) {
  198. c.be = be
  199. mustCreateBackendBuckets(c.be)
  200. }
  201. func (c *RaftCluster) Recover(onSet func(*zap.Logger, *semver.Version)) {
  202. c.Lock()
  203. defer c.Unlock()
  204. c.members, c.removed = membersFromStore(c.lg, c.v2store)
  205. c.version = clusterVersionFromStore(c.lg, c.v2store)
  206. mustDetectDowngrade(c.lg, c.version)
  207. onSet(c.lg, c.version)
  208. for _, m := range c.members {
  209. if c.lg != nil {
  210. c.lg.Info(
  211. "recovered/added member from store",
  212. zap.String("cluster-id", c.cid.String()),
  213. zap.String("local-member-id", c.localID.String()),
  214. zap.String("recovered-remote-peer-id", m.ID.String()),
  215. zap.Strings("recovered-remote-peer-urls", m.PeerURLs),
  216. )
  217. } else {
  218. plog.Infof("added member %s %v to cluster %s from store", m.ID, m.PeerURLs, c.cid)
  219. }
  220. }
  221. if c.version != nil {
  222. if c.lg != nil {
  223. c.lg.Info(
  224. "set cluster version from store",
  225. zap.String("cluster-version", version.Cluster(c.version.String())),
  226. )
  227. } else {
  228. plog.Infof("set the cluster version to %v from store", version.Cluster(c.version.String()))
  229. }
  230. }
  231. }
  232. // ValidateConfigurationChange takes a proposed ConfChange and
  233. // ensures that it is still valid.
  234. func (c *RaftCluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
  235. members, removed := membersFromStore(c.lg, c.v2store)
  236. id := types.ID(cc.NodeID)
  237. if removed[id] {
  238. return ErrIDRemoved
  239. }
  240. switch cc.Type {
  241. case raftpb.ConfChangeAddNode, raftpb.ConfChangeAddLearnerNode:
  242. urls := make(map[string]bool)
  243. for _, m := range members {
  244. for _, u := range m.PeerURLs {
  245. urls[u] = true
  246. }
  247. }
  248. confChangeContext := new(ConfigChangeContext)
  249. if err := json.Unmarshal(cc.Context, confChangeContext); err != nil {
  250. if c.lg != nil {
  251. c.lg.Panic("failed to unmarshal confChangeContext", zap.Error(err))
  252. } else {
  253. plog.Panicf("unmarshal confChangeContext should never fail: %v", err)
  254. }
  255. }
  256. // A ConfChangeAddNode to a existing learner node promotes it to a voting member.
  257. if confChangeContext.IsPromote {
  258. if members[id] == nil {
  259. return ErrIDNotFound
  260. }
  261. if !members[id].IsLearner {
  262. return ErrMemberNotLearner
  263. }
  264. } else {
  265. // add a learner or a follower case
  266. if members[id] != nil {
  267. return ErrIDExists
  268. }
  269. for _, u := range confChangeContext.PeerURLs {
  270. if urls[u] {
  271. return ErrPeerURLexists
  272. }
  273. }
  274. }
  275. case raftpb.ConfChangeRemoveNode:
  276. if members[id] == nil {
  277. return ErrIDNotFound
  278. }
  279. case raftpb.ConfChangeUpdateNode:
  280. if members[id] == nil {
  281. return ErrIDNotFound
  282. }
  283. urls := make(map[string]bool)
  284. for _, m := range members {
  285. if m.ID == id {
  286. continue
  287. }
  288. for _, u := range m.PeerURLs {
  289. urls[u] = true
  290. }
  291. }
  292. m := new(Member)
  293. if err := json.Unmarshal(cc.Context, m); err != nil {
  294. if c.lg != nil {
  295. c.lg.Panic("failed to unmarshal member", zap.Error(err))
  296. } else {
  297. plog.Panicf("unmarshal member should never fail: %v", err)
  298. }
  299. }
  300. for _, u := range m.PeerURLs {
  301. if urls[u] {
  302. return ErrPeerURLexists
  303. }
  304. }
  305. default:
  306. if c.lg != nil {
  307. c.lg.Panic("unknown ConfChange type", zap.String("type", cc.Type.String()))
  308. } else {
  309. plog.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
  310. }
  311. }
  312. return nil
  313. }
  314. // AddMember adds a new Member into the cluster, and saves the given member's
  315. // raftAttributes into the store. The given member should have empty attributes.
  316. // A Member with a matching id must not exist.
  317. func (c *RaftCluster) AddMember(m *Member) {
  318. c.Lock()
  319. defer c.Unlock()
  320. if c.v2store != nil {
  321. mustSaveMemberToStore(c.v2store, m)
  322. }
  323. if c.be != nil {
  324. mustSaveMemberToBackend(c.be, m)
  325. }
  326. c.members[m.ID] = m
  327. if c.lg != nil {
  328. c.lg.Info(
  329. "added member",
  330. zap.String("cluster-id", c.cid.String()),
  331. zap.String("local-member-id", c.localID.String()),
  332. zap.String("added-peer-id", m.ID.String()),
  333. zap.Strings("added-peer-peer-urls", m.PeerURLs),
  334. )
  335. } else {
  336. plog.Infof("added member %s %v to cluster %s", m.ID, m.PeerURLs, c.cid)
  337. }
  338. }
  339. // RemoveMember removes a member from the store.
  340. // The given id MUST exist, or the function panics.
  341. func (c *RaftCluster) RemoveMember(id types.ID) {
  342. c.Lock()
  343. defer c.Unlock()
  344. if c.v2store != nil {
  345. mustDeleteMemberFromStore(c.v2store, id)
  346. }
  347. if c.be != nil {
  348. mustDeleteMemberFromBackend(c.be, id)
  349. }
  350. m, ok := c.members[id]
  351. delete(c.members, id)
  352. c.removed[id] = true
  353. if c.lg != nil {
  354. if ok {
  355. c.lg.Info(
  356. "removed member",
  357. zap.String("cluster-id", c.cid.String()),
  358. zap.String("local-member-id", c.localID.String()),
  359. zap.String("removed-remote-peer-id", id.String()),
  360. zap.Strings("removed-remote-peer-urls", m.PeerURLs),
  361. )
  362. } else {
  363. c.lg.Warn(
  364. "skipped removing already removed member",
  365. zap.String("cluster-id", c.cid.String()),
  366. zap.String("local-member-id", c.localID.String()),
  367. zap.String("removed-remote-peer-id", id.String()),
  368. )
  369. }
  370. } else {
  371. plog.Infof("removed member %s from cluster %s", id, c.cid)
  372. }
  373. }
  374. func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) {
  375. c.Lock()
  376. defer c.Unlock()
  377. if m, ok := c.members[id]; ok {
  378. m.Attributes = attr
  379. if c.v2store != nil {
  380. mustUpdateMemberAttrInStore(c.v2store, m)
  381. }
  382. if c.be != nil {
  383. mustSaveMemberToBackend(c.be, m)
  384. }
  385. return
  386. }
  387. _, ok := c.removed[id]
  388. if !ok {
  389. if c.lg != nil {
  390. c.lg.Panic(
  391. "failed to update; member unknown",
  392. zap.String("cluster-id", c.cid.String()),
  393. zap.String("local-member-id", c.localID.String()),
  394. zap.String("unknown-remote-peer-id", id.String()),
  395. )
  396. } else {
  397. plog.Panicf("error updating attributes of unknown member %s", id)
  398. }
  399. }
  400. if c.lg != nil {
  401. c.lg.Warn(
  402. "skipped attributes update of removed member",
  403. zap.String("cluster-id", c.cid.String()),
  404. zap.String("local-member-id", c.localID.String()),
  405. zap.String("updated-peer-id", id.String()),
  406. )
  407. } else {
  408. plog.Warningf("skipped updating attributes of removed member %s", id)
  409. }
  410. }
  411. // PromoteMember marks the member's IsLearner RaftAttributes to false.
  412. func (c *RaftCluster) PromoteMember(id types.ID) {
  413. c.Lock()
  414. defer c.Unlock()
  415. c.members[id].RaftAttributes.IsLearner = false
  416. if c.v2store != nil {
  417. mustUpdateMemberInStore(c.v2store, c.members[id])
  418. }
  419. if c.be != nil {
  420. mustSaveMemberToBackend(c.be, c.members[id])
  421. }
  422. if c.lg != nil {
  423. c.lg.Info(
  424. "promote member",
  425. zap.String("cluster-id", c.cid.String()),
  426. zap.String("local-member-id", c.localID.String()),
  427. )
  428. } else {
  429. plog.Noticef("promote member %s in cluster %s", id, c.cid)
  430. }
  431. }
  432. func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) {
  433. c.Lock()
  434. defer c.Unlock()
  435. c.members[id].RaftAttributes = raftAttr
  436. if c.v2store != nil {
  437. mustUpdateMemberInStore(c.v2store, c.members[id])
  438. }
  439. if c.be != nil {
  440. mustSaveMemberToBackend(c.be, c.members[id])
  441. }
  442. if c.lg != nil {
  443. c.lg.Info(
  444. "updated member",
  445. zap.String("cluster-id", c.cid.String()),
  446. zap.String("local-member-id", c.localID.String()),
  447. zap.String("updated-remote-peer-id", id.String()),
  448. zap.Strings("updated-remote-peer-urls", raftAttr.PeerURLs),
  449. )
  450. } else {
  451. plog.Noticef("updated member %s %v in cluster %s", id, raftAttr.PeerURLs, c.cid)
  452. }
  453. }
  454. func (c *RaftCluster) Version() *semver.Version {
  455. c.Lock()
  456. defer c.Unlock()
  457. if c.version == nil {
  458. return nil
  459. }
  460. return semver.Must(semver.NewVersion(c.version.String()))
  461. }
  462. func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*zap.Logger, *semver.Version)) {
  463. c.Lock()
  464. defer c.Unlock()
  465. if c.version != nil {
  466. if c.lg != nil {
  467. c.lg.Info(
  468. "updated cluster version",
  469. zap.String("cluster-id", c.cid.String()),
  470. zap.String("local-member-id", c.localID.String()),
  471. zap.String("from", version.Cluster(c.version.String())),
  472. zap.String("from", version.Cluster(ver.String())),
  473. )
  474. } else {
  475. plog.Noticef("updated the cluster version from %v to %v", version.Cluster(c.version.String()), version.Cluster(ver.String()))
  476. }
  477. } else {
  478. if c.lg != nil {
  479. c.lg.Info(
  480. "set initial cluster version",
  481. zap.String("cluster-id", c.cid.String()),
  482. zap.String("local-member-id", c.localID.String()),
  483. zap.String("cluster-version", version.Cluster(ver.String())),
  484. )
  485. } else {
  486. plog.Noticef("set the initial cluster version to %v", version.Cluster(ver.String()))
  487. }
  488. }
  489. c.version = ver
  490. mustDetectDowngrade(c.lg, c.version)
  491. if c.v2store != nil {
  492. mustSaveClusterVersionToStore(c.v2store, ver)
  493. }
  494. if c.be != nil {
  495. mustSaveClusterVersionToBackend(c.be, ver)
  496. }
  497. ClusterVersionMetrics.With(prometheus.Labels{"cluster_version": ver.String()}).Set(1)
  498. onSet(c.lg, ver)
  499. }
  500. func (c *RaftCluster) IsReadyToAddNewMember() bool {
  501. nmembers := 1
  502. nstarted := 0
  503. for _, member := range c.members {
  504. if member.IsStarted() {
  505. nstarted++
  506. }
  507. nmembers++
  508. }
  509. if nstarted == 1 && nmembers == 2 {
  510. // a case of adding a new node to 1-member cluster for restoring cluster data
  511. // https://github.com/etcd-io/etcd/blob/master/Documentation/v2/admin_guide.md#restoring-the-cluster
  512. if c.lg != nil {
  513. c.lg.Debug("number of started member is 1; can accept add member request")
  514. } else {
  515. plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
  516. }
  517. return true
  518. }
  519. nquorum := nmembers/2 + 1
  520. if nstarted < nquorum {
  521. if c.lg != nil {
  522. c.lg.Warn(
  523. "rejecting member add; started member will be less than quorum",
  524. zap.Int("number-of-started-member", nstarted),
  525. zap.Int("quorum", nquorum),
  526. zap.String("cluster-id", c.cid.String()),
  527. zap.String("local-member-id", c.localID.String()),
  528. )
  529. } else {
  530. plog.Warningf("Reject add member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
  531. }
  532. return false
  533. }
  534. return true
  535. }
  536. func (c *RaftCluster) IsReadyToRemoveMember(id uint64) bool {
  537. nmembers := 0
  538. nstarted := 0
  539. for _, member := range c.members {
  540. if uint64(member.ID) == id {
  541. continue
  542. }
  543. if member.IsStarted() {
  544. nstarted++
  545. }
  546. nmembers++
  547. }
  548. nquorum := nmembers/2 + 1
  549. if nstarted < nquorum {
  550. if c.lg != nil {
  551. c.lg.Warn(
  552. "rejecting member remove; started member will be less than quorum",
  553. zap.Int("number-of-started-member", nstarted),
  554. zap.Int("quorum", nquorum),
  555. zap.String("cluster-id", c.cid.String()),
  556. zap.String("local-member-id", c.localID.String()),
  557. )
  558. } else {
  559. plog.Warningf("Reject remove member request: the number of started member (%d) will be less than the quorum number of the cluster (%d)", nstarted, nquorum)
  560. }
  561. return false
  562. }
  563. return true
  564. }
  565. func membersFromStore(lg *zap.Logger, st v2store.Store) (map[types.ID]*Member, map[types.ID]bool) {
  566. members := make(map[types.ID]*Member)
  567. removed := make(map[types.ID]bool)
  568. e, err := st.Get(StoreMembersPrefix, true, true)
  569. if err != nil {
  570. if isKeyNotFound(err) {
  571. return members, removed
  572. }
  573. if lg != nil {
  574. lg.Panic("failed to get members from store", zap.String("path", StoreMembersPrefix), zap.Error(err))
  575. } else {
  576. plog.Panicf("get storeMembers should never fail: %v", err)
  577. }
  578. }
  579. for _, n := range e.Node.Nodes {
  580. var m *Member
  581. m, err = nodeToMember(n)
  582. if err != nil {
  583. if lg != nil {
  584. lg.Panic("failed to nodeToMember", zap.Error(err))
  585. } else {
  586. plog.Panicf("nodeToMember should never fail: %v", err)
  587. }
  588. }
  589. members[m.ID] = m
  590. }
  591. e, err = st.Get(storeRemovedMembersPrefix, true, true)
  592. if err != nil {
  593. if isKeyNotFound(err) {
  594. return members, removed
  595. }
  596. if lg != nil {
  597. lg.Panic(
  598. "failed to get removed members from store",
  599. zap.String("path", storeRemovedMembersPrefix),
  600. zap.Error(err),
  601. )
  602. } else {
  603. plog.Panicf("get storeRemovedMembers should never fail: %v", err)
  604. }
  605. }
  606. for _, n := range e.Node.Nodes {
  607. removed[MustParseMemberIDFromKey(n.Key)] = true
  608. }
  609. return members, removed
  610. }
  611. func clusterVersionFromStore(lg *zap.Logger, st v2store.Store) *semver.Version {
  612. e, err := st.Get(path.Join(storePrefix, "version"), false, false)
  613. if err != nil {
  614. if isKeyNotFound(err) {
  615. return nil
  616. }
  617. if lg != nil {
  618. lg.Panic(
  619. "failed to get cluster version from store",
  620. zap.String("path", path.Join(storePrefix, "version")),
  621. zap.Error(err),
  622. )
  623. } else {
  624. plog.Panicf("unexpected error (%v) when getting cluster version from store", err)
  625. }
  626. }
  627. return semver.Must(semver.NewVersion(*e.Node.Value))
  628. }
  629. // ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs
  630. // with the existing cluster. If the validation succeeds, it assigns the IDs
  631. // from the existing cluster to the local cluster.
  632. // If the validation fails, an error will be returned.
  633. func ValidateClusterAndAssignIDs(lg *zap.Logger, local *RaftCluster, existing *RaftCluster) error {
  634. ems := existing.Members()
  635. lms := local.Members()
  636. if len(ems) != len(lms) {
  637. return fmt.Errorf("member count is unequal")
  638. }
  639. sort.Sort(MembersByPeerURLs(ems))
  640. sort.Sort(MembersByPeerURLs(lms))
  641. ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
  642. defer cancel()
  643. for i := range ems {
  644. if ok, err := netutil.URLStringsEqual(ctx, lg, ems[i].PeerURLs, lms[i].PeerURLs); !ok {
  645. return fmt.Errorf("unmatched member while checking PeerURLs (%v)", err)
  646. }
  647. lms[i].ID = ems[i].ID
  648. }
  649. local.members = make(map[types.ID]*Member)
  650. for _, m := range lms {
  651. local.members[m.ID] = m
  652. }
  653. return nil
  654. }
  655. func mustDetectDowngrade(lg *zap.Logger, cv *semver.Version) {
  656. lv := semver.Must(semver.NewVersion(version.Version))
  657. // only keep major.minor version for comparison against cluster version
  658. lv = &semver.Version{Major: lv.Major, Minor: lv.Minor}
  659. if cv != nil && lv.LessThan(*cv) {
  660. if lg != nil {
  661. lg.Fatal(
  662. "invalid downgrade; server version is lower than determined cluster version",
  663. zap.String("current-server-version", version.Version),
  664. zap.String("determined-cluster-version", version.Cluster(cv.String())),
  665. )
  666. } else {
  667. plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
  668. }
  669. }
  670. }
  671. // IsLearner returns if the local member is raft learner
  672. func (c *RaftCluster) IsLearner() bool {
  673. c.Lock()
  674. defer c.Unlock()
  675. localMember, ok := c.members[c.localID]
  676. if !ok {
  677. if c.lg != nil {
  678. c.lg.Panic(
  679. "failed to find local ID in cluster members",
  680. zap.String("cluster-id", c.cid.String()),
  681. zap.String("local-member-id", c.localID.String()),
  682. )
  683. } else {
  684. plog.Panicf("failed to find local ID %s in cluster %s", c.localID.String(), c.cid.String())
  685. }
  686. }
  687. return localMember.IsLearner
  688. }
  689. // IsMemberExist returns if the member with the given id exists in cluster.
  690. func (c *RaftCluster) IsMemberExist(id types.ID) bool {
  691. c.Lock()
  692. defer c.Unlock()
  693. _, ok := c.members[id]
  694. return ok
  695. }
  696. // VotingMemberIDs returns the ID of voting members in cluster.
  697. func (c *RaftCluster) VotingMemberIDs() []types.ID {
  698. c.Lock()
  699. defer c.Unlock()
  700. var ids []types.ID
  701. for _, m := range c.members {
  702. if !m.IsLearner {
  703. ids = append(ids, m.ID)
  704. }
  705. }
  706. sort.Sort(types.IDSlice(ids))
  707. return ids
  708. }