cluster.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package etcdserver
  14. import (
  15. "crypto/sha1"
  16. "encoding/binary"
  17. "encoding/json"
  18. "fmt"
  19. "log"
  20. "net/url"
  21. "path"
  22. "reflect"
  23. "sort"
  24. "strings"
  25. "sync"
  26. etcdErr "github.com/coreos/etcd/error"
  27. "github.com/coreos/etcd/pkg/flags"
  28. "github.com/coreos/etcd/pkg/types"
  29. "github.com/coreos/etcd/raft/raftpb"
  30. "github.com/coreos/etcd/store"
  31. )
  32. const (
  33. raftAttributesSuffix = "raftAttributes"
  34. attributesSuffix = "attributes"
  35. )
  36. type ClusterInfo interface {
  37. // ID returns the cluster ID
  38. ID() types.ID
  39. // ClientURLs returns an aggregate set of all URLs on which this
  40. // cluster is listening for client requests
  41. ClientURLs() []string
  42. // Members returns a slice of members sorted by their ID
  43. Members() []*Member
  44. // Member retrieves a particular member based on ID, or nil if the
  45. // member does not exist in the cluster
  46. Member(id types.ID) *Member
  47. // IsIDRemoved checks whether the given ID has been removed from this
  48. // cluster at some point in the past
  49. IsIDRemoved(id types.ID) bool
  50. }
  51. // Cluster is a list of Members that belong to the same raft cluster
  52. type Cluster struct {
  53. id types.ID
  54. token string
  55. members map[types.ID]*Member
  56. // removed contains the ids of removed members in the cluster.
  57. // removed id cannot be reused.
  58. removed map[types.ID]bool
  59. store store.Store
  60. sync.Mutex
  61. }
  62. // NewClusterFromString returns a Cluster instantiated from the given cluster token
  63. // and cluster string, by parsing members from a set of discovery-formatted
  64. // names-to-IPs, like:
  65. // mach0=http://1.1.1.1,mach0=http://2.2.2.2,mach1=http://3.3.3.3,mach2=http://4.4.4.4
  66. func NewClusterFromString(token string, cluster string) (*Cluster, error) {
  67. c := newCluster(token)
  68. v, err := url.ParseQuery(strings.Replace(cluster, ",", "&", -1))
  69. if err != nil {
  70. return nil, err
  71. }
  72. for name, urls := range v {
  73. if len(urls) == 0 || urls[0] == "" {
  74. return nil, fmt.Errorf("Empty URL given for %q", name)
  75. }
  76. purls := &flags.URLsValue{}
  77. if err := purls.Set(strings.Join(urls, ",")); err != nil {
  78. return nil, err
  79. }
  80. m := NewMember(name, types.URLs(*purls), c.token, nil)
  81. if _, ok := c.members[m.ID]; ok {
  82. return nil, fmt.Errorf("Member exists with identical ID %v", m)
  83. }
  84. c.members[m.ID] = m
  85. }
  86. c.genID()
  87. return c, nil
  88. }
  89. func NewClusterFromStore(token string, st store.Store) *Cluster {
  90. c := newCluster(token)
  91. c.store = st
  92. c.members, c.removed = membersFromStore(c.store)
  93. return c
  94. }
  95. func NewClusterFromMembers(token string, id types.ID, membs []*Member) *Cluster {
  96. c := newCluster(token)
  97. c.id = id
  98. for _, m := range membs {
  99. c.members[m.ID] = m
  100. }
  101. return c
  102. }
  103. func newCluster(token string) *Cluster {
  104. return &Cluster{
  105. token: token,
  106. members: make(map[types.ID]*Member),
  107. removed: make(map[types.ID]bool),
  108. }
  109. }
  110. func (c *Cluster) ID() types.ID { return c.id }
  111. func (c *Cluster) Members() []*Member {
  112. c.Lock()
  113. defer c.Unlock()
  114. var sms SortableMemberSlice
  115. for _, m := range c.members {
  116. sms = append(sms, m.Clone())
  117. }
  118. sort.Sort(sms)
  119. return []*Member(sms)
  120. }
  121. func (c *Cluster) Member(id types.ID) *Member {
  122. c.Lock()
  123. defer c.Unlock()
  124. return c.members[id].Clone()
  125. }
  126. // MemberByName returns a Member with the given name if exists.
  127. // If more than one member has the given name, it will panic.
  128. func (c *Cluster) MemberByName(name string) *Member {
  129. c.Lock()
  130. defer c.Unlock()
  131. var memb *Member
  132. for _, m := range c.members {
  133. if m.Name == name {
  134. if memb != nil {
  135. log.Panicf("two members with the given name %q exist", name)
  136. }
  137. memb = m
  138. }
  139. }
  140. return memb.Clone()
  141. }
  142. func (c *Cluster) MemberIDs() []types.ID {
  143. c.Lock()
  144. defer c.Unlock()
  145. var ids []types.ID
  146. for _, m := range c.members {
  147. ids = append(ids, m.ID)
  148. }
  149. sort.Sort(types.IDSlice(ids))
  150. return ids
  151. }
  152. func (c *Cluster) IsIDRemoved(id types.ID) bool {
  153. c.Lock()
  154. defer c.Unlock()
  155. return c.removed[id]
  156. }
  157. // PeerURLs returns a list of all peer addresses. Each address is prefixed
  158. // with the scheme (currently "http://"). The returned list is sorted in
  159. // ascending lexicographical order.
  160. func (c *Cluster) PeerURLs() []string {
  161. c.Lock()
  162. defer c.Unlock()
  163. endpoints := make([]string, 0)
  164. for _, p := range c.members {
  165. for _, addr := range p.PeerURLs {
  166. endpoints = append(endpoints, addr)
  167. }
  168. }
  169. sort.Strings(endpoints)
  170. return endpoints
  171. }
  172. // ClientURLs returns a list of all client addresses. Each address is prefixed
  173. // with the scheme (currently "http://"). The returned list is sorted in
  174. // ascending lexicographical order.
  175. func (c *Cluster) ClientURLs() []string {
  176. c.Lock()
  177. defer c.Unlock()
  178. urls := make([]string, 0)
  179. for _, p := range c.members {
  180. for _, url := range p.ClientURLs {
  181. urls = append(urls, url)
  182. }
  183. }
  184. sort.Strings(urls)
  185. return urls
  186. }
  187. func (c *Cluster) String() string {
  188. c.Lock()
  189. defer c.Unlock()
  190. sl := []string{}
  191. for _, m := range c.members {
  192. for _, u := range m.PeerURLs {
  193. sl = append(sl, fmt.Sprintf("%s=%s", m.Name, u))
  194. }
  195. }
  196. sort.Strings(sl)
  197. return strings.Join(sl, ",")
  198. }
  199. func (c *Cluster) genID() {
  200. mIDs := c.MemberIDs()
  201. b := make([]byte, 8*len(mIDs))
  202. for i, id := range mIDs {
  203. binary.BigEndian.PutUint64(b[8*i:], uint64(id))
  204. }
  205. hash := sha1.Sum(b)
  206. c.id = types.ID(binary.BigEndian.Uint64(hash[:8]))
  207. }
  208. func (c *Cluster) SetID(id types.ID) { c.id = id }
  209. func (c *Cluster) SetStore(st store.Store) { c.store = st }
  210. func (c *Cluster) Recover() {
  211. c.members, c.removed = membersFromStore(c.store)
  212. }
  213. // ValidateConfigurationChange takes a proposed ConfChange and
  214. // ensures that it is still valid.
  215. func (c *Cluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
  216. members, removed := membersFromStore(c.store)
  217. id := types.ID(cc.NodeID)
  218. if removed[id] {
  219. return ErrIDRemoved
  220. }
  221. switch cc.Type {
  222. case raftpb.ConfChangeAddNode:
  223. if members[id] != nil {
  224. return ErrIDExists
  225. }
  226. urls := make(map[string]bool)
  227. for _, m := range members {
  228. for _, u := range m.PeerURLs {
  229. urls[u] = true
  230. }
  231. }
  232. m := new(Member)
  233. if err := json.Unmarshal(cc.Context, m); err != nil {
  234. log.Panicf("unmarshal member should never fail: %v", err)
  235. }
  236. for _, u := range m.PeerURLs {
  237. if urls[u] {
  238. return ErrPeerURLexists
  239. }
  240. }
  241. case raftpb.ConfChangeRemoveNode:
  242. if members[id] == nil {
  243. return ErrIDNotFound
  244. }
  245. case raftpb.ConfChangeUpdateNode:
  246. if members[id] == nil {
  247. return ErrIDNotFound
  248. }
  249. urls := make(map[string]bool)
  250. for _, m := range members {
  251. if m.ID == id {
  252. continue
  253. }
  254. for _, u := range m.PeerURLs {
  255. urls[u] = true
  256. }
  257. }
  258. m := new(Member)
  259. if err := json.Unmarshal(cc.Context, m); err != nil {
  260. log.Panicf("unmarshal member should never fail: %v", err)
  261. }
  262. for _, u := range m.PeerURLs {
  263. if urls[u] {
  264. return ErrPeerURLexists
  265. }
  266. }
  267. default:
  268. log.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
  269. }
  270. return nil
  271. }
  272. // AddMember adds a new Member into the cluster, and saves the given member's
  273. // raftAttributes into the store. The given member should have empty attributes.
  274. // A Member with a matching id must not exist.
  275. func (c *Cluster) AddMember(m *Member) {
  276. c.Lock()
  277. defer c.Unlock()
  278. b, err := json.Marshal(m.RaftAttributes)
  279. if err != nil {
  280. log.Panicf("marshal raftAttributes should never fail: %v", err)
  281. }
  282. p := path.Join(memberStoreKey(m.ID), raftAttributesSuffix)
  283. if _, err := c.store.Create(p, false, string(b), false, store.Permanent); err != nil {
  284. log.Panicf("create raftAttributes should never fail: %v", err)
  285. }
  286. c.members[m.ID] = m
  287. }
  288. // RemoveMember removes a member from the store.
  289. // The given id MUST exist, or the function panics.
  290. func (c *Cluster) RemoveMember(id types.ID) {
  291. c.Lock()
  292. defer c.Unlock()
  293. if _, err := c.store.Delete(memberStoreKey(id), true, true); err != nil {
  294. log.Panicf("delete member should never fail: %v", err)
  295. }
  296. delete(c.members, id)
  297. if _, err := c.store.Create(removedMemberStoreKey(id), false, "", false, store.Permanent); err != nil {
  298. log.Panicf("create removedMember should never fail: %v", err)
  299. }
  300. c.removed[id] = true
  301. }
  302. func (c *Cluster) UpdateMemberAttributes(id types.ID, attr Attributes) {
  303. c.Lock()
  304. defer c.Unlock()
  305. c.members[id].Attributes = attr
  306. }
  307. func (c *Cluster) UpdateMember(nm *Member) {
  308. c.Lock()
  309. defer c.Unlock()
  310. b, err := json.Marshal(nm.RaftAttributes)
  311. if err != nil {
  312. log.Panicf("marshal raftAttributes should never fail: %v", err)
  313. }
  314. p := path.Join(memberStoreKey(nm.ID), raftAttributesSuffix)
  315. if _, err := c.store.Update(p, string(b), store.Permanent); err != nil {
  316. log.Panicf("update raftAttributes should never fail: %v", err)
  317. }
  318. c.members[nm.ID].RaftAttributes = nm.RaftAttributes
  319. }
  320. // nodeToMember builds member through a store node.
  321. // the child nodes of the given node should be sorted by key.
  322. func nodeToMember(n *store.NodeExtern) (*Member, error) {
  323. m := &Member{ID: mustParseMemberIDFromKey(n.Key)}
  324. attrs := make(map[string][]byte)
  325. raftAttrKey := path.Join(n.Key, raftAttributesSuffix)
  326. attrKey := path.Join(n.Key, attributesSuffix)
  327. for _, nn := range n.Nodes {
  328. if nn.Key != raftAttrKey && nn.Key != attrKey {
  329. return nil, fmt.Errorf("unknown key %q", nn.Key)
  330. }
  331. attrs[nn.Key] = []byte(*nn.Value)
  332. }
  333. if data := attrs[raftAttrKey]; data != nil {
  334. if err := json.Unmarshal(data, &m.RaftAttributes); err != nil {
  335. return nil, fmt.Errorf("unmarshal raftAttributes error: %v", err)
  336. }
  337. } else {
  338. return nil, fmt.Errorf("raftAttributes key doesn't exist")
  339. }
  340. if data := attrs[attrKey]; data != nil {
  341. if err := json.Unmarshal(data, &m.Attributes); err != nil {
  342. return m, fmt.Errorf("unmarshal attributes error: %v", err)
  343. }
  344. }
  345. return m, nil
  346. }
  347. func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
  348. members := make(map[types.ID]*Member)
  349. removed := make(map[types.ID]bool)
  350. e, err := st.Get(storeMembersPrefix, true, true)
  351. if err != nil {
  352. if isKeyNotFound(err) {
  353. return members, removed
  354. }
  355. log.Panicf("get storeMembers should never fail: %v", err)
  356. }
  357. for _, n := range e.Node.Nodes {
  358. m, err := nodeToMember(n)
  359. if err != nil {
  360. log.Panicf("nodeToMember should never fail: %v", err)
  361. }
  362. members[m.ID] = m
  363. }
  364. e, err = st.Get(storeRemovedMembersPrefix, true, true)
  365. if err != nil {
  366. if isKeyNotFound(err) {
  367. return members, removed
  368. }
  369. log.Panicf("get storeRemovedMembers should never fail: %v", err)
  370. }
  371. for _, n := range e.Node.Nodes {
  372. removed[mustParseMemberIDFromKey(n.Key)] = true
  373. }
  374. return members, removed
  375. }
  376. // ValidateClusterAndAssignIDs validates the local cluster by matching the PeerURLs
  377. // with the existing cluster. If the validation succeeds, it assigns the IDs
  378. // from the existing cluster to the local cluster.
  379. // If the validation fails, an error will be returned.
  380. func ValidateClusterAndAssignIDs(local *Cluster, existing *Cluster) error {
  381. ems := existing.Members()
  382. lms := local.Members()
  383. if len(ems) != len(lms) {
  384. return fmt.Errorf("member count is unequal")
  385. }
  386. sort.Sort(SortableMemberSliceByPeerURLs(ems))
  387. sort.Sort(SortableMemberSliceByPeerURLs(lms))
  388. for i := range ems {
  389. if !reflect.DeepEqual(ems[i].PeerURLs, lms[i].PeerURLs) {
  390. return fmt.Errorf("unmatched member while checking PeerURLs")
  391. }
  392. lms[i].ID = ems[i].ID
  393. }
  394. local.members = make(map[types.ID]*Member)
  395. for _, m := range lms {
  396. local.members[m.ID] = m
  397. }
  398. return nil
  399. }
  400. func isKeyNotFound(err error) bool {
  401. e, ok := err.(*etcdErr.Error)
  402. return ok && e.ErrorCode == etcdErr.EcodeKeyNotFound
  403. }