cluster.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package etcdserver
  14. import (
  15. "crypto/sha1"
  16. "encoding/binary"
  17. "encoding/json"
  18. "fmt"
  19. "log"
  20. "net/url"
  21. "path"
  22. "reflect"
  23. "sort"
  24. "strings"
  25. "sync"
  26. etcdErr "github.com/coreos/etcd/error"
  27. "github.com/coreos/etcd/pkg/flags"
  28. "github.com/coreos/etcd/pkg/types"
  29. "github.com/coreos/etcd/raft/raftpb"
  30. "github.com/coreos/etcd/store"
  31. )
  const (
  	// raftAttributesSuffix is the last path element, below a member's store
  	// key, of the node that holds the member's JSON-encoded RaftAttributes.
  	raftAttributesSuffix = "raftAttributes"
  	// attributesSuffix is the last path element, below a member's store key,
  	// of the node that holds the member's JSON-encoded Attributes.
  	attributesSuffix = "attributes"
  )
  // ClusterInfo is the set of query methods a cluster exposes: its ID, the
  // client URLs it serves, its members, and whether a member ID was removed.
  type ClusterInfo interface {
  	// ID returns the cluster ID
  	ID() types.ID
  	// ClientURLs returns an aggregate set of all URLs on which this
  	// cluster is listening for client requests
  	ClientURLs() []string
  	// Members returns a slice of members sorted by their ID
  	Members() []*Member
  	// Member retrieves a particular member based on ID, or nil if the
  	// member does not exist in the cluster
  	Member(id types.ID) *Member
  	// IsIDRemoved checks whether the given ID has been removed from this
  	// cluster at some point in the past
  	IsIDRemoved(id types.ID) bool
  }
  // Cluster is a list of Members that belong to the same raft cluster
  type Cluster struct {
  	// id is the cluster ID; it is set by genID, SetID or
  	// NewClusterFromMembers.
  	id types.ID
  	// token is the cluster token given at construction; it is passed to
  	// NewMember when members are built from a cluster string.
  	token string
  	// members maps a member ID to the member carrying that ID.
  	members map[types.ID]*Member
  	// removed contains the ids of removed members in the cluster.
  	// removed id cannot be reused.
  	removed map[types.ID]bool
  	// store is the backing store that membership is loaded from and that
  	// membership changes are written to. It stays nil until it is set by
  	// NewClusterFromStore or SetStore.
  	store store.Store
  	// The embedded mutex is held by most methods that read or modify
  	// members and removed.
  	sync.Mutex
  }
  62. // NewClusterFromString returns a Cluster instantiated from the given cluster token
  63. // and cluster string, by parsing members from a set of discovery-formatted
  64. // names-to-IPs, like:
  65. // mach0=http://1.1.1.1,mach0=http://2.2.2.2,mach1=http://3.3.3.3,mach2=http://4.4.4.4
  66. func NewClusterFromString(token string, cluster string) (*Cluster, error) {
  67. c := newCluster(token)
  68. v, err := url.ParseQuery(strings.Replace(cluster, ",", "&", -1))
  69. if err != nil {
  70. return nil, err
  71. }
  72. for name, urls := range v {
  73. if len(urls) == 0 || urls[0] == "" {
  74. return nil, fmt.Errorf("Empty URL given for %q", name)
  75. }
  76. purls := &flags.URLsValue{}
  77. if err := purls.Set(strings.Join(urls, ",")); err != nil {
  78. return nil, err
  79. }
  80. m := NewMember(name, types.URLs(*purls), c.token, nil)
  81. if _, ok := c.members[m.ID]; ok {
  82. return nil, fmt.Errorf("Member exists with identical ID %v", m)
  83. }
  84. c.members[m.ID] = m
  85. }
  86. c.genID()
  87. return c, nil
  88. }
  89. func NewClusterFromStore(token string, st store.Store) *Cluster {
  90. c := newCluster(token)
  91. c.store = st
  92. c.members, c.removed = membersFromStore(c.store)
  93. return c
  94. }
  95. func NewClusterFromMembers(token string, id types.ID, membs []*Member) *Cluster {
  96. c := newCluster(token)
  97. c.id = id
  98. for _, m := range membs {
  99. c.members[m.ID] = m
  100. }
  101. return c
  102. }
  103. func newCluster(token string) *Cluster {
  104. return &Cluster{
  105. token: token,
  106. members: make(map[types.ID]*Member),
  107. removed: make(map[types.ID]bool),
  108. }
  109. }
  110. func (c *Cluster) ID() types.ID { return c.id }
  111. func (c *Cluster) Members() []*Member {
  112. c.Lock()
  113. defer c.Unlock()
  114. var sms SortableMemberSlice
  115. for _, m := range c.members {
  116. sms = append(sms, m.Clone())
  117. }
  118. sort.Sort(sms)
  119. return []*Member(sms)
  120. }
  121. type SortableMemberSlice []*Member
  122. func (s SortableMemberSlice) Len() int { return len(s) }
  123. func (s SortableMemberSlice) Less(i, j int) bool { return s[i].ID < s[j].ID }
  124. func (s SortableMemberSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  125. func (c *Cluster) Member(id types.ID) *Member {
  126. c.Lock()
  127. defer c.Unlock()
  128. return c.members[id].Clone()
  129. }
  130. // MemberByName returns a Member with the given name if exists.
  131. // If more than one member has the given name, it will panic.
  132. func (c *Cluster) MemberByName(name string) *Member {
  133. c.Lock()
  134. defer c.Unlock()
  135. var memb *Member
  136. for _, m := range c.members {
  137. if m.Name == name {
  138. if memb != nil {
  139. log.Panicf("two members with the given name %q exist", name)
  140. }
  141. memb = m
  142. }
  143. }
  144. return memb.Clone()
  145. }
  146. func (c *Cluster) MemberIDs() []types.ID {
  147. c.Lock()
  148. defer c.Unlock()
  149. var ids []types.ID
  150. for _, m := range c.members {
  151. ids = append(ids, m.ID)
  152. }
  153. sort.Sort(types.IDSlice(ids))
  154. return ids
  155. }
  156. func (c *Cluster) IsIDRemoved(id types.ID) bool {
  157. c.Lock()
  158. defer c.Unlock()
  159. return c.removed[id]
  160. }
  161. // PeerURLs returns a list of all peer addresses. Each address is prefixed
  162. // with the scheme (currently "http://"). The returned list is sorted in
  163. // ascending lexicographical order.
  164. func (c *Cluster) PeerURLs() []string {
  165. c.Lock()
  166. defer c.Unlock()
  167. endpoints := make([]string, 0)
  168. for _, p := range c.members {
  169. for _, addr := range p.PeerURLs {
  170. endpoints = append(endpoints, addr)
  171. }
  172. }
  173. sort.Strings(endpoints)
  174. return endpoints
  175. }
  176. // ClientURLs returns a list of all client addresses. Each address is prefixed
  177. // with the scheme (currently "http://"). The returned list is sorted in
  178. // ascending lexicographical order.
  179. func (c *Cluster) ClientURLs() []string {
  180. c.Lock()
  181. defer c.Unlock()
  182. urls := make([]string, 0)
  183. for _, p := range c.members {
  184. for _, url := range p.ClientURLs {
  185. urls = append(urls, url)
  186. }
  187. }
  188. sort.Strings(urls)
  189. return urls
  190. }
  191. func (c *Cluster) String() string {
  192. c.Lock()
  193. defer c.Unlock()
  194. sl := []string{}
  195. for _, m := range c.members {
  196. for _, u := range m.PeerURLs {
  197. sl = append(sl, fmt.Sprintf("%s=%s", m.Name, u))
  198. }
  199. }
  200. sort.Strings(sl)
  201. return strings.Join(sl, ",")
  202. }
  203. // ValidateAndAssignIDs validates the given members by matching their PeerURLs
  204. // with the existing members in the cluster. If the validation succeeds, it
  205. // assigns the IDs from the given members to the existing members in the
  206. // cluster. If the validation fails, an error will be returned.
  207. func (c *Cluster) ValidateAndAssignIDs(membs []*Member) error {
  208. if len(c.members) != len(membs) {
  209. return fmt.Errorf("member count is unequal")
  210. }
  211. omembs := make([]*Member, 0)
  212. for _, m := range c.members {
  213. omembs = append(omembs, m)
  214. }
  215. sort.Sort(SortableMemberSliceByPeerURLs(omembs))
  216. sort.Sort(SortableMemberSliceByPeerURLs(membs))
  217. for i := range omembs {
  218. if !reflect.DeepEqual(omembs[i].PeerURLs, membs[i].PeerURLs) {
  219. return fmt.Errorf("unmatched member while checking PeerURLs")
  220. }
  221. omembs[i].ID = membs[i].ID
  222. }
  223. c.members = make(map[types.ID]*Member)
  224. for _, m := range omembs {
  225. c.members[m.ID] = m
  226. }
  227. return nil
  228. }
  229. func (c *Cluster) genID() {
  230. mIDs := c.MemberIDs()
  231. b := make([]byte, 8*len(mIDs))
  232. for i, id := range mIDs {
  233. binary.BigEndian.PutUint64(b[8*i:], uint64(id))
  234. }
  235. hash := sha1.Sum(b)
  236. c.id = types.ID(binary.BigEndian.Uint64(hash[:8]))
  237. }
  238. func (c *Cluster) SetID(id types.ID) { c.id = id }
  239. func (c *Cluster) SetStore(st store.Store) { c.store = st }
  240. // ValidateConfigurationChange takes a proposed ConfChange and
  241. // ensures that it is still valid.
  242. func (c *Cluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
  243. members, removed := membersFromStore(c.store)
  244. id := types.ID(cc.NodeID)
  245. if removed[id] {
  246. return ErrIDRemoved
  247. }
  248. switch cc.Type {
  249. case raftpb.ConfChangeAddNode:
  250. if members[id] != nil {
  251. return ErrIDExists
  252. }
  253. urls := make(map[string]bool)
  254. for _, m := range members {
  255. for _, u := range m.PeerURLs {
  256. urls[u] = true
  257. }
  258. }
  259. m := new(Member)
  260. if err := json.Unmarshal(cc.Context, m); err != nil {
  261. log.Panicf("unmarshal member should never fail: %v", err)
  262. }
  263. for _, u := range m.PeerURLs {
  264. if urls[u] {
  265. return ErrPeerURLexists
  266. }
  267. }
  268. case raftpb.ConfChangeRemoveNode:
  269. if members[id] == nil {
  270. return ErrIDNotFound
  271. }
  272. case raftpb.ConfChangeUpdateNode:
  273. if members[id] == nil {
  274. return ErrIDNotFound
  275. }
  276. urls := make(map[string]bool)
  277. for _, m := range members {
  278. if m.ID == id {
  279. continue
  280. }
  281. for _, u := range m.PeerURLs {
  282. urls[u] = true
  283. }
  284. }
  285. m := new(Member)
  286. if err := json.Unmarshal(cc.Context, m); err != nil {
  287. log.Panicf("unmarshal member should never fail: %v", err)
  288. }
  289. for _, u := range m.PeerURLs {
  290. if urls[u] {
  291. return ErrPeerURLexists
  292. }
  293. }
  294. default:
  295. log.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
  296. }
  297. return nil
  298. }
  299. // AddMember adds a new Member into the cluster, and saves the given member's
  300. // raftAttributes into the store. The given member should have empty attributes.
  301. // A Member with a matching id must not exist.
  302. func (c *Cluster) AddMember(m *Member) {
  303. c.Lock()
  304. defer c.Unlock()
  305. b, err := json.Marshal(m.RaftAttributes)
  306. if err != nil {
  307. log.Panicf("marshal raftAttributes should never fail: %v", err)
  308. }
  309. p := path.Join(memberStoreKey(m.ID), raftAttributesSuffix)
  310. if _, err := c.store.Create(p, false, string(b), false, store.Permanent); err != nil {
  311. log.Panicf("create raftAttributes should never fail: %v", err)
  312. }
  313. c.members[m.ID] = m
  314. }
  315. // RemoveMember removes a member from the store.
  316. // The given id MUST exist, or the function panics.
  317. func (c *Cluster) RemoveMember(id types.ID) {
  318. c.Lock()
  319. defer c.Unlock()
  320. if _, err := c.store.Delete(memberStoreKey(id), true, true); err != nil {
  321. log.Panicf("delete member should never fail: %v", err)
  322. }
  323. delete(c.members, id)
  324. if _, err := c.store.Create(removedMemberStoreKey(id), false, "", false, store.Permanent); err != nil {
  325. log.Panicf("create removedMember should never fail: %v", err)
  326. }
  327. c.removed[id] = true
  328. }
  329. func (c *Cluster) UpdateMemberAttributes(id types.ID, attr Attributes) {
  330. c.Lock()
  331. defer c.Unlock()
  332. c.members[id].Attributes = attr
  333. }
  334. func (c *Cluster) UpdateMember(nm *Member) {
  335. c.Lock()
  336. defer c.Unlock()
  337. b, err := json.Marshal(nm.RaftAttributes)
  338. if err != nil {
  339. log.Panicf("marshal raftAttributes should never fail: %v", err)
  340. }
  341. p := path.Join(memberStoreKey(nm.ID), raftAttributesSuffix)
  342. if _, err := c.store.Update(p, string(b), store.Permanent); err != nil {
  343. log.Panicf("update raftAttributes should never fail: %v", err)
  344. }
  345. c.members[nm.ID].RaftAttributes = nm.RaftAttributes
  346. }
  347. // nodeToMember builds member through a store node.
  348. // the child nodes of the given node should be sorted by key.
  349. func nodeToMember(n *store.NodeExtern) (*Member, error) {
  350. m := &Member{ID: mustParseMemberIDFromKey(n.Key)}
  351. attrs := make(map[string][]byte)
  352. raftAttrKey := path.Join(n.Key, raftAttributesSuffix)
  353. attrKey := path.Join(n.Key, attributesSuffix)
  354. for _, nn := range n.Nodes {
  355. if nn.Key != raftAttrKey && nn.Key != attrKey {
  356. return nil, fmt.Errorf("unknown key %q", nn.Key)
  357. }
  358. attrs[nn.Key] = []byte(*nn.Value)
  359. }
  360. if data := attrs[raftAttrKey]; data != nil {
  361. if err := json.Unmarshal(data, &m.RaftAttributes); err != nil {
  362. return nil, fmt.Errorf("unmarshal raftAttributes error: %v", err)
  363. }
  364. } else {
  365. return nil, fmt.Errorf("raftAttributes key doesn't exist")
  366. }
  367. if data := attrs[attrKey]; data != nil {
  368. if err := json.Unmarshal(data, &m.Attributes); err != nil {
  369. return m, fmt.Errorf("unmarshal attributes error: %v", err)
  370. }
  371. }
  372. return m, nil
  373. }
  374. func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
  375. members := make(map[types.ID]*Member)
  376. removed := make(map[types.ID]bool)
  377. e, err := st.Get(storeMembersPrefix, true, true)
  378. if err != nil {
  379. if isKeyNotFound(err) {
  380. return members, removed
  381. }
  382. log.Panicf("get storeMembers should never fail: %v", err)
  383. }
  384. for _, n := range e.Node.Nodes {
  385. m, err := nodeToMember(n)
  386. if err != nil {
  387. log.Panicf("nodeToMember should never fail: %v", err)
  388. }
  389. members[m.ID] = m
  390. }
  391. e, err = st.Get(storeRemovedMembersPrefix, true, true)
  392. if err != nil {
  393. if isKeyNotFound(err) {
  394. return members, removed
  395. }
  396. log.Panicf("get storeRemovedMembers should never fail: %v", err)
  397. }
  398. for _, n := range e.Node.Nodes {
  399. removed[mustParseMemberIDFromKey(n.Key)] = true
  400. }
  401. return members, removed
  402. }
  403. func isKeyNotFound(err error) bool {
  404. e, ok := err.(*etcdErr.Error)
  405. return ok && e.ErrorCode == etcdErr.EcodeKeyNotFound
  406. }