cluster_test.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548
  1. /*
  2. Copyright 2014 CoreOS, Inc.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package integration
  14. import (
  15. "fmt"
  16. "io/ioutil"
  17. "log"
  18. "net"
  19. "net/http"
  20. "net/http/httptest"
  21. "os"
  22. "reflect"
  23. "sort"
  24. "strings"
  25. "testing"
  26. "time"
  27. "github.com/coreos/etcd/client"
  28. "github.com/coreos/etcd/etcdserver"
  29. "github.com/coreos/etcd/etcdserver/etcdhttp"
  30. "github.com/coreos/etcd/etcdserver/etcdhttp/httptypes"
  31. "github.com/coreos/etcd/pkg/types"
  32. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  33. )
  // Shared settings for the integration cluster tests.
  const (
  	// clusterName is the name passed to etcdserver when building the
  	// cluster configuration of each member.
  	clusterName = "etcd"

  	// tickDuration is the period of the ticker given to every launched
  	// server and the base polling interval of the wait helpers.
  	tickDuration = 10 * time.Millisecond

  	// requestTimeout bounds every client request issued by the tests.
  	requestTimeout = 2 * time.Second
  )
  // init switches the standard logger to time-of-day stamps with microsecond
  // resolution plus short file names, which eases debugging of the
  // integration tests.
  func init() {
  	const debugFlags = log.Ltime | log.Lmicroseconds | log.Lshortfile
  	log.SetFlags(debugFlags)
  }
  43. func TestClusterOf1(t *testing.T) { testCluster(t, 1) }
  44. func TestClusterOf3(t *testing.T) { testCluster(t, 3) }
  45. func testCluster(t *testing.T, size int) {
  46. defer afterTest(t)
  47. c := NewCluster(t, size)
  48. c.Launch(t)
  49. defer c.Terminate(t)
  50. clusterMustProgress(t, c)
  51. }
  52. func TestClusterOf1UsingDiscovery(t *testing.T) { testClusterUsingDiscovery(t, 1) }
  53. func TestClusterOf3UsingDiscovery(t *testing.T) { testClusterUsingDiscovery(t, 3) }
  54. func testClusterUsingDiscovery(t *testing.T, size int) {
  55. defer afterTest(t)
  56. dc := NewCluster(t, 1)
  57. dc.Launch(t)
  58. defer dc.Terminate(t)
  59. // init discovery token space
  60. dcc := mustNewHTTPClient(t, dc.URLs())
  61. dkapi := client.NewKeysAPI(dcc)
  62. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  63. if _, err := dkapi.Create(ctx, "/_config/size", fmt.Sprintf("%d", size), -1); err != nil {
  64. t.Fatal(err)
  65. }
  66. cancel()
  67. c := NewClusterByDiscovery(t, size, dc.URL(0)+"/v2/keys")
  68. c.Launch(t)
  69. defer c.Terminate(t)
  70. clusterMustProgress(t, c)
  71. }
  72. func TestDoubleClusterSizeOf1(t *testing.T) { testDoubleClusterSize(t, 1) }
  73. func TestDoubleClusterSizeOf3(t *testing.T) { testDoubleClusterSize(t, 3) }
  74. func testDoubleClusterSize(t *testing.T, size int) {
  75. defer afterTest(t)
  76. c := NewCluster(t, size)
  77. c.Launch(t)
  78. defer c.Terminate(t)
  79. for i := 0; i < size; i++ {
  80. c.AddMember(t)
  81. }
  82. clusterMustProgress(t, c)
  83. }
  84. func TestDecreaseClusterSizeOf3(t *testing.T) { testDecreaseClusterSize(t, 3) }
  85. func TestDecreaseClusterSizeOf5(t *testing.T) { testDecreaseClusterSize(t, 5) }
  86. func testDecreaseClusterSize(t *testing.T, size int) {
  87. defer afterTest(t)
  88. c := NewCluster(t, size)
  89. c.Launch(t)
  90. defer c.Terminate(t)
  91. // TODO: remove the last but one member
  92. for i := 0; i < size-2; i++ {
  93. id := c.Members[len(c.Members)-1].s.ID()
  94. c.RemoveMember(t, uint64(id))
  95. c.waitLeader(t)
  96. }
  97. clusterMustProgress(t, c)
  98. }
  99. // clusterMustProgress ensures that cluster can make progress. It creates
  100. // a key first, and check the new key could be got from all client urls of
  101. // the cluster.
  102. func clusterMustProgress(t *testing.T, cl *cluster) {
  103. cc := mustNewHTTPClient(t, []string{cl.URL(0)})
  104. kapi := client.NewKeysAPI(cc)
  105. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  106. resp, err := kapi.Create(ctx, "/foo", "bar", -1)
  107. if err != nil {
  108. t.Fatalf("create on %s error: %v", cl.URL(0), err)
  109. }
  110. cancel()
  111. for i, u := range cl.URLs() {
  112. cc := mustNewHTTPClient(t, []string{u})
  113. kapi := client.NewKeysAPI(cc)
  114. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  115. if _, err := kapi.Watch("foo", resp.Node.ModifiedIndex).Next(ctx); err != nil {
  116. t.Fatalf("#%d: watch on %s error: %v", i, u, err)
  117. }
  118. cancel()
  119. }
  120. }
  121. // TODO: support TLS
  122. type cluster struct {
  123. Members []*member
  124. }
  125. func fillClusterForMembers(ms []*member, cName string) error {
  126. addrs := make([]string, 0)
  127. for _, m := range ms {
  128. for _, l := range m.PeerListeners {
  129. addrs = append(addrs, fmt.Sprintf("%s=%s", m.Name, "http://"+l.Addr().String()))
  130. }
  131. }
  132. clusterStr := strings.Join(addrs, ",")
  133. var err error
  134. for _, m := range ms {
  135. m.Cluster, err = etcdserver.NewClusterFromString(cName, clusterStr)
  136. if err != nil {
  137. return err
  138. }
  139. }
  140. return nil
  141. }
  142. // NewCluster returns an unlaunched cluster of the given size which has been
  143. // set to use static bootstrap.
  144. func NewCluster(t *testing.T, size int) *cluster {
  145. c := &cluster{}
  146. ms := make([]*member, size)
  147. for i := 0; i < size; i++ {
  148. ms[i] = mustNewMember(t, c.name(i))
  149. }
  150. c.Members = ms
  151. if err := fillClusterForMembers(c.Members, clusterName); err != nil {
  152. t.Fatal(err)
  153. }
  154. return c
  155. }
  156. // NewClusterUsingDiscovery returns an unlaunched cluster of the given size
  157. // which has been set to use the given url as discovery service to bootstrap.
  158. func NewClusterByDiscovery(t *testing.T, size int, url string) *cluster {
  159. c := &cluster{}
  160. ms := make([]*member, size)
  161. for i := 0; i < size; i++ {
  162. ms[i] = mustNewMember(t, c.name(i))
  163. ms[i].DiscoveryURL = url
  164. }
  165. c.Members = ms
  166. return c
  167. }
  168. func (c *cluster) Launch(t *testing.T) {
  169. errc := make(chan error)
  170. for _, m := range c.Members {
  171. // Members are launched in separate goroutines because if they boot
  172. // using discovery url, they have to wait for others to register to continue.
  173. go func(m *member) {
  174. errc <- m.Launch()
  175. }(m)
  176. }
  177. for _ = range c.Members {
  178. if err := <-errc; err != nil {
  179. t.Fatalf("error setting up member: %v", err)
  180. }
  181. }
  182. // wait cluster to be stable to receive future client requests
  183. c.waitMembersMatch(t, c.HTTPMembers())
  184. }
  185. func (c *cluster) URL(i int) string {
  186. return c.Members[i].ClientURLs[0].String()
  187. }
  188. func (c *cluster) URLs() []string {
  189. urls := make([]string, 0)
  190. for _, m := range c.Members {
  191. for _, u := range m.ClientURLs {
  192. urls = append(urls, u.String())
  193. }
  194. }
  195. return urls
  196. }
  197. func (c *cluster) HTTPMembers() []httptypes.Member {
  198. ms := make([]httptypes.Member, len(c.Members))
  199. for i, m := range c.Members {
  200. ms[i].Name = m.Name
  201. for _, ln := range m.PeerListeners {
  202. ms[i].PeerURLs = append(ms[i].PeerURLs, "http://"+ln.Addr().String())
  203. }
  204. for _, ln := range m.ClientListeners {
  205. ms[i].ClientURLs = append(ms[i].ClientURLs, "http://"+ln.Addr().String())
  206. }
  207. }
  208. return ms
  209. }
  210. func (c *cluster) AddMember(t *testing.T) {
  211. clusterStr := c.Members[0].Cluster.String()
  212. idx := len(c.Members)
  213. m := mustNewMember(t, c.name(idx))
  214. // send add request to the cluster
  215. cc := mustNewHTTPClient(t, []string{c.URL(0)})
  216. ma := client.NewMembersAPI(cc)
  217. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  218. peerURL := "http://" + m.PeerListeners[0].Addr().String()
  219. if _, err := ma.Add(ctx, peerURL); err != nil {
  220. t.Fatalf("add member on %s error: %v", c.URL(0), err)
  221. }
  222. cancel()
  223. // wait for the add node entry applied in the cluster
  224. members := append(c.HTTPMembers(), httptypes.Member{PeerURLs: []string{peerURL}, ClientURLs: []string{}})
  225. c.waitMembersMatch(t, members)
  226. for _, ln := range m.PeerListeners {
  227. clusterStr += fmt.Sprintf(",%s=http://%s", m.Name, ln.Addr().String())
  228. }
  229. var err error
  230. m.Cluster, err = etcdserver.NewClusterFromString(clusterName, clusterStr)
  231. if err != nil {
  232. t.Fatal(err)
  233. }
  234. m.NewCluster = false
  235. if err := m.Launch(); err != nil {
  236. t.Fatal(err)
  237. }
  238. c.Members = append(c.Members, m)
  239. // wait cluster to be stable to receive future client requests
  240. c.waitMembersMatch(t, c.HTTPMembers())
  241. }
  242. func (c *cluster) RemoveMember(t *testing.T, id uint64) {
  243. // send remove request to the cluster
  244. cc := mustNewHTTPClient(t, []string{c.URL(0)})
  245. ma := client.NewMembersAPI(cc)
  246. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  247. if err := ma.Remove(ctx, types.ID(id).String()); err != nil {
  248. t.Fatalf("unexpected remove error %v", err)
  249. }
  250. cancel()
  251. newMembers := make([]*member, 0)
  252. for _, m := range c.Members {
  253. if uint64(m.s.ID()) != id {
  254. newMembers = append(newMembers, m)
  255. } else {
  256. select {
  257. case <-m.s.StopNotify():
  258. m.Terminate(t)
  259. case <-time.After(time.Second):
  260. t.Fatalf("failed to remove member %s in one second", m.s.ID())
  261. }
  262. }
  263. }
  264. c.Members = newMembers
  265. c.waitMembersMatch(t, c.HTTPMembers())
  266. }
  267. func (c *cluster) Terminate(t *testing.T) {
  268. for _, m := range c.Members {
  269. m.Terminate(t)
  270. }
  271. }
  272. func (c *cluster) waitMembersMatch(t *testing.T, membs []httptypes.Member) {
  273. for _, u := range c.URLs() {
  274. cc := mustNewHTTPClient(t, []string{u})
  275. ma := client.NewMembersAPI(cc)
  276. for {
  277. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  278. ms, err := ma.List(ctx)
  279. cancel()
  280. if err == nil && isMembersEqual(ms, membs) {
  281. break
  282. }
  283. time.Sleep(tickDuration)
  284. }
  285. }
  286. return
  287. }
  288. func (c *cluster) waitLeader(t *testing.T) {
  289. possibleLead := make(map[uint64]bool)
  290. var lead uint64
  291. for _, m := range c.Members {
  292. possibleLead[uint64(m.s.ID())] = true
  293. }
  294. for lead == 0 || !possibleLead[lead] {
  295. lead = 0
  296. for _, m := range c.Members {
  297. if lead != 0 && lead != m.s.Lead() {
  298. lead = 0
  299. break
  300. }
  301. lead = m.s.Lead()
  302. }
  303. time.Sleep(10 * tickDuration)
  304. }
  305. }
  306. func (c *cluster) name(i int) string {
  307. return fmt.Sprint("node", i)
  308. }
  309. // isMembersEqual checks whether two members equal except ID field.
  310. // The given wmembs should always set ID field to empty string.
  311. func isMembersEqual(membs []httptypes.Member, wmembs []httptypes.Member) bool {
  312. sort.Sort(SortableMemberSliceByPeerURLs(membs))
  313. sort.Sort(SortableMemberSliceByPeerURLs(wmembs))
  314. for i := range membs {
  315. membs[i].ID = ""
  316. }
  317. return reflect.DeepEqual(membs, wmembs)
  318. }
  // newLocalListener returns a TCP listener bound to 127.0.0.1 on a port
  // picked by the system. The test is failed if listening is not possible.
  func newLocalListener(t *testing.T) net.Listener {
  	ln, err := net.Listen("tcp", "127.0.0.1:0")
  	if err == nil {
  		return ln
  	}
  	t.Fatal(err)
  	return nil
  }
  // newListenerWithAddr returns a TCP listener bound to addr. Listening is
  // attempted up to five times, pausing 500ms between attempts, so that a
  // port that was closed just before can be reused. If every attempt fails,
  // the test is failed with the last error.
  func newListenerWithAddr(t *testing.T, addr string) net.Listener {
  	const (
  		attempts   = 5
  		retryDelay = 500 * time.Millisecond
  	)
  	var err error
  	// TODO: we want to reuse a previous closed port immediately.
  	// a better way is to set SO_REUSExx instead of doing retry.
  	for i := 0; i < attempts; i++ {
  		var l net.Listener
  		l, err = net.Listen("tcp", addr)
  		if err == nil {
  			return l
  		}
  		// Pause only between attempts; sleeping after the last failure
  		// would merely delay the error report.
  		if i < attempts-1 {
  			time.Sleep(retryDelay)
  		}
  	}
  	t.Fatal(err)
  	return nil
  }
  343. type member struct {
  344. etcdserver.ServerConfig
  345. PeerListeners, ClientListeners []net.Listener
  346. s *etcdserver.EtcdServer
  347. hss []*httptest.Server
  348. }
  349. func mustNewMember(t *testing.T, name string) *member {
  350. var err error
  351. m := &member{}
  352. pln := newLocalListener(t)
  353. m.PeerListeners = []net.Listener{pln}
  354. m.PeerURLs, err = types.NewURLs([]string{"http://" + pln.Addr().String()})
  355. if err != nil {
  356. t.Fatal(err)
  357. }
  358. cln := newLocalListener(t)
  359. m.ClientListeners = []net.Listener{cln}
  360. m.ClientURLs, err = types.NewURLs([]string{"http://" + cln.Addr().String()})
  361. if err != nil {
  362. t.Fatal(err)
  363. }
  364. m.Name = name
  365. m.DataDir, err = ioutil.TempDir(os.TempDir(), "etcd")
  366. if err != nil {
  367. t.Fatal(err)
  368. }
  369. clusterStr := fmt.Sprintf("%s=http://%s", name, pln.Addr().String())
  370. m.Cluster, err = etcdserver.NewClusterFromString(clusterName, clusterStr)
  371. if err != nil {
  372. t.Fatal(err)
  373. }
  374. m.NewCluster = true
  375. m.Transport = newTransport()
  376. return m
  377. }
  378. // Clone returns a member with the same server configuration. The returned
  379. // member will not set PeerListeners and ClientListeners.
  380. func (m *member) Clone() *member {
  381. mm := &member{}
  382. mm.ServerConfig = m.ServerConfig
  383. var err error
  384. clientURLStrs := m.ClientURLs.StringSlice()
  385. mm.ClientURLs, err = types.NewURLs(clientURLStrs)
  386. if err != nil {
  387. // this should never fail
  388. panic(err)
  389. }
  390. peerURLStrs := m.PeerURLs.StringSlice()
  391. mm.PeerURLs, err = types.NewURLs(peerURLStrs)
  392. if err != nil {
  393. // this should never fail
  394. panic(err)
  395. }
  396. clusterStr := m.Cluster.String()
  397. mm.Cluster, err = etcdserver.NewClusterFromString(clusterName, clusterStr)
  398. if err != nil {
  399. // this should never fail
  400. panic(err)
  401. }
  402. mm.Transport = newTransport()
  403. return mm
  404. }
  405. // Launch starts a member based on ServerConfig, PeerListeners
  406. // and ClientListeners.
  407. func (m *member) Launch() error {
  408. var err error
  409. if m.s, err = etcdserver.NewServer(&m.ServerConfig); err != nil {
  410. return fmt.Errorf("failed to initialize the etcd server: %v", err)
  411. }
  412. m.s.Ticker = time.Tick(tickDuration)
  413. m.s.SyncTicker = time.Tick(500 * time.Millisecond)
  414. m.s.Start()
  415. for _, ln := range m.PeerListeners {
  416. hs := &httptest.Server{
  417. Listener: ln,
  418. Config: &http.Server{Handler: etcdhttp.NewPeerHandler(m.s)},
  419. }
  420. hs.Start()
  421. m.hss = append(m.hss, hs)
  422. }
  423. for _, ln := range m.ClientListeners {
  424. hs := &httptest.Server{
  425. Listener: ln,
  426. Config: &http.Server{Handler: etcdhttp.NewClientHandler(m.s)},
  427. }
  428. hs.Start()
  429. m.hss = append(m.hss, hs)
  430. }
  431. return nil
  432. }
  433. // Stop stops the member, but the data dir of the member is preserved.
  434. func (m *member) Stop(t *testing.T) {
  435. m.s.Stop()
  436. for _, hs := range m.hss {
  437. hs.CloseClientConnections()
  438. hs.Close()
  439. }
  440. m.hss = nil
  441. }
  442. // Start starts the member using the preserved data dir.
  443. func (m *member) Restart(t *testing.T) error {
  444. newPeerListeners := make([]net.Listener, 0)
  445. for _, ln := range m.PeerListeners {
  446. newPeerListeners = append(newPeerListeners, newListenerWithAddr(t, ln.Addr().String()))
  447. }
  448. m.PeerListeners = newPeerListeners
  449. newClientListeners := make([]net.Listener, 0)
  450. for _, ln := range m.ClientListeners {
  451. newClientListeners = append(newClientListeners, newListenerWithAddr(t, ln.Addr().String()))
  452. }
  453. m.ClientListeners = newClientListeners
  454. return m.Launch()
  455. }
  456. // Terminate stops the member and removes the data dir.
  457. func (m *member) Terminate(t *testing.T) {
  458. m.s.Stop()
  459. for _, hs := range m.hss {
  460. hs.CloseClientConnections()
  461. hs.Close()
  462. }
  463. if err := os.RemoveAll(m.ServerConfig.DataDir); err != nil {
  464. t.Fatal(err)
  465. }
  466. }
  467. func mustNewHTTPClient(t *testing.T, eps []string) client.HTTPClient {
  468. cc, err := client.NewHTTPClient(newTransport(), eps)
  469. if err != nil {
  470. t.Fatal(err)
  471. }
  472. return cc
  473. }
  // newTransport returns a new HTTP transport with keep-alives disabled and
  // a 100ms dial timeout.
  func newTransport() *http.Transport {
  	dialer := &net.Dialer{Timeout: 100 * time.Millisecond}
  	return &http.Transport{
  		// TODO: need the support of graceful stop in Sender to remove this
  		DisableKeepAlives: true,
  		Dial:              dialer.Dial,
  	}
  }
  481. type SortableMemberSliceByPeerURLs []httptypes.Member
  482. func (p SortableMemberSliceByPeerURLs) Len() int { return len(p) }
  483. func (p SortableMemberSliceByPeerURLs) Less(i, j int) bool {
  484. return p[i].PeerURLs[0] < p[j].PeerURLs[0]
  485. }
  486. func (p SortableMemberSliceByPeerURLs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }