cluster_test.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package integration
  15. import (
  16. "fmt"
  17. "log"
  18. "math/rand"
  19. "os"
  20. "strconv"
  21. "testing"
  22. "time"
  23. "github.com/coreos/etcd/client"
  24. "github.com/coreos/etcd/pkg/testutil"
  25. "golang.org/x/net/context"
  26. )
  27. func init() {
  28. // open microsecond-level time log for integration test debugging
  29. log.SetFlags(log.Ltime | log.Lmicroseconds | log.Lshortfile)
  30. if t := os.Getenv("ETCD_ELECTION_TIMEOUT_TICKS"); t != "" {
  31. if i, err := strconv.ParseInt(t, 10, 64); err == nil {
  32. electionTicks = int(i)
  33. }
  34. }
  35. }
  36. func TestClusterOf1(t *testing.T) { testCluster(t, 1) }
  37. func TestClusterOf3(t *testing.T) { testCluster(t, 3) }
  38. func testCluster(t *testing.T, size int) {
  39. defer testutil.AfterTest(t)
  40. c := NewCluster(t, size)
  41. c.Launch(t)
  42. defer c.Terminate(t)
  43. clusterMustProgress(t, c.Members)
  44. }
  45. func TestTLSClusterOf3(t *testing.T) {
  46. defer testutil.AfterTest(t)
  47. c := NewClusterByConfig(t, &ClusterConfig{Size: 3, PeerTLS: &testTLSInfo})
  48. c.Launch(t)
  49. defer c.Terminate(t)
  50. clusterMustProgress(t, c.Members)
  51. }
  52. func TestClusterOf1UsingDiscovery(t *testing.T) { testClusterUsingDiscovery(t, 1) }
  53. func TestClusterOf3UsingDiscovery(t *testing.T) { testClusterUsingDiscovery(t, 3) }
  54. func testClusterUsingDiscovery(t *testing.T, size int) {
  55. defer testutil.AfterTest(t)
  56. dc := NewCluster(t, 1)
  57. dc.Launch(t)
  58. defer dc.Terminate(t)
  59. // init discovery token space
  60. dcc := MustNewHTTPClient(t, dc.URLs(), nil)
  61. dkapi := client.NewKeysAPI(dcc)
  62. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  63. if _, err := dkapi.Create(ctx, "/_config/size", fmt.Sprintf("%d", size)); err != nil {
  64. t.Fatal(err)
  65. }
  66. cancel()
  67. c := NewClusterByConfig(
  68. t,
  69. &ClusterConfig{Size: size, DiscoveryURL: dc.URL(0) + "/v2/keys"},
  70. )
  71. c.Launch(t)
  72. defer c.Terminate(t)
  73. clusterMustProgress(t, c.Members)
  74. }
  75. func TestTLSClusterOf3UsingDiscovery(t *testing.T) {
  76. defer testutil.AfterTest(t)
  77. dc := NewCluster(t, 1)
  78. dc.Launch(t)
  79. defer dc.Terminate(t)
  80. // init discovery token space
  81. dcc := MustNewHTTPClient(t, dc.URLs(), nil)
  82. dkapi := client.NewKeysAPI(dcc)
  83. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  84. if _, err := dkapi.Create(ctx, "/_config/size", fmt.Sprintf("%d", 3)); err != nil {
  85. t.Fatal(err)
  86. }
  87. cancel()
  88. c := NewClusterByConfig(t,
  89. &ClusterConfig{
  90. Size: 3,
  91. PeerTLS: &testTLSInfo,
  92. DiscoveryURL: dc.URL(0) + "/v2/keys"},
  93. )
  94. c.Launch(t)
  95. defer c.Terminate(t)
  96. clusterMustProgress(t, c.Members)
  97. }
  98. func TestDoubleClusterSizeOf1(t *testing.T) { testDoubleClusterSize(t, 1) }
  99. func TestDoubleClusterSizeOf3(t *testing.T) { testDoubleClusterSize(t, 3) }
  100. func testDoubleClusterSize(t *testing.T, size int) {
  101. defer testutil.AfterTest(t)
  102. c := NewCluster(t, size)
  103. c.Launch(t)
  104. defer c.Terminate(t)
  105. for i := 0; i < size; i++ {
  106. c.AddMember(t)
  107. }
  108. clusterMustProgress(t, c.Members)
  109. }
  110. func TestDoubleTLSClusterSizeOf3(t *testing.T) {
  111. defer testutil.AfterTest(t)
  112. c := NewClusterByConfig(t, &ClusterConfig{Size: 3, PeerTLS: &testTLSInfo})
  113. c.Launch(t)
  114. defer c.Terminate(t)
  115. for i := 0; i < 3; i++ {
  116. c.AddMember(t)
  117. }
  118. clusterMustProgress(t, c.Members)
  119. }
  120. func TestDecreaseClusterSizeOf3(t *testing.T) { testDecreaseClusterSize(t, 3) }
  121. func TestDecreaseClusterSizeOf5(t *testing.T) { testDecreaseClusterSize(t, 5) }
  122. func testDecreaseClusterSize(t *testing.T, size int) {
  123. defer testutil.AfterTest(t)
  124. c := NewCluster(t, size)
  125. c.Launch(t)
  126. defer c.Terminate(t)
  127. // TODO: remove the last but one member
  128. for i := 0; i < size-1; i++ {
  129. id := c.Members[len(c.Members)-1].s.ID()
  130. c.RemoveMember(t, uint64(id))
  131. c.waitLeader(t, c.Members)
  132. }
  133. clusterMustProgress(t, c.Members)
  134. }
  135. func TestForceNewCluster(t *testing.T) {
  136. c := NewCluster(t, 3)
  137. c.Launch(t)
  138. cc := MustNewHTTPClient(t, []string{c.Members[0].URL()}, nil)
  139. kapi := client.NewKeysAPI(cc)
  140. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  141. resp, err := kapi.Create(ctx, "/foo", "bar")
  142. if err != nil {
  143. t.Fatalf("unexpected create error: %v", err)
  144. }
  145. cancel()
  146. // ensure create has been applied in this machine
  147. ctx, cancel = context.WithTimeout(context.Background(), requestTimeout)
  148. if _, err = kapi.Watcher("/foo", &client.WatcherOptions{AfterIndex: resp.Node.ModifiedIndex - 1}).Next(ctx); err != nil {
  149. t.Fatalf("unexpected watch error: %v", err)
  150. }
  151. cancel()
  152. c.Members[0].Stop(t)
  153. c.Members[1].Terminate(t)
  154. c.Members[2].Terminate(t)
  155. c.Members[0].ForceNewCluster = true
  156. err = c.Members[0].Restart(t)
  157. if err != nil {
  158. t.Fatalf("unexpected ForceRestart error: %v", err)
  159. }
  160. defer c.Members[0].Terminate(t)
  161. c.waitLeader(t, c.Members[:1])
  162. // use new http client to init new connection
  163. cc = MustNewHTTPClient(t, []string{c.Members[0].URL()}, nil)
  164. kapi = client.NewKeysAPI(cc)
  165. // ensure force restart keep the old data, and new cluster can make progress
  166. ctx, cancel = context.WithTimeout(context.Background(), requestTimeout)
  167. if _, err := kapi.Watcher("/foo", &client.WatcherOptions{AfterIndex: resp.Node.ModifiedIndex - 1}).Next(ctx); err != nil {
  168. t.Fatalf("unexpected watch error: %v", err)
  169. }
  170. cancel()
  171. clusterMustProgress(t, c.Members[:1])
  172. }
  173. func TestAddMemberAfterClusterFullRotation(t *testing.T) {
  174. defer testutil.AfterTest(t)
  175. c := NewCluster(t, 3)
  176. c.Launch(t)
  177. defer c.Terminate(t)
  178. // remove all the previous three members and add in three new members.
  179. for i := 0; i < 3; i++ {
  180. c.RemoveMember(t, uint64(c.Members[0].s.ID()))
  181. c.waitLeader(t, c.Members)
  182. c.AddMember(t)
  183. c.waitLeader(t, c.Members)
  184. }
  185. c.AddMember(t)
  186. c.waitLeader(t, c.Members)
  187. clusterMustProgress(t, c.Members)
  188. }
  189. // Ensure we can remove a member then add a new one back immediately.
  190. func TestIssue2681(t *testing.T) {
  191. defer testutil.AfterTest(t)
  192. c := NewCluster(t, 5)
  193. c.Launch(t)
  194. defer c.Terminate(t)
  195. c.RemoveMember(t, uint64(c.Members[4].s.ID()))
  196. c.waitLeader(t, c.Members)
  197. c.AddMember(t)
  198. c.waitLeader(t, c.Members)
  199. clusterMustProgress(t, c.Members)
  200. }
  201. // Ensure we can remove a member after a snapshot then add a new one back.
  202. func TestIssue2746(t *testing.T) { testIssue2746(t, 5) }
  203. // With 3 nodes TestIssue2476 sometimes had a shutdown with an inflight snapshot.
  204. func TestIssue2746WithThree(t *testing.T) { testIssue2746(t, 3) }
  205. func testIssue2746(t *testing.T, members int) {
  206. defer testutil.AfterTest(t)
  207. c := NewCluster(t, members)
  208. for _, m := range c.Members {
  209. m.SnapCount = 10
  210. }
  211. c.Launch(t)
  212. defer c.Terminate(t)
  213. // force a snapshot
  214. for i := 0; i < 20; i++ {
  215. clusterMustProgress(t, c.Members)
  216. }
  217. c.RemoveMember(t, uint64(c.Members[members-1].s.ID()))
  218. c.waitLeader(t, c.Members)
  219. c.AddMember(t)
  220. c.waitLeader(t, c.Members)
  221. clusterMustProgress(t, c.Members)
  222. }
  223. // Ensure etcd will not panic when removing a just started member.
  224. func TestIssue2904(t *testing.T) {
  225. defer testutil.AfterTest(t)
  226. // start 1-member cluster to ensure member 0 is the leader of the cluster.
  227. c := NewCluster(t, 1)
  228. c.Launch(t)
  229. defer c.Terminate(t)
  230. c.AddMember(t)
  231. c.Members[1].Stop(t)
  232. // send remove member-1 request to the cluster.
  233. cc := MustNewHTTPClient(t, c.URLs(), nil)
  234. ma := client.NewMembersAPI(cc)
  235. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  236. // the proposal is not committed because member 1 is stopped, but the
  237. // proposal is appended to leader's raft log.
  238. ma.Remove(ctx, c.Members[1].s.ID().String())
  239. cancel()
  240. // restart member, and expect it to send UpdateAttributes request.
  241. // the log in the leader is like this:
  242. // [..., remove 1, ..., update attr 1, ...]
  243. c.Members[1].Restart(t)
  244. // when the member comes back, it ack the proposal to remove itself,
  245. // and apply it.
  246. <-c.Members[1].s.StopNotify()
  247. // terminate removed member
  248. c.Members[1].Terminate(t)
  249. c.Members = c.Members[:1]
  250. // wait member to be removed.
  251. c.waitMembersMatch(t, c.HTTPMembers())
  252. }
  253. // TestIssue3699 tests minority failure during cluster configuration; it was
  254. // deadlocking.
  255. func TestIssue3699(t *testing.T) {
  256. // start a cluster of 3 nodes a, b, c
  257. defer testutil.AfterTest(t)
  258. c := NewCluster(t, 3)
  259. c.Launch(t)
  260. defer c.Terminate(t)
  261. // make node a unavailable
  262. c.Members[0].Stop(t)
  263. // add node d
  264. c.AddMember(t)
  265. // electing node d as leader makes node a unable to participate
  266. leaderID := c.waitLeader(t, c.Members)
  267. for leaderID != 3 {
  268. c.Members[leaderID].Stop(t)
  269. <-c.Members[leaderID].s.StopNotify()
  270. c.Members[leaderID].Restart(t)
  271. leaderID = c.waitLeader(t, c.Members)
  272. }
  273. // bring back node a
  274. // node a will remain useless as long as d is the leader.
  275. if err := c.Members[0].Restart(t); err != nil {
  276. t.Fatal(err)
  277. }
  278. select {
  279. // waiting for ReadyNotify can take several seconds
  280. case <-time.After(10 * time.Second):
  281. t.Fatalf("waited too long for ready notification")
  282. case <-c.Members[0].s.StopNotify():
  283. t.Fatalf("should not be stopped")
  284. case <-c.Members[0].s.ReadyNotify():
  285. }
  286. // must waitLeader so goroutines don't leak on terminate
  287. c.waitLeader(t, c.Members)
  288. // try to participate in cluster
  289. cc := MustNewHTTPClient(t, []string{c.URL(0)}, c.cfg.ClientTLS)
  290. kapi := client.NewKeysAPI(cc)
  291. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  292. if _, err := kapi.Set(ctx, "/foo", "bar", nil); err != nil {
  293. t.Fatalf("unexpected error on Set (%v)", err)
  294. }
  295. cancel()
  296. }
  297. // clusterMustProgress ensures that cluster can make progress. It creates
  298. // a random key first, and check the new key could be got from all client urls
  299. // of the cluster.
  300. func clusterMustProgress(t *testing.T, membs []*member) {
  301. cc := MustNewHTTPClient(t, []string{membs[0].URL()}, nil)
  302. kapi := client.NewKeysAPI(cc)
  303. ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
  304. key := fmt.Sprintf("foo%d", rand.Int())
  305. resp, err := kapi.Create(ctx, "/"+key, "bar")
  306. if err != nil {
  307. t.Fatalf("create on %s error: %v", membs[0].URL(), err)
  308. }
  309. cancel()
  310. for i, m := range membs {
  311. u := m.URL()
  312. mcc := MustNewHTTPClient(t, []string{u}, nil)
  313. mkapi := client.NewKeysAPI(mcc)
  314. mctx, mcancel := context.WithTimeout(context.Background(), requestTimeout)
  315. if _, err := mkapi.Watcher(key, &client.WatcherOptions{AfterIndex: resp.Node.ModifiedIndex - 1}).Next(mctx); err != nil {
  316. t.Fatalf("#%d: watch on %s error: %v", i, u, err)
  317. }
  318. mcancel()
  319. }
  320. }