multi_node_kill_all_and_recovery_test.go 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. package test
  2. import (
  3. "bytes"
  4. "os"
  5. "strconv"
  6. "testing"
  7. "time"
  8. "github.com/coreos/etcd/server"
  9. "github.com/coreos/etcd/tests"
  10. "github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
  11. "github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
  12. )
  13. // Create a five nodes
  14. // Kill all the nodes and restart
  15. func TestMultiNodeKillAllAndRecovery(t *testing.T) {
  16. procAttr := new(os.ProcAttr)
  17. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  18. stop := make(chan bool)
  19. leaderChan := make(chan string, 1)
  20. all := make(chan bool, 1)
  21. clusterSize := 5
  22. argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
  23. defer DestroyCluster(etcds)
  24. if err != nil {
  25. t.Fatal("cannot create cluster")
  26. }
  27. c := etcd.NewClient(nil)
  28. go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
  29. <-all
  30. <-leaderChan
  31. stop <- true
  32. c.SyncCluster()
  33. // send 10 commands
  34. for i := 0; i < 10; i++ {
  35. // Test Set
  36. _, err := c.Set("foo", "bar", 0)
  37. if err != nil {
  38. panic(err)
  39. }
  40. }
  41. time.Sleep(time.Second)
  42. // kill all
  43. DestroyCluster(etcds)
  44. time.Sleep(time.Second)
  45. stop = make(chan bool)
  46. leaderChan = make(chan string, 1)
  47. all = make(chan bool, 1)
  48. time.Sleep(time.Second)
  49. for i := 0; i < clusterSize; i++ {
  50. etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
  51. }
  52. go Monitor(clusterSize, 1, leaderChan, all, stop)
  53. <-all
  54. <-leaderChan
  55. result, err := c.Set("foo", "bar", 0)
  56. if err != nil {
  57. t.Fatalf("Recovery error: %s", err)
  58. }
  59. if result.Node.ModifiedIndex != 17 {
  60. t.Fatalf("recovery failed! [%d/17]", result.Node.ModifiedIndex)
  61. }
  62. }
  63. // TestTLSMultiNodeKillAllAndRecovery create a five nodes
  64. // then kill all the nodes and restart
  65. func TestTLSMultiNodeKillAllAndRecovery(t *testing.T) {
  66. procAttr := new(os.ProcAttr)
  67. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  68. stop := make(chan bool)
  69. leaderChan := make(chan string, 1)
  70. all := make(chan bool, 1)
  71. clusterSize := 5
  72. argGroup, etcds, err := CreateCluster(clusterSize, procAttr, true)
  73. defer DestroyCluster(etcds)
  74. if err != nil {
  75. t.Fatal("cannot create cluster")
  76. }
  77. c := etcd.NewClient(nil)
  78. go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
  79. <-all
  80. <-leaderChan
  81. stop <- true
  82. c.SyncCluster()
  83. // send 10 commands
  84. for i := 0; i < 10; i++ {
  85. // Test Set
  86. _, err := c.Set("foo", "bar", 0)
  87. if err != nil {
  88. panic(err)
  89. }
  90. }
  91. time.Sleep(time.Second)
  92. // kill all
  93. DestroyCluster(etcds)
  94. time.Sleep(time.Second)
  95. stop = make(chan bool)
  96. leaderChan = make(chan string, 1)
  97. all = make(chan bool, 1)
  98. time.Sleep(time.Second)
  99. for i := 0; i < clusterSize; i++ {
  100. etcds[i], err = os.StartProcess(EtcdBinPath, argGroup[i], procAttr)
  101. // See util.go for the reason to wait for server
  102. client := buildClient()
  103. err = WaitForServer("127.0.0.1:400"+strconv.Itoa(i+1), client, "http")
  104. if err != nil {
  105. t.Fatalf("node start error: %s", err)
  106. }
  107. }
  108. go Monitor(clusterSize, 1, leaderChan, all, stop)
  109. <-all
  110. <-leaderChan
  111. result, err := c.Set("foo", "bar", 0)
  112. if err != nil {
  113. t.Fatalf("Recovery error: %s", err)
  114. }
  115. if result.Node.ModifiedIndex != 17 {
  116. t.Fatalf("recovery failed! [%d/17]", result.Node.ModifiedIndex)
  117. }
  118. }
  119. // Create a five-node cluster
  120. // Kill all the nodes and restart
  121. func TestMultiNodeKillAllAndRecoveryWithStandbys(t *testing.T) {
  122. procAttr := new(os.ProcAttr)
  123. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  124. stop := make(chan bool)
  125. leaderChan := make(chan string, 1)
  126. all := make(chan bool, 1)
  127. clusterSize := 15
  128. argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
  129. defer DestroyCluster(etcds)
  130. if err != nil {
  131. t.Fatal("cannot create cluster")
  132. }
  133. c := etcd.NewClient(nil)
  134. go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
  135. <-all
  136. <-leaderChan
  137. stop <- true
  138. c.SyncCluster()
  139. // Reconfigure with smaller active size (7 nodes) and wait for remove.
  140. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":7}`))
  141. if !assert.Equal(t, resp.StatusCode, 200) {
  142. t.FailNow()
  143. }
  144. time.Sleep(2*server.ActiveMonitorTimeout + (1 * time.Second))
  145. // Verify that there is three machines in peer mode.
  146. result, err := c.Get("_etcd/machines", false, true)
  147. assert.NoError(t, err)
  148. assert.Equal(t, len(result.Node.Nodes), 7)
  149. // send set commands
  150. for i := 0; i < 2*clusterSize; i++ {
  151. // Test Set
  152. _, err := c.Set("foo", "bar", 0)
  153. if err != nil {
  154. panic(err)
  155. }
  156. }
  157. time.Sleep(time.Second)
  158. // kill all
  159. DestroyCluster(etcds)
  160. time.Sleep(time.Second)
  161. stop = make(chan bool)
  162. leaderChan = make(chan string, 1)
  163. all = make(chan bool, 1)
  164. time.Sleep(time.Second)
  165. for i := 0; i < clusterSize; i++ {
  166. etcds[i], err = os.StartProcess(EtcdBinPath, append(argGroup[i], "-peers="), procAttr)
  167. }
  168. time.Sleep(2 * time.Second)
  169. // send set commands
  170. for i := 0; i < 2*clusterSize; i++ {
  171. // Test Set
  172. _, err := c.Set("foo", "bar", 0)
  173. if err != nil {
  174. t.Fatalf("Recovery error: %s", err)
  175. }
  176. }
  177. // Verify that we have seven machines.
  178. result, err = c.Get("_etcd/machines", false, true)
  179. assert.NoError(t, err)
  180. assert.Equal(t, len(result.Node.Nodes), 7)
  181. }