remove_node_test.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // +build ignore
  2. package test
  3. import (
  4. "bytes"
  5. "fmt"
  6. "math/rand"
  7. "net/http"
  8. "os"
  9. "syscall"
  10. "testing"
  11. "time"
  12. "github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
  13. "github.com/coreos/etcd/tests"
  14. "github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
  15. )
  16. // remove the node and node rejoin with previous log
  17. func TestRemoveNode(t *testing.T) {
  18. procAttr := new(os.ProcAttr)
  19. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  20. clusterSize := 4
  21. argGroup, etcds, _ := CreateCluster(clusterSize, procAttr, false)
  22. defer DestroyCluster(etcds)
  23. time.Sleep(time.Second)
  24. c := etcd.NewClient(nil)
  25. c.SyncCluster()
  26. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":4, "syncInterval":5}`))
  27. if !assert.Equal(t, resp.StatusCode, 200) {
  28. t.FailNow()
  29. }
  30. rmReq, _ := http.NewRequest("DELETE", "http://127.0.0.1:7001/remove/node3", nil)
  31. client := &http.Client{}
  32. for i := 0; i < 2; i++ {
  33. for i := 0; i < 2; i++ {
  34. client.Do(rmReq)
  35. fmt.Println("send remove to node3 and wait for its exiting")
  36. time.Sleep(100 * time.Millisecond)
  37. resp, err := c.Get("_etcd/machines", false, false)
  38. if err != nil {
  39. panic(err)
  40. }
  41. if len(resp.Node.Nodes) != 3 {
  42. t.Fatal("cannot remove peer")
  43. }
  44. etcds[2].Kill()
  45. etcds[2].Wait()
  46. if i == 1 {
  47. // rejoin with log
  48. etcds[2], err = os.StartProcess(EtcdBinPath, argGroup[2], procAttr)
  49. } else {
  50. // rejoin without log
  51. etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
  52. }
  53. if err != nil {
  54. panic(err)
  55. }
  56. time.Sleep(time.Second + 5*time.Second)
  57. resp, err = c.Get("_etcd/machines", false, false)
  58. if err != nil {
  59. panic(err)
  60. }
  61. if len(resp.Node.Nodes) != 4 {
  62. t.Fatalf("add peer fails #1 (%d != 4)", len(resp.Node.Nodes))
  63. }
  64. }
  65. // first kill the node, then remove it, then add it back
  66. for i := 0; i < 2; i++ {
  67. etcds[2].Kill()
  68. fmt.Println("kill node3 and wait for its exiting")
  69. etcds[2].Wait()
  70. client.Do(rmReq)
  71. time.Sleep(100 * time.Millisecond)
  72. resp, err := c.Get("_etcd/machines", false, false)
  73. if err != nil {
  74. panic(err)
  75. }
  76. if len(resp.Node.Nodes) != 3 {
  77. t.Fatal("cannot remove peer")
  78. }
  79. if i == 1 {
  80. // rejoin with log
  81. etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2]), procAttr)
  82. } else {
  83. // rejoin without log
  84. etcds[2], err = os.StartProcess(EtcdBinPath, append(argGroup[2], "-f"), procAttr)
  85. }
  86. if err != nil {
  87. panic(err)
  88. }
  89. time.Sleep(time.Second + time.Second)
  90. resp, err = c.Get("_etcd/machines", false, false)
  91. if err != nil {
  92. panic(err)
  93. }
  94. if len(resp.Node.Nodes) != 4 {
  95. t.Fatalf("add peer fails #2 (%d != 4)", len(resp.Node.Nodes))
  96. }
  97. }
  98. }
  99. }
  100. func TestRemovePausedNode(t *testing.T) {
  101. procAttr := new(os.ProcAttr)
  102. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  103. clusterSize := 4
  104. _, etcds, _ := CreateCluster(clusterSize, procAttr, false)
  105. defer DestroyCluster(etcds)
  106. time.Sleep(time.Second)
  107. c := etcd.NewClient(nil)
  108. c.SyncCluster()
  109. r, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":1, "syncInterval":1}`))
  110. if !assert.Equal(t, r.StatusCode, 200) {
  111. t.FailNow()
  112. }
  113. // Wait for standby instances to update its cluster config
  114. time.Sleep(6 * time.Second)
  115. resp, err := c.Get("_etcd/machines", false, false)
  116. if err != nil {
  117. panic(err)
  118. }
  119. if len(resp.Node.Nodes) != 3 {
  120. t.Fatal("cannot remove peer")
  121. }
  122. for i := 0; i < clusterSize; i++ {
  123. // first pause the node, then remove it, then resume it
  124. idx := rand.Int() % clusterSize
  125. etcds[idx].Signal(syscall.SIGSTOP)
  126. fmt.Printf("pause node%d and let standby node take its place\n", idx+1)
  127. time.Sleep(4 * time.Second)
  128. etcds[idx].Signal(syscall.SIGCONT)
  129. // let it change its state to candidate at least
  130. time.Sleep(time.Second)
  131. stop := make(chan bool)
  132. leaderChan := make(chan string, 1)
  133. all := make(chan bool, 1)
  134. go Monitor(clusterSize, clusterSize, leaderChan, all, stop)
  135. <-all
  136. <-leaderChan
  137. stop <- true
  138. resp, err = c.Get("_etcd/machines", false, false)
  139. if err != nil {
  140. panic(err)
  141. }
  142. if len(resp.Node.Nodes) != 3 {
  143. t.Fatalf("add peer fails (%d != 3)", len(resp.Node.Nodes))
  144. }
  145. for i := 0; i < 3; i++ {
  146. if resp.Node.Nodes[i].Key == fmt.Sprintf("node%d", idx+1) {
  147. t.Fatal("node should be removed")
  148. }
  149. }
  150. }
  151. }