kill_leader_test.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. package test
  2. import (
  3. "bytes"
  4. "fmt"
  5. "os"
  6. "strconv"
  7. "strings"
  8. "testing"
  9. "time"
  10. "github.com/coreos/etcd/server"
  11. "github.com/coreos/etcd/tests"
  12. "github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
  13. "github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
  14. )
  15. // This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times.
  16. // It will print out the election time and the average election time.
  17. func TestKillLeader(t *testing.T) {
  18. procAttr := new(os.ProcAttr)
  19. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  20. clusterSize := 3
  21. argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
  22. if err != nil {
  23. t.Fatal("cannot create cluster")
  24. }
  25. defer DestroyCluster(etcds)
  26. stop := make(chan bool)
  27. leaderChan := make(chan string, 1)
  28. all := make(chan bool, 1)
  29. time.Sleep(time.Second)
  30. go Monitor(clusterSize, 1, leaderChan, all, stop)
  31. var totalTime time.Duration
  32. leader := "http://127.0.0.1:7001"
  33. for i := 0; i < clusterSize; i++ {
  34. fmt.Println("leader is ", leader)
  35. port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
  36. num := port - 7001
  37. fmt.Println("kill server ", num)
  38. etcds[num].Kill()
  39. etcds[num].Release()
  40. start := time.Now()
  41. for {
  42. newLeader := <-leaderChan
  43. if newLeader != leader {
  44. leader = newLeader
  45. break
  46. }
  47. }
  48. take := time.Now().Sub(start)
  49. totalTime += take
  50. avgTime := totalTime / (time.Duration)(i+1)
  51. fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)
  52. etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
  53. }
  54. stop <- true
  55. }
  56. // This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times.
  57. // It will print out the election time and the average election time.
  58. // It runs in a cluster with standby nodes.
  59. func TestKillLeaderWithStandbys(t *testing.T) {
  60. // https://github.com/goraft/raft/issues/222
  61. t.Skip("stuck on raft issue")
  62. procAttr := new(os.ProcAttr)
  63. procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}
  64. clusterSize := 5
  65. argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
  66. if err != nil {
  67. t.Fatal("cannot create cluster")
  68. }
  69. defer DestroyCluster(etcds)
  70. stop := make(chan bool)
  71. leaderChan := make(chan string, 1)
  72. all := make(chan bool, 1)
  73. time.Sleep(time.Second)
  74. go Monitor(clusterSize, 1, leaderChan, all, stop)
  75. c := etcd.NewClient(nil)
  76. c.SyncCluster()
  77. // Reconfigure with a small active size.
  78. resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":2, "syncInterval":1}`))
  79. if !assert.Equal(t, resp.StatusCode, 200) {
  80. t.FailNow()
  81. }
  82. // Wait for two monitor cycles before checking for demotion.
  83. time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))
  84. // Verify that we have 3 peers.
  85. result, err := c.Get("_etcd/machines", true, true)
  86. assert.NoError(t, err)
  87. assert.Equal(t, len(result.Node.Nodes), 3)
  88. var totalTime time.Duration
  89. leader := "http://127.0.0.1:7001"
  90. for i := 0; i < clusterSize; i++ {
  91. t.Log("leader is ", leader)
  92. port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
  93. num := port - 7001
  94. t.Log("kill server ", num)
  95. etcds[num].Kill()
  96. etcds[num].Release()
  97. start := time.Now()
  98. for {
  99. newLeader := <-leaderChan
  100. if newLeader != leader {
  101. leader = newLeader
  102. break
  103. }
  104. }
  105. take := time.Now().Sub(start)
  106. totalTime += take
  107. avgTime := totalTime / (time.Duration)(i+1)
  108. fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)
  109. time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
  110. time.Sleep(2 * time.Second)
  111. // Verify that we have 3 peers.
  112. result, err = c.Get("_etcd/machines", true, true)
  113. assert.NoError(t, err)
  114. assert.Equal(t, len(result.Node.Nodes), 3)
  115. // Verify that killed node is not one of those peers.
  116. _, err = c.Get(fmt.Sprintf("_etcd/machines/node%d", num+1), false, false)
  117. assert.Error(t, err)
  118. etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
  119. }
  120. stop <- true
  121. }