kill_leader_test.go

package test

import (
	"bytes"
	"fmt"
	"os"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/coreos/etcd/server"
	"github.com/coreos/etcd/tests"
	"github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
	"github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
)
// This test kills the current leader and waits for the etcd cluster to elect a
// new leader, repeating clusterSize times. After each election it prints the
// total and average election time.
// It runs in a cluster with standby nodes.
func TestKillLeaderWithStandbys(t *testing.T) {
	// https://github.com/goraft/raft/issues/222
	t.Skip("stuck on raft issue")
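	// Each etcd member runs as a separate child process; stdin is unused and
	// stdout/stderr are inherited from the test process.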
	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
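	// CreateCluster is a helper in this test package: it starts clusterSize etcd
	// processes (member i uses peer port 7001+i) and returns each member's argv
	// in argGroup so a killed process can be restarted later.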
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	time.Sleep(time.Second)
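	// Monitor is a helper in this test package: it polls the cluster and sends
	// the current leader's address on leaderChan; it exits when it receives on stop.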
	go Monitor(clusterSize, 1, leaderChan, all, stop)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Reconfigure with a small active size.
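	// activeSize caps the active peers at 3, so two of the five members drop to
	// standby; removeDelay and syncInterval are set low so the cluster
	// reconfigures quickly during the test.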
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we have 3 peers.
	result, err := c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 3)

	var totalTime time.Duration
	leader := "http://127.0.0.1:7001"
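	// Kill whichever member is currently the leader, once per original cluster
	// member, and measure how long each new election takes.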
	for i := 0; i < clusterSize; i++ {
		t.Log("leader is ", leader)
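		// The port in the leader URL identifies which process to kill:
		// member num listens on peer port 7001+num.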
		port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
		num := port - 7001
		t.Log("kill server ", num)
		etcds[num].Kill()
		etcds[num].Release()

		start := time.Now()
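		// Block until Monitor reports a leader different from the one just killed.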
		for {
			newLeader := <-leaderChan
			if newLeader != leader {
				leader = newLeader
				break
			}
		}
		take := time.Since(start)

		totalTime += take
		avgTime := totalTime / time.Duration(i+1)
		fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)
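		// Give the active-size monitor time to promote a standby into the
		// vacated active slot before checking the peer list again.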
		time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
		time.Sleep(2 * time.Second)

		// Verify that we have 3 peers.
		result, err = c.Get("_etcd/machines", true, true)
		assert.NoError(t, err)
		assert.Equal(t, len(result.Node.Nodes), 3)

		// Verify that killed node is not one of those peers.
		_, err = c.Get(fmt.Sprintf("_etcd/machines/node%d", num+1), false, false)
		assert.Error(t, err)
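		// Restart the killed member; with the active set already full it should
		// rejoin as a standby rather than as an active peer.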
		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
		if err != nil {
			t.Fatal("cannot restart etcd: ", err)
		}
	}

	stop <- true
}