kill_leader_test.go

// +build ignore

package test

import (
	"bytes"
	"fmt"
	"os"
	"strconv"
	"strings"
	"testing"
	"time"

	"github.com/coreos/etcd/server"
	"github.com/coreos/etcd/tests"
	"github.com/coreos/etcd/third_party/github.com/coreos/go-etcd/etcd"
	"github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert"
)
// This test repeatedly kills the current leader and waits for the etcd cluster
// to elect a new leader, printing each election time and the running average.
// It runs in a cluster with standby nodes.
func TestKillLeaderWithStandbys(t *testing.T) {
	// https://github.com/goraft/raft/issues/222
	t.Skip("stuck on raft issue")

	procAttr := new(os.ProcAttr)
	procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr}

	clusterSize := 5
	argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false)
	if err != nil {
		t.Fatal("cannot create cluster")
	}
	defer DestroyCluster(etcds)

	stop := make(chan bool)
	leaderChan := make(chan string, 1)
	all := make(chan bool, 1)

	time.Sleep(time.Second)
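	// Monitor is a helper from this test package; as used here it is assumed to
	// poll the cluster in the background, reporting the current leader's peer URL
	// on leaderChan (and overall liveness on all) until it receives a value on stop.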
	go Monitor(clusterSize, 1, leaderChan, all, stop)

	c := etcd.NewClient(nil)
	c.SyncCluster()

	// Reconfigure with a small active size.
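	// The cluster config is assumed to behave as in etcd 0.4's standby mode: activeSize
	// caps the number of Raft peers (extra nodes become standbys), removeDelay is how
	// long a dead peer may be missing before it is removed, and syncInterval is how
	// often standbys sync with the cluster (both in seconds).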
	resp, _ := tests.Put("http://localhost:7001/v2/admin/config", "application/json", bytes.NewBufferString(`{"activeSize":3, "removeDelay":2, "syncInterval":1}`))
	if !assert.Equal(t, resp.StatusCode, 200) {
		t.FailNow()
	}

	// Wait for two monitor cycles before checking for demotion.
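	// ActiveMonitorTimeout is assumed to be the interval at which the leader checks
	// the active size and demotes surplus peers to standbys.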
	time.Sleep((2 * server.ActiveMonitorTimeout) + (2 * time.Second))

	// Verify that we have 3 peers.
	result, err := c.Get("_etcd/machines", true, true)
	assert.NoError(t, err)
	assert.Equal(t, len(result.Node.Nodes), 3)

	var totalTime time.Duration

	leader := "http://127.0.0.1:7001"

	for i := 0; i < clusterSize; i++ {
		t.Log("leader is ", leader)
		port, _ := strconv.Atoi(strings.Split(leader, ":")[2])
		num := port - 7001
		t.Log("kill server ", num)
		etcds[num].Kill()
		etcds[num].Release()

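		// Time how long it takes Monitor to report a leader other than the one
		// we just killed; that is used as the election time for this iteration.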
		start := time.Now()
		for {
			newLeader := <-leaderChan
			if newLeader != leader {
				leader = newLeader
				break
			}
		}
		take := time.Now().Sub(start)

		totalTime += take
		avgTime := totalTime / (time.Duration)(i+1)
		fmt.Println("Total time:", totalTime, "; Avg time:", avgTime)

		time.Sleep(server.ActiveMonitorTimeout + (1 * time.Second))
		time.Sleep(2 * time.Second)

		// Verify that we have 3 peers.
		result, err = c.Get("_etcd/machines", true, true)
		assert.NoError(t, err)
		assert.Equal(t, len(result.Node.Nodes), 3)

		// Verify that killed node is not one of those peers.
		_, err = c.Get(fmt.Sprintf("_etcd/machines/node%d", num+1), false, false)
		assert.Error(t, err)

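		// Restart the killed node. With activeSize still 3, it is expected to come
		// back as a standby rather than rejoining the peer set.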
		etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr)
	}

	stop <- true
}