cluster_health.go 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. package command
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "net/http"
  7. "os"
  8. "sort"
  9. "time"
  10. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
  11. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  12. "github.com/coreos/etcd/etcdserver/stats"
  13. )
  14. func NewClusterHealthCommand() cli.Command {
  15. return cli.Command{
  16. Name: "cluster-health",
  17. Usage: "check the health of the etcd cluster",
  18. Flags: []cli.Flag{},
  19. Action: handleClusterHealth,
  20. }
  21. }
  22. func handleClusterHealth(c *cli.Context) {
  23. tr, err := getTransport(c)
  24. if err != nil {
  25. handleError(ExitServerError, err)
  26. }
  27. mi := mustNewMembersAPI(c)
  28. ms, err := mi.List(context.TODO())
  29. if err != nil {
  30. handleError(ExitServerError, err)
  31. }
  32. cl := make([]string, 0)
  33. for _, m := range ms {
  34. cl = append(cl, m.ClientURLs...)
  35. }
  36. // check the /health endpoint of all members first
  37. ep, ls0, err := getLeaderStats(tr, cl)
  38. if err != nil {
  39. fmt.Println("cluster may be unhealthy: failed to connect", cl)
  40. os.Exit(1)
  41. }
  42. time.Sleep(time.Second)
  43. // are all the members makeing progress?
  44. _, ls1, err := getLeaderStats(tr, []string{ep})
  45. if err != nil {
  46. fmt.Println("cluster is unhealthy")
  47. os.Exit(1)
  48. }
  49. fmt.Println("cluster is healthy")
  50. // self is healthy
  51. var prints []string
  52. prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
  53. for name, fs0 := range ls0.Followers {
  54. fs1, ok := ls1.Followers[name]
  55. if !ok {
  56. fmt.Println("Cluster configuration changed during health checking. Please retry.")
  57. os.Exit(1)
  58. }
  59. if fs1.Counts.Success <= fs0.Counts.Success {
  60. prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
  61. } else {
  62. prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
  63. }
  64. }
  65. sort.Strings(prints)
  66. for _, p := range prints {
  67. fmt.Print(p)
  68. }
  69. os.Exit(0)
  70. }
  71. func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
  72. // go-etcd does not support cluster stats, use http client for now
  73. // TODO: use new etcd client with new member/stats endpoint
  74. httpclient := http.Client{
  75. Transport: tr,
  76. }
  77. for _, ep := range endpoints {
  78. resp, err := httpclient.Get(ep + "/v2/stats/leader")
  79. if err != nil {
  80. continue
  81. }
  82. defer resp.Body.Close()
  83. if resp.StatusCode != http.StatusOK {
  84. continue
  85. }
  86. ls := &stats.LeaderStats{}
  87. d := json.NewDecoder(resp.Body)
  88. err = d.Decode(ls)
  89. if err != nil {
  90. continue
  91. }
  92. return ep, ls, nil
  93. }
  94. return "", nil, errors.New("no leader")
  95. }