cluster_health.go 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. package command
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "net/http"
  6. "os"
  7. "os/signal"
  8. "time"
  9. "github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
  10. "github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
  11. )
  12. func NewClusterHealthCommand() cli.Command {
  13. return cli.Command{
  14. Name: "cluster-health",
  15. Usage: "check the health of the etcd cluster",
  16. Flags: []cli.Flag{
  17. cli.BoolFlag{Name: "forever", Usage: "forever check the health every 10 second until CTRL+C"},
  18. },
  19. Action: handleClusterHealth,
  20. }
  21. }
  22. func handleClusterHealth(c *cli.Context) {
  23. forever := c.Bool("forever")
  24. if forever {
  25. sigch := make(chan os.Signal, 1)
  26. signal.Notify(sigch, os.Interrupt)
  27. go func() {
  28. <-sigch
  29. os.Exit(0)
  30. }()
  31. }
  32. tr, err := getTransport(c)
  33. if err != nil {
  34. handleError(ExitServerError, err)
  35. }
  36. hc := http.Client{
  37. Transport: tr,
  38. }
  39. mi := mustNewMembersAPI(c)
  40. ms, err := mi.List(context.TODO())
  41. if err != nil {
  42. fmt.Println("cluster may be unhealthy: failed to list members")
  43. handleError(ExitServerError, err)
  44. }
  45. for {
  46. health := false
  47. for _, m := range ms {
  48. checked := false
  49. for _, url := range m.ClientURLs {
  50. resp, err := hc.Get(url + "/health")
  51. if err != nil {
  52. fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
  53. continue
  54. }
  55. result := struct{ Health string }{}
  56. d := json.NewDecoder(resp.Body)
  57. err = d.Decode(&result)
  58. resp.Body.Close()
  59. if err != nil {
  60. fmt.Printf("failed to check the health of member %s on %s: %v\n", m.ID, url, err)
  61. continue
  62. }
  63. checked = true
  64. if result.Health == "true" {
  65. health = true
  66. fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
  67. } else {
  68. fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
  69. }
  70. break
  71. }
  72. if !checked {
  73. fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
  74. }
  75. }
  76. if health {
  77. fmt.Println("cluster is healthy")
  78. } else {
  79. fmt.Println("cluster is unhealthy")
  80. }
  81. if !forever {
  82. break
  83. }
  84. fmt.Printf("\nnext check after 10 second...\n\n")
  85. time.Sleep(10 * time.Second)
  86. }
  87. }