123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
- package command
- import (
- "encoding/json"
- "errors"
- "fmt"
- "net/http"
- "os"
- "sort"
- "strings"
- "time"
- "github.com/coreos/etcd/Godeps/_workspace/src/github.com/codegangsta/cli"
- "github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
- "github.com/coreos/etcd/etcdserver/stats"
- )
- func NewClusterHealthCommand() cli.Command {
- return cli.Command{
- Name: "cluster-health",
- Usage: "check the health of the etcd cluster",
- Flags: []cli.Flag{},
- Action: handleClusterHealth,
- }
- }
- func handleClusterHealth(c *cli.Context) {
- endpoints, err := getEndpoints(c)
- if err != nil {
- handleError(ErrorFromEtcd, err)
- }
- tr, err := getTransport(c)
- if err != nil {
- handleError(ErrorFromEtcd, err)
- }
- client := etcd.NewClient(endpoints)
- client.SetTransport(tr)
- if c.GlobalBool("debug") {
- go dumpCURL(client)
- }
- if ok := client.SyncCluster(); !ok {
- handleError(FailedToConnectToHost, errors.New("cannot sync with the cluster using endpoints "+strings.Join(endpoints, ", ")))
- }
- // do we have a leader?
- cl := client.GetCluster()
- ep, ls0, err := getLeaderStats(tr, cl)
- if err != nil {
- fmt.Println("cluster may be unhealthy: failed to connect", cl)
- os.Exit(1)
- }
- // is raft stable and making progress?
- client = etcd.NewClient([]string{ep})
- client.SetTransport(tr)
- resp, err := client.Get("/", false, false)
- if err != nil {
- fmt.Println("cluster is unhealthy")
- os.Exit(1)
- }
- rt0, ri0 := resp.RaftTerm, resp.RaftIndex
- time.Sleep(time.Second)
- resp, err = client.Get("/", false, false)
- if err != nil {
- fmt.Println("cluster is unhealthy")
- os.Exit(1)
- }
- rt1, ri1 := resp.RaftTerm, resp.RaftIndex
- if rt0 != rt1 {
- fmt.Println("cluster is unhealthy")
- os.Exit(1)
- }
- if ri1 == ri0 {
- fmt.Println("cluster is unhealthy")
- os.Exit(1)
- }
- // are all the members makeing progress?
- _, ls1, err := getLeaderStats(tr, []string{ep})
- if err != nil {
- fmt.Println("cluster is unhealthy")
- os.Exit(1)
- }
- fmt.Println("cluster is healthy")
- // self is healthy
- var prints []string
- prints = append(prints, fmt.Sprintf("member %s is healthy\n", ls1.Leader))
- for name, fs0 := range ls0.Followers {
- fs1, ok := ls1.Followers[name]
- if !ok {
- fmt.Println("Cluster configuration changed during health checking. Please retry.")
- os.Exit(1)
- }
- if fs1.Counts.Success <= fs0.Counts.Success {
- prints = append(prints, fmt.Sprintf("member %s is unhealthy\n", name))
- } else {
- prints = append(prints, fmt.Sprintf("member %s is healthy\n", name))
- }
- }
- sort.Strings(prints)
- for _, p := range prints {
- fmt.Print(p)
- }
- os.Exit(0)
- }
- func getLeaderStats(tr *http.Transport, endpoints []string) (string, *stats.LeaderStats, error) {
- // go-etcd does not support cluster stats, use http client for now
- // TODO: use new etcd client with new member/stats endpoint
- httpclient := http.Client{
- Transport: tr,
- }
- for _, ep := range endpoints {
- resp, err := httpclient.Get(ep + "/v2/stats/leader")
- if err != nil {
- continue
- }
- defer resp.Body.Close()
- if resp.StatusCode != http.StatusOK {
- continue
- }
- ls := &stats.LeaderStats{}
- d := json.NewDecoder(resp.Body)
- err = d.Decode(ls)
- if err != nil {
- continue
- }
- return ep, ls, nil
- }
- return "", nil, errors.New("no leader")
- }
|