Browse Source

Merge pull request #10156 from gyuho/metrics-health

etcdserver: add "etcd_server_health_success/failures"
Gyuho Lee 7 years ago
parent
commit
ba606bf85e
2 changed files with 59 additions and 6 deletions
  1. 27 5
      etcdserver/api/etcdhttp/metrics.go
  2. 32 1
      integration/metrics_test.go

+ 27 - 5
etcdserver/api/etcdhttp/metrics.go

@@ -24,6 +24,7 @@ import (
 	"go.etcd.io/etcd/etcdserver/etcdserverpb"
 	"go.etcd.io/etcd/raft"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
 
@@ -43,11 +44,6 @@ func HandlePrometheus(mux *http.ServeMux) {
 	mux.Handle(pathMetrics, promhttp.Handler())
 }
 
-// HandleHealth registers health handler on '/health'.
-func HandleHealth(mux *http.ServeMux, srv etcdserver.ServerV2) {
-	mux.Handle(PathHealth, NewHealthHandler(func() Health { return checkHealth(srv) }))
-}
-
 // NewHealthHandler handles '/health' requests.
 func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
@@ -67,6 +63,26 @@ func NewHealthHandler(hfunc func() Health) http.HandlerFunc {
 	}
 }
 
+// healthSuccess counts the health checks that reported the server as healthy.
+var healthSuccess = prometheus.NewCounter(prometheus.CounterOpts{
+	Namespace: "etcd",
+	Subsystem: "server",
+	Name:      "health_success",
+	Help:      "The total number of successful health checks",
+})
+
+// healthFailed counts the health checks that reported the server as unhealthy.
+var healthFailed = prometheus.NewCounter(prometheus.CounterOpts{
+	Namespace: "etcd",
+	Subsystem: "server",
+	Name:      "health_failures",
+	Help:      "The total number of failed health checks",
+})
+
+// init registers both health check counters with the default Prometheus
+// registry. MustRegister panics if either registration fails.
+func init() {
+	prometheus.MustRegister(healthSuccess, healthFailed)
+}
+
 // Health defines etcd server health status.
 // TODO: remove manual parsing in etcdctl cluster-health
 type Health struct {
@@ -97,5 +113,11 @@ func checkHealth(srv etcdserver.ServerV2) Health {
 			h.Health = "false"
 		}
 	}
+
+	if h.Health == "true" {
+		healthSuccess.Inc()
+	} else {
+		healthFailed.Inc()
+	}
 	return h
 }

+ 32 - 1
integration/metrics_test.go

@@ -16,14 +16,15 @@ package integration
 
 import (
 	"context"
+	"net/http"
 	"strconv"
 	"testing"
 	"time"
 
 	"go.etcd.io/etcd/etcdserver"
-
 	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
 	"go.etcd.io/etcd/pkg/testutil"
+	"go.etcd.io/etcd/pkg/transport"
 )
 
 // TestMetricDbSizeBoot checks that the db size metric is set on boot.
@@ -165,3 +166,33 @@ func TestMetricQuotaBackendBytes(t *testing.T) {
 		t.Fatalf("expected %d, got %f", etcdserver.DefaultQuotaBytes, qv)
 	}
 }
+
+// TestMetricsHealth sends a GET '/health' request to a single-member cluster
+// and checks that the "etcd_server_health_success" metric then reads "1".
+func TestMetricsHealth(t *testing.T) {
+	defer testutil.AfterTest(t)
+	clus := NewClusterV3(t, &ClusterConfig{Size: 1})
+	defer clus.Terminate(t)
+
+	tr, err := transport.NewTransport(transport.TLSInfo{}, 5*time.Second)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Take the member's first client URL and point it at the health endpoint.
+	u := clus.Members[0].ClientURLs[0]
+	u.Path = "/health"
+	resp, err := tr.RoundTrip(&http.Request{
+		Header: make(http.Header),
+		Method: http.MethodGet,
+		URL:    &u,
+	})
+	// Check the error before touching resp: a failed round trip leaves resp
+	// nil, so closing its body first would panic instead of reporting err.
+	if err != nil {
+		t.Fatal(err)
+	}
+	resp.Body.Close()
+
+	hv, err := clus.Members[0].Metric("etcd_server_health_success")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hv != "1" {
+		t.Fatalf("expected '1' from /health, got %q", hv)
+	}
+}