ソースを参照

snap: use Histogram for snap metrics

Xiang Li 10 年前
コミット
c1e4e647eb
3 ファイル変更、12 行追加、10 行削除
  1. +4 -4
      Documentation/metrics.md
  2. +6 -4
      snap/metrics.go
  3. +2 -2
      snap/snapshotter.go

+ 4 - 4
Documentation/metrics.md

@@ -73,11 +73,11 @@ Example Prometheus queries that may be useful from these metrics (across all etc
 
 ### snapshot
 
-| Name                                       | Description                                                | Type    |
-|--------------------------------------------|------------------------------------------------------------|---------|
-| snapshot_save_total_durations_microseconds | The total latency distributions of save called by snapshot | Summary |
+| Name                                       | Description                                                | Type      |
+|--------------------------------------------|------------------------------------------------------------|-----------|
+| snapshot_save_total_durations_seconds      | The total latency distributions of save called by snapshot | Histogram |
 
-Abnormally high snapshot duration (`snapshot_save_total_durations_microseconds`) indicates disk issues and might cause the cluster to be unstable.
+Abnormally high snapshot duration (`snapshot_save_total_durations_seconds`) indicates disk issues and might cause the cluster to be unstable.
 
 
 ### rafthttp

+ 6 - 4
snap/metrics.go

@@ -18,18 +18,20 @@ import "github.com/coreos/etcd/Godeps/_workspace/src/github.com/prometheus/clien
 
 var (
 	// TODO: save_fsync latency?
-	saveDurations = prometheus.NewSummary(prometheus.SummaryOpts{
+	saveDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: "etcd",
 		Subsystem: "snapshot",
-		Name:      "save_total_durations_microseconds",
+		Name:      "save_total_durations_seconds",
 		Help:      "The total latency distributions of save called by snapshot.",
+		Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
 
-	marshallingDurations = prometheus.NewSummary(prometheus.SummaryOpts{
+	marshallingDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
 		Namespace: "etcd",
 		Subsystem: "snapshot",
-		Name:      "save_marshalling_durations_microseconds",
+		Name:      "save_marshalling_durations_seconds",
 		Help:      "The marshalling cost distributions of save called by snapshot.",
+		Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
 )
 

+ 2 - 2
snap/snapshotter.go

@@ -74,12 +74,12 @@ func (s *Snapshotter) save(snapshot *raftpb.Snapshot) error {
 	if err != nil {
 		return err
 	} else {
-		marshallingDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond)))
+		marshallingDurations.Observe(float64(time.Since(start)) / float64(time.Second))
 	}
 
 	err = ioutil.WriteFile(path.Join(s.dir, fname), d, 0666)
 	if err == nil {
-		saveDurations.Observe(float64(time.Since(start).Nanoseconds() / int64(time.Microsecond)))
+		saveDurations.Observe(float64(time.Since(start)) / float64(time.Second))
 	}
 	return err
 }