ソースを参照

etcdserver/api/snap: add v3 snapshot fsync metrics

etcd_snap_db_fsync_duration_seconds_count
etcd_snap_db_save_total_duration_seconds_bucket

Signed-off-by: Gyuho Lee <leegyuho@amazon.com>
Gyuho Lee 7 年 前
コミット
c392cd20cf
2 ファイル変更、30 行追加、0 行削除
  1. 6 0
      etcdserver/api/snap/db.go
  2. 24 0
      etcdserver/api/snap/metrics.go

+ 6 - 0
etcdserver/api/snap/db.go

@@ -21,6 +21,7 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
+	"time"
 
 	"github.com/coreos/etcd/pkg/fileutil"
 
@@ -33,6 +34,8 @@ var ErrNoDBSnapshot = errors.New("snap: snapshot file doesn't exist")
 // SaveDBFrom saves snapshot of the database from the given reader. It
 // guarantees the save operation is atomic.
 func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) {
+	start := time.Now()
+
 	f, err := ioutil.TempFile(s.dir, "tmp")
 	if err != nil {
 		return 0, err
@@ -40,7 +43,9 @@ func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) {
 	var n int64
 	n, err = io.Copy(f, r)
 	if err == nil {
+		fsyncStart := time.Now()
 		err = fileutil.Fsync(f)
+		snapDBFsyncSec.Observe(time.Since(fsyncStart).Seconds())
 	}
 	f.Close()
 	if err != nil {
@@ -69,6 +74,7 @@ func (s *Snapshotter) SaveDBFrom(r io.Reader, id uint64) (int64, error) {
 		plog.Infof("saved database snapshot to disk [total bytes: %d]", n)
 	}
 
+	snapDBSaveSec.Observe(time.Since(start).Seconds())
 	return n, nil
 }
 

+ 24 - 0
etcdserver/api/snap/metrics.go

@@ -49,10 +49,34 @@ var (
 		// highest bucket start of 0.001 sec * 2^13 == 8.192 sec
 		Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
 	})
+
+	snapDBSaveSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "snap_db",
+		Name:      "save_total_duration_seconds",
+		Help:      "The total latency distributions of v3 snapshot save",
+
+		// lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+		// highest bucket start of 0.1 sec * 2^9 == 51.2 sec
+		Buckets: prometheus.ExponentialBuckets(0.1, 2, 10),
+	})
+
+	snapDBFsyncSec = prometheus.NewHistogram(prometheus.HistogramOpts{
+		Namespace: "etcd",
+		Subsystem: "snap_db",
+		Name:      "fsync_duration_seconds",
+		Help:      "The latency distributions of fsyncing .snap.db file",
+
+		// lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+		// highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+		Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
+	})
 )
 
 func init() {
 	prometheus.MustRegister(snapMarshallingSec)
 	prometheus.MustRegister(snapSaveSec)
 	prometheus.MustRegister(snapFsyncSec)
+	prometheus.MustRegister(snapDBSaveSec)
+	prometheus.MustRegister(snapDBFsyncSec)
 }