Jelajahi Sumber

Specialize sample heap to reduce allocations.

Here's the benchcmp output. I'm not sure why some of the timings changed,
as most of those benchmarks have nothing to do with this change; I suspect
some other factors are at play.

benchmark                       old ns/op     new ns/op     delta
BenchmarkCounter                10.5          10.4          -0.95%
BenchmarkDebugGCStats           1541          1506          -2.27%
BenchmarkEWMA                   79.9          88.1          +10.26%
BenchmarkGuageFloat64           64.0          63.1          -1.41%
BenchmarkGuage                  10.4          11.1          +6.73%
BenchmarkHistogram              115           127           +10.43%
BenchmarkMeter                  361           409           +13.30%
BenchmarkMetrics                204413        405262        +98.26%
BenchmarkRegistry               686           1205          +75.66%
BenchmarkRuntimeMemStats        3246          4280          +31.85%
BenchmarkCompute1000            2319          2497          +7.68%
BenchmarkCompute1000000         2386431       3085427       +29.29%
BenchmarkCopy1000               3975          4268          +7.37%
BenchmarkCopy1000000            2385647       2382330       -0.14%
BenchmarkExpDecaySample257      733           449           -38.74%
BenchmarkExpDecaySample514      787           426           -45.87%
BenchmarkExpDecaySample1028     1104          460           -58.33%
BenchmarkUniformSample257       157           120           -23.57%
BenchmarkUniformSample514       139           116           -16.55%
BenchmarkUniformSample1028      137           111           -18.98%
BenchmarkTimer                  1371          950           -30.71%

benchmark                       old allocs     new allocs     delta
BenchmarkCounter                0              0              +0.00%
BenchmarkDebugGCStats           1              0              -100.00%
BenchmarkEWMA                   0              0              +0.00%
BenchmarkGuageFloat64           0              0              +0.00%
BenchmarkGuage                  0              0              +0.00%
BenchmarkHistogram              0              0              +0.00%
BenchmarkMeter                  0              0              +0.00%
BenchmarkMetrics                260            0              -100.00%
BenchmarkRegistry               2              2              +0.00%
BenchmarkRuntimeMemStats        1              0              -100.00%
BenchmarkCompute1000            0              0              +0.00%
BenchmarkCompute1000000         0              0              +0.00%
BenchmarkCopy1000               1              1              +0.00%
BenchmarkCopy1000000            1              1              +0.00%
BenchmarkExpDecaySample257      1              0              -100.00%
BenchmarkExpDecaySample514      1              0              -100.00%
BenchmarkExpDecaySample1028     1              0              -100.00%
BenchmarkUniformSample257       0              0              +0.00%
BenchmarkUniformSample514       0              0              +0.00%
BenchmarkUniformSample1028      0              0              +0.00%
BenchmarkTimer                  1              0              -100.00%

benchmark                       old bytes     new bytes     delta
BenchmarkCounter                0             0             +0.00%
BenchmarkDebugGCStats           31            0             -100.00%
BenchmarkEWMA                   0             0             +0.00%
BenchmarkGuageFloat64           0             0             +0.00%
BenchmarkGuage                  0             0             +0.00%
BenchmarkHistogram              0             0             +0.00%
BenchmarkMeter                  0             0             +0.00%
BenchmarkMetrics                4162          1             -99.98%
BenchmarkRegistry               340           340           +0.00%
BenchmarkRuntimeMemStats        31            0             -100.00%
BenchmarkCompute1000            0             0             +0.00%
BenchmarkCompute1000000         0             0             +0.00%
BenchmarkCopy1000               8192          8192          +0.00%
BenchmarkCopy1000000            8003599       8003599       +0.00%
BenchmarkExpDecaySample257      32            0             -100.00%
BenchmarkExpDecaySample514      32            0             -100.00%
BenchmarkExpDecaySample1028     31            0             -100.00%
BenchmarkUniformSample257       0             0             +0.00%
BenchmarkUniformSample514       0             0             +0.00%
BenchmarkUniformSample1028      0             0             +0.00%
BenchmarkTimer                  31            0             -100.00%
Kamil Kisiel 11 tahun lalu
induk
melakukan
b6699f1da1
1 mengubah file dengan 68 tambahan dan 35 penghapusan
  1. 68 35
      sample.go

+ 68 - 35
sample.go

@@ -1,7 +1,6 @@
 package metrics
 
 import (
-	"container/heap"
 	"math"
 	"math/rand"
 	"sort"
@@ -41,7 +40,7 @@ type ExpDecaySample struct {
 	mutex         sync.Mutex
 	reservoirSize int
 	t0, t1        time.Time
-	values        expDecaySampleHeap
+	values        *expDecaySampleHeap
 }
 
 // NewExpDecaySample constructs a new exponentially-decaying sample with the
@@ -54,7 +53,7 @@ func NewExpDecaySample(reservoirSize int, alpha float64) Sample {
 		alpha:         alpha,
 		reservoirSize: reservoirSize,
 		t0:            time.Now(),
-		values:        make(expDecaySampleHeap, 0, reservoirSize),
+		values:        newExpDecaySampleHeap(reservoirSize),
 	}
 	s.t1 = time.Now().Add(rescaleThreshold)
 	return s
@@ -67,7 +66,7 @@ func (s *ExpDecaySample) Clear() {
 	s.count = 0
 	s.t0 = time.Now()
 	s.t1 = s.t0.Add(rescaleThreshold)
-	s.values = make(expDecaySampleHeap, 0, s.reservoirSize)
+	s.values = newExpDecaySampleHeap(s.reservoirSize)
 }
 
 // Count returns the number of samples recorded, which may exceed the
@@ -110,15 +109,16 @@ func (s *ExpDecaySample) Percentiles(ps []float64) []float64 {
 func (s *ExpDecaySample) Size() int {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	return len(s.values)
+	return s.values.Size()
 }
 
 // Snapshot returns a read-only copy of the sample.
 func (s *ExpDecaySample) Snapshot() Sample {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	values := make([]int64, len(s.values))
-	for i, v := range s.values {
+	vals := s.values.Values()
+	values := make([]int64, len(vals))
+	for i, v := range vals {
 		values[i] = v.v
 	}
 	return &SampleSnapshot{
@@ -146,8 +146,9 @@ func (s *ExpDecaySample) Update(v int64) {
 func (s *ExpDecaySample) Values() []int64 {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	values := make([]int64, len(s.values))
-	for i, v := range s.values {
+	vals := s.values.Values()
+	values := make([]int64, len(vals))
+	for i, v := range vals {
 		values[i] = v.v
 	}
 	return values
@@ -164,22 +165,22 @@ func (s *ExpDecaySample) update(t time.Time, v int64) {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
 	s.count++
-	if len(s.values) == s.reservoirSize {
-		heap.Pop(&s.values)
+	if s.values.Size() == s.reservoirSize {
+		s.values.Pop()
 	}
-	heap.Push(&s.values, expDecaySample{
+	s.values.Push(expDecaySample{
 		k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(),
 		v: v,
 	})
 	if t.After(s.t1) {
-		values := s.values
+		values := s.values.Values()
 		t0 := s.t0
-		s.values = make(expDecaySampleHeap, 0, s.reservoirSize)
+		s.values = newExpDecaySampleHeap(s.reservoirSize)
 		s.t0 = t
 		s.t1 = s.t0.Add(rescaleThreshold)
 		for _, v := range values {
 			v.k = v.k * math.Exp(-s.alpha*float64(s.t0.Sub(t0)))
-			heap.Push(&s.values, v)
+			s.values.Push(v)
 		}
 	}
 }
@@ -529,36 +530,68 @@ type expDecaySample struct {
 	v int64
 }
 
+func newExpDecaySampleHeap(reservoirSize int) *expDecaySampleHeap {
+	return &expDecaySampleHeap{make([]expDecaySample, 0, reservoirSize)}
+}
+
 // expDecaySampleHeap is a min-heap of expDecaySamples.
-type expDecaySampleHeap []expDecaySample
+type expDecaySampleHeap struct {
+	s []expDecaySample
+}
+
+func (h *expDecaySampleHeap) Push(s expDecaySample) {
+	n := len(h.s)
+	h.s = h.s[0 : n+1]
+	h.s[n] = s
+	h.up(n)
+}
 
-func (q expDecaySampleHeap) Len() int {
-	return len(q)
+func (h *expDecaySampleHeap) Pop() expDecaySample {
+	n := len(h.s) - 1
+	h.s[0], h.s[n] = h.s[n], h.s[0]
+	h.down(0, n)
+
+	n = len(h.s)
+	s := h.s[n-1]
+	h.s = h.s[0 : n-1]
+	return s
 }
 
-func (q expDecaySampleHeap) Less(i, j int) bool {
-	return q[i].k < q[j].k
+func (h *expDecaySampleHeap) Size() int {
+	return len(h.s)
 }
 
-func (q *expDecaySampleHeap) Pop() interface{} {
-	q_ := *q
-	n := len(q_)
-	i := q_[n-1]
-	q_ = q_[0 : n-1]
-	*q = q_
-	return i
+func (h *expDecaySampleHeap) Values() []expDecaySample {
+	return h.s
 }
 
-func (q *expDecaySampleHeap) Push(x interface{}) {
-	q_ := *q
-	n := len(q_)
-	q_ = q_[0 : n+1]
-	q_[n] = x.(expDecaySample)
-	*q = q_
+func (h *expDecaySampleHeap) up(j int) {
+	for {
+		i := (j - 1) / 2 // parent
+		if i == j || !(h.s[j].k < h.s[i].k) {
+			break
+		}
+		h.s[i], h.s[j] = h.s[j], h.s[i]
+		j = i
+	}
 }
 
-func (q expDecaySampleHeap) Swap(i, j int) {
-	q[i], q[j] = q[j], q[i]
+func (h *expDecaySampleHeap) down(i, n int) {
+	for {
+		j1 := 2*i + 1
+		if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
+			break
+		}
+		j := j1 // left child
+		if j2 := j1 + 1; j2 < n && !(h.s[j1].k < h.s[j2].k) {
+			j = j2 // = 2*i + 2  // right child
+		}
+		if !(h.s[j].k < h.s[i].k) {
+			break
+		}
+		h.s[i], h.s[j] = h.s[j], h.s[i]
+		i = j
+	}
 }
 
 type int64Slice []int64