소스 검색

Specialize sample heap to reduce allocations.

Here's the benchcmp output. I'm not sure why some of the timings changed,
as most of those benchmarks have nothing to do with this change; I suspect
some other factors are at play.

benchmark                       old ns/op     new ns/op     delta
BenchmarkCounter                10.5          10.4          -0.95%
BenchmarkDebugGCStats           1541          1506          -2.27%
BenchmarkEWMA                   79.9          88.1          +10.26%
BenchmarkGuageFloat64           64.0          63.1          -1.41%
BenchmarkGuage                  10.4          11.1          +6.73%
BenchmarkHistogram              115           127           +10.43%
BenchmarkMeter                  361           409           +13.30%
BenchmarkMetrics                204413        405262        +98.26%
BenchmarkRegistry               686           1205          +75.66%
BenchmarkRuntimeMemStats        3246          4280          +31.85%
BenchmarkCompute1000            2319          2497          +7.68%
BenchmarkCompute1000000         2386431       3085427       +29.29%
BenchmarkCopy1000               3975          4268          +7.37%
BenchmarkCopy1000000            2385647       2382330       -0.14%
BenchmarkExpDecaySample257      733           449           -38.74%
BenchmarkExpDecaySample514      787           426           -45.87%
BenchmarkExpDecaySample1028     1104          460           -58.33%
BenchmarkUniformSample257       157           120           -23.57%
BenchmarkUniformSample514       139           116           -16.55%
BenchmarkUniformSample1028      137           111           -18.98%
BenchmarkTimer                  1371          950           -30.71%

benchmark                       old allocs     new allocs     delta
BenchmarkCounter                0              0              +0.00%
BenchmarkDebugGCStats           1              0              -100.00%
BenchmarkEWMA                   0              0              +0.00%
BenchmarkGuageFloat64           0              0              +0.00%
BenchmarkGuage                  0              0              +0.00%
BenchmarkHistogram              0              0              +0.00%
BenchmarkMeter                  0              0              +0.00%
BenchmarkMetrics                260            0              -100.00%
BenchmarkRegistry               2              2              +0.00%
BenchmarkRuntimeMemStats        1              0              -100.00%
BenchmarkCompute1000            0              0              +0.00%
BenchmarkCompute1000000         0              0              +0.00%
BenchmarkCopy1000               1              1              +0.00%
BenchmarkCopy1000000            1              1              +0.00%
BenchmarkExpDecaySample257      1              0              -100.00%
BenchmarkExpDecaySample514      1              0              -100.00%
BenchmarkExpDecaySample1028     1              0              -100.00%
BenchmarkUniformSample257       0              0              +0.00%
BenchmarkUniformSample514       0              0              +0.00%
BenchmarkUniformSample1028      0              0              +0.00%
BenchmarkTimer                  1              0              -100.00%

benchmark                       old bytes     new bytes     delta
BenchmarkCounter                0             0             +0.00%
BenchmarkDebugGCStats           31            0             -100.00%
BenchmarkEWMA                   0             0             +0.00%
BenchmarkGuageFloat64           0             0             +0.00%
BenchmarkGuage                  0             0             +0.00%
BenchmarkHistogram              0             0             +0.00%
BenchmarkMeter                  0             0             +0.00%
BenchmarkMetrics                4162          1             -99.98%
BenchmarkRegistry               340           340           +0.00%
BenchmarkRuntimeMemStats        31            0             -100.00%
BenchmarkCompute1000            0             0             +0.00%
BenchmarkCompute1000000         0             0             +0.00%
BenchmarkCopy1000               8192          8192          +0.00%
BenchmarkCopy1000000            8003599       8003599       +0.00%
BenchmarkExpDecaySample257      32            0             -100.00%
BenchmarkExpDecaySample514      32            0             -100.00%
BenchmarkExpDecaySample1028     31            0             -100.00%
BenchmarkUniformSample257       0             0             +0.00%
BenchmarkUniformSample514       0             0             +0.00%
BenchmarkUniformSample1028      0             0             +0.00%
BenchmarkTimer                  31            0             -100.00%
Kamil Kisiel 11 년 전
부모
커밋
b6699f1da1
1개의 변경된 파일, 68개의 추가 그리고 35개의 삭제
  1. 68 35
      sample.go

+ 68 - 35
sample.go

@@ -1,7 +1,6 @@
 package metrics
 
 import (
-	"container/heap"
 	"math"
 	"math/rand"
 	"sort"
@@ -41,7 +40,7 @@ type ExpDecaySample struct {
 	mutex         sync.Mutex
 	reservoirSize int
 	t0, t1        time.Time
-	values        expDecaySampleHeap
+	values        *expDecaySampleHeap
 }
 
 // NewExpDecaySample constructs a new exponentially-decaying sample with the
@@ -54,7 +53,7 @@ func NewExpDecaySample(reservoirSize int, alpha float64) Sample {
 		alpha:         alpha,
 		reservoirSize: reservoirSize,
 		t0:            time.Now(),
-		values:        make(expDecaySampleHeap, 0, reservoirSize),
+		values:        newExpDecaySampleHeap(reservoirSize),
 	}
 	s.t1 = time.Now().Add(rescaleThreshold)
 	return s
@@ -67,7 +66,7 @@ func (s *ExpDecaySample) Clear() {
 	s.count = 0
 	s.t0 = time.Now()
 	s.t1 = s.t0.Add(rescaleThreshold)
-	s.values = make(expDecaySampleHeap, 0, s.reservoirSize)
+	s.values = newExpDecaySampleHeap(s.reservoirSize)
 }
 
 // Count returns the number of samples recorded, which may exceed the
@@ -110,15 +109,16 @@ func (s *ExpDecaySample) Percentiles(ps []float64) []float64 {
 func (s *ExpDecaySample) Size() int {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	return len(s.values)
+	return s.values.Size()
 }
 
 // Snapshot returns a read-only copy of the sample.
 func (s *ExpDecaySample) Snapshot() Sample {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	values := make([]int64, len(s.values))
-	for i, v := range s.values {
+	vals := s.values.Values()
+	values := make([]int64, len(vals))
+	for i, v := range vals {
 		values[i] = v.v
 	}
 	return &SampleSnapshot{
@@ -146,8 +146,9 @@ func (s *ExpDecaySample) Update(v int64) {
 func (s *ExpDecaySample) Values() []int64 {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
-	values := make([]int64, len(s.values))
-	for i, v := range s.values {
+	vals := s.values.Values()
+	values := make([]int64, len(vals))
+	for i, v := range vals {
 		values[i] = v.v
 	}
 	return values
@@ -164,22 +165,22 @@ func (s *ExpDecaySample) update(t time.Time, v int64) {
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
 	s.count++
-	if len(s.values) == s.reservoirSize {
-		heap.Pop(&s.values)
+	if s.values.Size() == s.reservoirSize {
+		s.values.Pop()
 	}
-	heap.Push(&s.values, expDecaySample{
+	s.values.Push(expDecaySample{
 		k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(),
 		v: v,
 	})
 	if t.After(s.t1) {
-		values := s.values
+		values := s.values.Values()
 		t0 := s.t0
-		s.values = make(expDecaySampleHeap, 0, s.reservoirSize)
+		s.values = newExpDecaySampleHeap(s.reservoirSize)
 		s.t0 = t
 		s.t1 = s.t0.Add(rescaleThreshold)
 		for _, v := range values {
 			v.k = v.k * math.Exp(-s.alpha*float64(s.t0.Sub(t0)))
-			heap.Push(&s.values, v)
+			s.values.Push(v)
 		}
 	}
 }
@@ -529,36 +530,68 @@ type expDecaySample struct {
 	v int64
 }
 
+func newExpDecaySampleHeap(reservoirSize int) *expDecaySampleHeap {
+	return &expDecaySampleHeap{make([]expDecaySample, 0, reservoirSize)}
+}
+
 // expDecaySampleHeap is a min-heap of expDecaySamples.
-type expDecaySampleHeap []expDecaySample
+type expDecaySampleHeap struct {
+	s []expDecaySample
+}
+
+func (h *expDecaySampleHeap) Push(s expDecaySample) {
+	n := len(h.s)
+	h.s = h.s[0 : n+1]
+	h.s[n] = s
+	h.up(n)
+}
 
-func (q expDecaySampleHeap) Len() int {
-	return len(q)
+func (h *expDecaySampleHeap) Pop() expDecaySample {
+	n := len(h.s) - 1
+	h.s[0], h.s[n] = h.s[n], h.s[0]
+	h.down(0, n)
+
+	n = len(h.s)
+	s := h.s[n-1]
+	h.s = h.s[0 : n-1]
+	return s
 }
 
-func (q expDecaySampleHeap) Less(i, j int) bool {
-	return q[i].k < q[j].k
+func (h *expDecaySampleHeap) Size() int {
+	return len(h.s)
 }
 
-func (q *expDecaySampleHeap) Pop() interface{} {
-	q_ := *q
-	n := len(q_)
-	i := q_[n-1]
-	q_ = q_[0 : n-1]
-	*q = q_
-	return i
+func (h *expDecaySampleHeap) Values() []expDecaySample {
+	return h.s
 }
 
-func (q *expDecaySampleHeap) Push(x interface{}) {
-	q_ := *q
-	n := len(q_)
-	q_ = q_[0 : n+1]
-	q_[n] = x.(expDecaySample)
-	*q = q_
+func (h *expDecaySampleHeap) up(j int) {
+	for {
+		i := (j - 1) / 2 // parent
+		if i == j || !(h.s[j].k < h.s[i].k) {
+			break
+		}
+		h.s[i], h.s[j] = h.s[j], h.s[i]
+		j = i
+	}
 }
 
-func (q expDecaySampleHeap) Swap(i, j int) {
-	q[i], q[j] = q[j], q[i]
+func (h *expDecaySampleHeap) down(i, n int) {
+	for {
+		j1 := 2*i + 1
+		if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
+			break
+		}
+		j := j1 // left child
+		if j2 := j1 + 1; j2 < n && !(h.s[j1].k < h.s[j2].k) {
+			j = j2 // = 2*i + 2  // right child
+		}
+		if !(h.s[j].k < h.s[i].k) {
+			break
+		}
+		h.s[i], h.s[j] = h.s[j], h.s[i]
+		i = j
+	}
 }
 
 type int64Slice []int64