
trace: don't allocate all events upfront

SetMaxEvents allocates storage for all events upfront, which is very
expensive if we want to set a high value; even with small values, the
unnecessary allocation of the initial slice is measurable.

We improve performance by preallocating a few events as part of the
trace and appending as necessary. We also co-locate the bool flags in
event, which makes the struct smaller by a word (the three flags now
share a single padded word instead of two).
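
A minimal sketch of the inline-buffer pattern, with illustrative names
(not the actual trace internals):

package main

import "fmt"

// eventLog keeps the first few entries in an inline array, so logs that
// record only a handful of events never allocate a separate slice.
type eventLog struct {
	events    []string  // backed by buf until it outgrows it
	maxEvents int
	buf       [4]string // preallocated inline storage
}

func newEventLog(max int) *eventLog {
	l := &eventLog{maxEvents: max}
	l.events = l.buf[:0] // length 0, capacity 4, no heap allocation yet
	return l
}

func (l *eventLog) add(s string) {
	if len(l.events) < l.maxEvents {
		// Beyond cap(buf), append allocates a larger backing array.
		l.events = append(l.events, s)
	}
	// (The real addEvent instead discards middle events at maxEvents.)
}

func main() {
	l := newEventLog(10)
	for i := 0; i < 6; i++ {
		l.add(fmt.Sprintf("event %d", i))
	}
	fmt.Println(len(l.events)) // 6
}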

We also add a set of benchmarks (named Trace_<max events>_<events per
trace>); the amortized cost per event before and after is shown:

name                    old time/op  new time/op  delta
Trace_Default_2-32      1.19µs ± 1%  0.96µs ± 0%  -18.94%
Trace_Default_10-32      579ns ± 0%   635ns ± 1%   +9.82%
Trace_Default_100-32     463ns ± 1%   466ns ± 1%     ~
Trace_Default_1000-32    451ns ± 1%   451ns ± 0%     ~
Trace_Default_10000-32   451ns ± 2%   449ns ± 1%     ~
Trace_10_2-32           1.41µs ± 1%  0.96µs ± 1%  -31.74%
Trace_10_10-32           623ns ± 1%   634ns ± 1%   +1.73%
Trace_10_100-32          469ns ± 1%   466ns ± 1%     ~
Trace_10_1000-32         452ns ± 1%   453ns ± 2%     ~
Trace_10_10000-32        451ns ± 1%   449ns ± 1%     ~
Trace_100_2-32          2.78µs ± 2%  0.97µs ± 1%  -65.28%
Trace_100_10-32          912ns ± 1%   637ns ± 1%  -30.23%
Trace_100_100-32         479ns ± 1%   541ns ± 0%  +12.77%
Trace_100_1000-32        510ns ± 0%   541ns ± 1%   +6.08%
Trace_100_10000-32       514ns ± 1%   540ns ± 0%   +5.14%
Trace_1000_2-32         17.2µs ± 1%   1.0µs ± 1%  -94.39%
Trace_1000_10-32        3.90µs ± 1%  0.64µs ± 0%  -83.68%
Trace_1000_100-32        814ns ± 1%   542ns ± 0%  -33.46%
Trace_1000_1000-32       503ns ± 1%   581ns ± 0%  +15.56%
Trace_1000_10000-32     1.28µs ± 2%  1.03µs ± 1%  -19.68%

Change-Id: I5bbc12153a9f30dff821ef139583cb1c07a11069
Reviewed-on: https://go-review.googlesource.com/30615
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Author: Radu Berinde
Commit: f4b625ec9b

2 changed files, 109 additions and 8 deletions:
  trace/trace.go       +16  -8
  trace/trace_test.go  +93  -0

trace/trace.go  (+16 -8)

@@ -333,7 +333,8 @@ func New(family, title string) Trace {
 	tr.ref()
 	tr.Family, tr.Title = family, title
 	tr.Start = time.Now()
-	tr.events = make([]event, 0, maxEventsPerTrace)
+	tr.maxEvents = maxEventsPerTrace
+	tr.events = tr.eventsBuf[:0]
 
 	activeMu.RLock()
 	s := activeTraces[tr.Family]
@@ -650,8 +651,8 @@ type event struct {
 	Elapsed    time.Duration // since previous event in trace
 	NewDay     bool          // whether this event is on a different day to the previous event
 	Recyclable bool          // whether this event was passed via LazyLog
-	What       interface{}   // string or fmt.Stringer
 	Sensitive  bool          // whether this event contains sensitive information
+	What       interface{}   // string or fmt.Stringer
 }
 
 // WhenString returns a string representation of the elapsed time of the event.
@@ -692,14 +693,17 @@ type trace struct {
 	IsError bool
 
 	// Append-only sequence of events (modulo discards).
-	mu     sync.RWMutex
-	events []event
+	mu        sync.RWMutex
+	events    []event
+	maxEvents int
 
 	refs     int32 // how many buckets this is in
 	recycler func(interface{})
 	disc     discarded // scratch space to avoid allocation
 
 	finishStack []byte // where finish was called, if DebugUseAfterFinish is set
+
+	eventsBuf [4]event // preallocated buffer in case we only log a few events
 }
 
 func (tr *trace) reset() {
@@ -711,11 +715,15 @@ func (tr *trace) reset() {
 	tr.traceID = 0
 	tr.spanID = 0
 	tr.IsError = false
+	tr.maxEvents = 0
 	tr.events = nil
 	tr.refs = 0
 	tr.recycler = nil
 	tr.disc = 0
 	tr.finishStack = nil
+	for i := range tr.eventsBuf {
+		tr.eventsBuf[i] = event{}
+	}
 }
 
 // delta returns the elapsed time since the last event or the trace start,
@@ -753,11 +761,11 @@ func (tr *trace) addEvent(x interface{}, recyclable, sensitive bool) {
 	e := event{When: time.Now(), What: x, Recyclable: recyclable, Sensitive: sensitive}
 	tr.mu.Lock()
 	e.Elapsed, e.NewDay = tr.delta(e.When)
-	if len(tr.events) < cap(tr.events) {
+	if len(tr.events) < tr.maxEvents {
 		tr.events = append(tr.events, e)
 	} else {
 		// Discard the middle events.
-		di := int((cap(tr.events) - 1) / 2)
+		di := int((tr.maxEvents - 1) / 2)
 		if d, ok := tr.events[di].What.(*discarded); ok {
 			(*d)++
 		} else {
@@ -777,7 +785,7 @@ func (tr *trace) addEvent(x interface{}, recyclable, sensitive bool) {
 			go tr.recycler(tr.events[di+1].What)
 		}
 		copy(tr.events[di+1:], tr.events[di+2:])
-		tr.events[cap(tr.events)-1] = e
+		tr.events[tr.maxEvents-1] = e
 	}
 	tr.mu.Unlock()
 }
@@ -803,7 +811,7 @@ func (tr *trace) SetTraceInfo(traceID, spanID uint64) {
 func (tr *trace) SetMaxEvents(m int) {
 	// Always keep at least three events: first, discarded count, last.
 	if len(tr.events) == 0 && m > 3 {
-		tr.events = make([]event, 0, m)
+		tr.maxEvents = m
 	}
 }
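
For context, a typical caller looks roughly like this (a sketch using
the exported golang.org/x/net/trace API); with this change, passing a
large value to SetMaxEvents no longer allocates the full event slice up
front, since storage grows only as events are appended:

package main

import "golang.org/x/net/trace"

func main() {
	tr := trace.New("myFamily", "myRequest")
	tr.SetMaxEvents(1000) // no effect once events are logged; now cheap even for large values
	for i := 0; i < 5; i++ {
		tr.LazyPrintf("step %d done", i)
	}
	tr.Finish()
}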
 

trace/trace_test.go  (+93 -0)

@@ -69,3 +69,96 @@ func TestAuthRequest(t *testing.T) {
 		}
 	}
 }
+
+func benchmarkTrace(b *testing.B, maxEvents, numEvents int) {
+	numSpans := (b.N + numEvents + 1) / numEvents
+
+	for i := 0; i < numSpans; i++ {
+		tr := New("test", "test")
+		tr.SetMaxEvents(maxEvents)
+		for j := 0; j < numEvents; j++ {
+			tr.LazyPrintf("%d", j)
+		}
+		tr.Finish()
+	}
+}
+
+func BenchmarkTrace_Default_2(b *testing.B) {
+	benchmarkTrace(b, 0, 2)
+}
+
+func BenchmarkTrace_Default_10(b *testing.B) {
+	benchmarkTrace(b, 0, 10)
+}
+
+func BenchmarkTrace_Default_100(b *testing.B) {
+	benchmarkTrace(b, 0, 100)
+}
+
+func BenchmarkTrace_Default_1000(b *testing.B) {
+	benchmarkTrace(b, 0, 1000)
+}
+
+func BenchmarkTrace_Default_10000(b *testing.B) {
+	benchmarkTrace(b, 0, 10000)
+}
+
+func BenchmarkTrace_10_2(b *testing.B) {
+	benchmarkTrace(b, 10, 2)
+}
+
+func BenchmarkTrace_10_10(b *testing.B) {
+	benchmarkTrace(b, 10, 10)
+}
+
+func BenchmarkTrace_10_100(b *testing.B) {
+	benchmarkTrace(b, 10, 100)
+}
+
+func BenchmarkTrace_10_1000(b *testing.B) {
+	benchmarkTrace(b, 10, 1000)
+}
+
+func BenchmarkTrace_10_10000(b *testing.B) {
+	benchmarkTrace(b, 10, 10000)
+}
+
+func BenchmarkTrace_100_2(b *testing.B) {
+	benchmarkTrace(b, 100, 2)
+}
+
+func BenchmarkTrace_100_10(b *testing.B) {
+	benchmarkTrace(b, 100, 10)
+}
+
+func BenchmarkTrace_100_100(b *testing.B) {
+	benchmarkTrace(b, 100, 100)
+}
+
+func BenchmarkTrace_100_1000(b *testing.B) {
+	benchmarkTrace(b, 100, 1000)
+}
+
+func BenchmarkTrace_100_10000(b *testing.B) {
+	benchmarkTrace(b, 100, 10000)
+}
+
+func BenchmarkTrace_1000_2(b *testing.B) {
+	benchmarkTrace(b, 1000, 2)
+}
+
+func BenchmarkTrace_1000_10(b *testing.B) {
+	benchmarkTrace(b, 1000, 10)
+}
+
+func BenchmarkTrace_1000_100(b *testing.B) {
+	benchmarkTrace(b, 1000, 100)
+}
+
+func BenchmarkTrace_1000_1000(b *testing.B) {
+	benchmarkTrace(b, 1000, 1000)
+}
+
+func BenchmarkTrace_1000_10000(b *testing.B) {
+	benchmarkTrace(b, 1000, 10000)
+}