Ver código fonte

lz4block: Use pointer-to-array in pool, not slice

This fixes a performance regression introduced in
fb4a2ec8e1c77e10b14dda65076589e53c37605b. Putting a slice in a sync.Pool
causes an allocation, because the assignment to interface{} needs a
single pointer but the slice header is a three-word struct (pointer,
length, capacity) that must be copied to the heap first.

Skipping these tiny allocations by pooling pointers to arrays can speed up
block compression by almost 30% for some inputs:

name              old time/op    new time/op    delta
Compress-8          3.19ms ± 4%    3.19ms ± 2%      ~     (p=0.219 n=30+30)
CompressRandom-8    23.5µs ± 1%    18.2µs ± 1%   -22.55%  (p=0.000 n=30+30)
CompressHC-8        11.4ms ± 2%    11.0ms ± 1%    -3.33%  (p=0.000 n=30+30)
CompressPg1661-8     741µs ± 4%     726µs ± 5%    -2.04%  (p=0.001 n=30+30)
CompressDigits-8     689µs ± 5%     685µs ± 5%      ~     (p=0.697 n=30+30)
CompressTwain-8      732µs ± 3%     712µs ± 5%    -2.69%  (p=0.000 n=29+30)
CompressRand-8       674µs ± 5%     671µs ± 7%      ~     (p=0.676 n=30+30)

name              old alloc/op   new alloc/op   delta
Compress-8          1.08kB ±97%   0.95kB ±100%   -12.30%  (p=0.001 n=30+30)
CompressRandom-8     39.0B ±18%     5.5B ±100%   -85.81%  (p=0.000 n=30+30)
CompressHC-8        5.10kB ± 2%    4.84kB ± 2%    -5.18%  (p=0.000 n=24+20)
CompressPg1661-8    4.26MB ± 1%    4.26MB ± 0%      ~     (p=0.644 n=30+30)
CompressDigits-8    4.26MB ± 1%    4.27MB ± 1%      ~     (p=0.293 n=30+30)
CompressTwain-8     4.26MB ± 1%    4.27MB ± 1%      ~     (p=0.947 n=30+30)
CompressRand-8      4.25MB ± 0%    4.26MB ± 1%      ~     (p=0.414 n=28+29)

name              old allocs/op  new allocs/op  delta
Compress-8            1.00 ± 0%      0.00       -100.00%  (p=0.000 n=30+30)
CompressRandom-8      1.00 ± 0%      0.00       -100.00%  (p=0.000 n=30+30)
CompressHC-8          2.00 ± 0%      0.00       -100.00%  (p=0.000 n=30+30)
CompressPg1661-8      4.00 ± 0%      4.00 ± 0%      ~     (all equal)
CompressDigits-8      4.00 ± 0%      4.00 ± 0%      ~     (all equal)
CompressTwain-8       4.00 ± 0%      4.00 ± 0%      ~     (all equal)
CompressRand-8        4.00 ± 0%      4.00 ± 0%      ~     (all equal)

name              old speed      new speed      delta
CompressRandom-8   696MB/s ± 1%   899MB/s ± 1%   +29.11%  (p=0.000 n=30+30)
CompressPg1661-8   803MB/s ± 4%   820MB/s ± 5%    +2.11%  (p=0.001 n=30+30)
CompressDigits-8   145MB/s ± 5%   146MB/s ± 5%      ~     (p=0.695 n=30+30)
CompressTwain-8    530MB/s ± 3%   545MB/s ± 5%    +2.81%  (p=0.000 n=29+30)
CompressRand-8    24.3MB/s ± 5%  24.5MB/s ± 7%      ~     (p=0.690 n=30+30)
greatroar 5 anos atrás
pai
commit
f335e39ec2
1 arquivos alterados com 14 adições e 13 exclusões
  1. 14 13
      internal/lz4block/block.go

+ 14 - 13
internal/lz4block/block.go

@@ -26,21 +26,19 @@ const (
 )
 
 // Pool of hash tables for CompressBlock.
-var HashTablePool = hashTablePool{sync.Pool{New: func() interface{} { return make([]int, htSize) }}}
+var HashTablePool = hashTablePool{sync.Pool{New: func() interface{} { return new([htSize]int) }}}
 
 type hashTablePool struct {
 	sync.Pool
 }
 
-func (p *hashTablePool) Get() []int {
-	return p.Pool.Get().([]int)
+func (p *hashTablePool) Get() *[htSize]int {
+	return p.Pool.Get().(*[htSize]int)
 }
 
 // Zero out the table to avoid non-deterministic outputs (see issue#65).
-func (p *hashTablePool) Put(t []int) {
-	for i := range t {
-		t[i] = 0
-	}
+func (p *hashTablePool) Put(t *[htSize]int) {
+	*t = [htSize]int{}
 	p.Pool.Put(t)
 }
 
@@ -90,8 +88,9 @@ func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) {
 	}
 
 	if cap(hashTable) < htSize {
-		hashTable = HashTablePool.Get()
-		defer HashTablePool.Put(hashTable)
+		poolTable := HashTablePool.Get()
+		defer HashTablePool.Put(poolTable)
+		hashTable = poolTable[:]
 	} else {
 		hashTable = hashTable[:htSize]
 	}
@@ -275,15 +274,17 @@ func CompressBlockHC(src, dst []byte, depth CompressionLevel, hashTable, chainTa
 	// hashTable: stores the last position found for a given hash
 	// chainTable: stores previous positions for a given hash
 	if cap(hashTable) < htSize {
-		hashTable = HashTablePool.Get()
-		defer HashTablePool.Put(hashTable)
+		poolTable := HashTablePool.Get()
+		defer HashTablePool.Put(poolTable)
+		hashTable = poolTable[:]
 	} else {
 		hashTable = hashTable[:htSize]
 	}
 	_ = hashTable[htSize-1]
 	if cap(chainTable) < htSize {
-		chainTable = HashTablePool.Get()
-		defer HashTablePool.Put(chainTable)
+		poolTable := HashTablePool.Get()
+		defer HashTablePool.Put(poolTable)
+		chainTable = poolTable[:]
 	} else {
 		chainTable = chainTable[:htSize]
 	}