Browse Source

Faster matching

A simple trick that makes encoding 10-20% faster.

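When an 8-byte comparison fails, the two words are XORed and the number of trailing zero bits (divided by 8) gives the count of bytes that still matched. This avoids having to re-count individual bytes when a match ends.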

```
Before:
file	out	level	insize	outsize	millis	mb/s
enwik9	lz4	0	1000000000	489160485	6968	136.86
enwik9	lz4	1	1000000000	472338434	10406	91.65
enwik9	lz4	2	1000000000	405391862	20422	46.70

After:
file	out	level	insize	outsize	millis	mb/s
enwik9	lz4	0	1000000000	489160431	5342	178.52
enwik9	lz4	1	1000000000	472338287	9259	103.00
enwik9	lz4	2	1000000000	405391713	18551	51.41

Before:
file	out	level	insize	outsize	millis	mb/s
10gb.tar	lz4	0	10065157632	6441120237	71134	134.94
10gb.tar	lz4	1	10065157632	6369810447	117489	81.70
10gb.tar	lz4	2	10065157632	5697686196	266037	36.08

After:
file	out	level	insize	outsize	millis	mb/s
10gb.tar	lz4	0	10065157632	6441120013	65363	146.85
10gb.tar	lz4	1	10065157632	6369810471	110032	87.24
10gb.tar	lz4	2	10065157632	5697685530	244089	39.33

Before:
file	out	level	insize	outsize	millis	mb/s
adresser.json	lz4	0	7983034785	481827641	11168	681.70
adresser.json	lz4	1	7983034785	522398797	31439	242.16
adresser.json	lz4	2	7983034785	380853678	36492	208.62

After:
file	out	level	insize	outsize	millis	mb/s
adresser.json	lz4	0	7983034785	481827462	10197	746.54
adresser.json	lz4	1	7983034785	522398669	30017	253.63
adresser.json	lz4	2	7983034785	380853598	34770	218.95

benchmark                     old ns/op     new ns/op     delta
BenchmarkCompress-8           4139335       3146599       -23.98%
BenchmarkCompressHC-8         31650000      30165988      -4.69%
BenchmarkCompressPg1661-8     1230701       1209301       -1.74%
BenchmarkCompressDigits-8     1160600       1158303       -0.20%
BenchmarkCompressTwain-8      1183101       1181510       -0.13%
BenchmarkCompressRand-8       1169600       1156202       -1.15%

benchmark                     old MB/s     new MB/s     speedup
BenchmarkCompressPg1661-8     483.41       491.96       1.02x
BenchmarkCompressDigits-8     86.16        86.34        1.00x
BenchmarkCompressTwain-8      327.83       328.27       1.00x
BenchmarkCompressRand-8       14.01        14.17        1.01x

benchmark                     old bytes     new bytes     delta
BenchmarkCompress-8           0             0             +0.00%
BenchmarkCompressHC-8         0             0             +0.00%
BenchmarkCompressPg1661-8     8388683       8388683       +0.00%
BenchmarkCompressDigits-8     8388625       8388624       -0.00%
BenchmarkCompressTwain-8      8388658       8388658       +0.00%
BenchmarkCompressRand-8       8388634       8388634       +0.00%

```

The output sizes differ by a few bytes, which I can't really explain if your bounds are correct (which they appear to be).
Klaus Post 6 years ago
parent
commit
2d52959d56
1 changed file with 19 additions and 11 deletions
  1. 19 11
      block.go

+ 19 - 11
block.go

@@ -2,6 +2,7 @@ package lz4
 
 import (
 	"encoding/binary"
+	"math/bits"
 )
 
 // blockHash hashes 4 bytes into a value < winSize.
@@ -77,12 +78,15 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 		si += minMatch
 		mLen := si // Match length has minMatch already.
 		// Find the longest match, first looking by batches of 8 bytes.
-		for si < sn && binary.LittleEndian.Uint64(src[si:]) == binary.LittleEndian.Uint64(src[si-offset:]) {
-			si += 8
-		}
-		// Then byte by byte.
-		for si < sn && src[si] == src[si-offset] {
-			si++
+		for si < sn {
+			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
+			if x == 0 {
+				si += 8
+			} else {
+				// Stop is first non-zero byte.
+				si += bits.TrailingZeros64(x) >> 3
+				break
+			}
 		}
 
 		mLen = si - mLen
@@ -198,11 +202,15 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 			}
 			ml := 0
 			// Compare the current position with a previous with the same hash.
-			for ml < sn-si && binary.LittleEndian.Uint64(src[next+ml:]) == binary.LittleEndian.Uint64(src[si+ml:]) {
-				ml += 8
-			}
-			for ml < sn-si && src[next+ml] == src[si+ml] {
-				ml++
+			for ml < sn-si {
+				x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
+				if x == 0 {
+					ml += 8
+				} else {
+					// Stop is first non-zero byte.
+					ml += bits.TrailingZeros64(x) >> 3
+					break
+				}
 			}
 			if ml < minMatch || ml <= mLen {
 				// Match too small (<minMath) or smaller than the current match.