Browse Source

Merge pull request #47 from klauspost/skip-incompressible-data

Skip incompressible data (32x) faster
Pierre Curto 6 years ago
parent
commit
85f050b9aa
2 changed files with 26 additions and 3 deletions
  1. 13 0
      bench_test.go
  2. 13 3
      block.go

+ 13 - 0
bench_test.go

@@ -21,6 +21,19 @@ func BenchmarkCompress(b *testing.B) {
 	}
 }
 
+func BenchmarkCompressRandom(b *testing.B) {
+	var hashTable [1 << 16]int
+	buf := make([]byte, len(randomLZ4))
+
+	b.ReportAllocs()
+	b.SetBytes(int64(len(random)))
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		lz4.CompressBlock(random, buf, hashTable[:])
+	}
+}
+
 func BenchmarkCompressHC(b *testing.B) {
 	buf := make([]byte, len(pg1661))
 

+ 13 - 3
block.go

@@ -42,6 +42,11 @@ func UncompressBlock(src, dst []byte) (int, error) {
 func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 	defer recoverBlock(&err)
 
+	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
+	// This significantly speeds up incompressible data and usually has very small impact on compression.
+	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
+	const adaptSkipLog = 7
+
 	sn, dn := len(src)-mfLimit, len(dst)
 	if sn <= 0 || dn == 0 {
 		return 0, nil
@@ -60,13 +65,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 		ref := hashTable[h]
 		hashTable[h] = si
 		if ref >= sn { // Invalid reference (dirty hashtable).
-			si++
+			si += 1 + (si-anchor)>>adaptSkipLog
 			continue
 		}
 		offset := si - ref
 		if offset <= 0 || offset >= winSize || // Out of window.
 			match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
-			si++
+			si += 1 + (si-anchor)>>adaptSkipLog
 			continue
 		}
 
@@ -171,6 +176,11 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 	defer recoverBlock(&err)
 
+	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
+	// This significantly speeds up incompressible data and usually has very small impact on compression.
+	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
+	const adaptSkipLog = 7
+
 	sn, dn := len(src)-mfLimit, len(dst)
 	if sn <= 0 || dn == 0 {
 		return 0, nil
@@ -227,7 +237,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 
 		// No match found.
 		if mLen == 0 {
-			si++
+			si += 1 + (si-anchor)>>adaptSkipLog
 			continue
 		}