Browse Source

Merge pull request #50 from klauspost/rework-fast-matching

Rework fast matching
Pierre Curto 6 years ago
parent
commit
8ef35db829
5 changed files with 81 additions and 33 deletions
  1. 2 2
      bench_test.go
  2. 63 20
      block.go
  3. 13 8
      block_test.go
  4. 3 3
      lz4.go
  5. BIN
      testdata/upperbound.data

+ 2 - 2
bench_test.go

@@ -10,7 +10,7 @@ import (
 )
 )
 
 
 func BenchmarkCompress(b *testing.B) {
 func BenchmarkCompress(b *testing.B) {
-	var hashTable [1 << 16]int
+	var hashTable [htSize]int
 	buf := make([]byte, len(pg1661))
 	buf := make([]byte, len(pg1661))
 
 
 	b.ReportAllocs()
 	b.ReportAllocs()
@@ -22,7 +22,7 @@ func BenchmarkCompress(b *testing.B) {
 }
 }
 
 
 func BenchmarkCompressRandom(b *testing.B) {
 func BenchmarkCompressRandom(b *testing.B) {
-	var hashTable [1 << 16]int
+	var hashTable [htSize]int
 	buf := make([]byte, len(randomLZ4))
 	buf := make([]byte, len(randomLZ4))
 
 
 	b.ReportAllocs()
 	b.ReportAllocs()

+ 63 - 20
block.go

@@ -2,13 +2,14 @@ package lz4
 
 
 import (
 import (
 	"encoding/binary"
 	"encoding/binary"
+	"fmt"
 	"math/bits"
 	"math/bits"
 )
 )
 
 
-// blockHash hashes 4 bytes into a value < winSize.
-func blockHash(x uint32) uint32 {
-	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
-	return x * hasher >> hashShift
+// blockHash hashes the lower 6 bytes into a value < htSize.
+func blockHash(x uint64) uint32 {
+	const prime6bytes = 227718039650203
+	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
 }
 }
 
 
 // CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
 // CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
@@ -46,33 +47,62 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 	// This significantly speeds up incompressible data and usually has very small impact on compression.
 	// This significantly speeds up incompressible data and usually has very small impact on compression.
 	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
 	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
 	const adaptSkipLog = 7
 	const adaptSkipLog = 7
-
 	sn, dn := len(src)-mfLimit, len(dst)
 	sn, dn := len(src)-mfLimit, len(dst)
 	if sn <= 0 || dn == 0 {
 	if sn <= 0 || dn == 0 {
 		return 0, nil
 		return 0, nil
 	}
 	}
-	var si int
-
-	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
+	if len(hashTable) < htSize {
+		return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize)
+	}
+	// Prove to the compiler the table has at least htSize elements.
+	// The compiler can see that "uint32() >> hashShift" cannot be out of bounds.
+	hashTable = hashTable[:htSize]
 
 
-	anchor := si // Position of the current literals.
+	// si: Current position of the search.
+	// anchor: Position of the current literals.
+	var si, anchor int
 
 
+	// Fast scan strategy: the hash table only stores the last 6-byte sequences.
 	for si < sn {
 	for si < sn {
-		// Hash the next 4 bytes (sequence)...
-		match := binary.LittleEndian.Uint32(src[si:])
+		// Hash the next 6 bytes (sequence)...
+		match := binary.LittleEndian.Uint64(src[si:])
 		h := blockHash(match)
 		h := blockHash(match)
+		h2 := blockHash(match >> 8)
 
 
+		// We check a match at si, si+1 and si+2 and pick the first one we get.
+		// Checking all 3 only requires us to load the source once.
 		ref := hashTable[h]
 		ref := hashTable[h]
+		ref2 := hashTable[h2]
 		hashTable[h] = si
 		hashTable[h] = si
-		if ref >= sn { // Invalid reference (dirty hashtable).
-			si += 1 + (si-anchor)>>adaptSkipLog
-			continue
-		}
+		hashTable[h2] = si + 1
 		offset := si - ref
 		offset := si - ref
+
+		// If offset <= 0 we got an old entry in the hash table.
 		if offset <= 0 || offset >= winSize || // Out of window.
 		if offset <= 0 || offset >= winSize || // Out of window.
-			match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
-			si += 1 + (si-anchor)>>adaptSkipLog
-			continue
+			uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
+			// No match. Start calculating another hash.
+			// The processor can usually do this out-of-order.
+			h = blockHash(match >> 16)
+			ref = hashTable[h]
+
+			// Check the second match at si+1
+			si += 1
+			offset = si - ref2
+
+			if offset <= 0 || offset >= winSize ||
+				uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
+				// No match. Check the third match at si+2
+				si += 1
+				offset = si - ref
+				hashTable[h] = si
+
+				if offset <= 0 || offset >= winSize ||
+					uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) {
+					// Skip one extra byte (at si+3) before we check 3 matches again.
+					si += 2 + (si-anchor)>>adaptSkipLog
+					continue
+				}
+			}
 		}
 		}
 
 
 		// Match found.
 		// Match found.
@@ -145,6 +175,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 			dst[di] = byte(mLen)
 			dst[di] = byte(mLen)
 			di++
 			di++
 		}
 		}
+		// Check if we can load next values.
+		if si >= sn {
+			break
+		}
+		// Hash match end-2
+		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
+		hashTable[h] = si - 2
 	}
 	}
 
 
 	if anchor == 0 {
 	if anchor == 0 {
@@ -176,6 +213,12 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 	return di, nil
 	return di, nil
 }
 }
 
 
+// blockHashHC hashes 4 bytes into a value < winSize.
+func blockHashHC(x uint32) uint32 {
+	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
+	return x * hasher >> (32 - winSizeLog)
+}
+
 // CompressBlockHC compresses the source buffer src into the destination dst
 // CompressBlockHC compresses the source buffer src into the destination dst
 // with max search depth (use 0 or negative value for no max).
 // with max search depth (use 0 or negative value for no max).
 //
 //
@@ -210,7 +253,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 	for si < sn {
 	for si < sn {
 		// Hash the next 4 bytes (sequence).
 		// Hash the next 4 bytes (sequence).
 		match := binary.LittleEndian.Uint32(src[si:])
 		match := binary.LittleEndian.Uint32(src[si:])
-		h := blockHash(match)
+		h := blockHashHC(match)
 
 
 		// Follow the chain until out of window and give the longest match.
 		// Follow the chain until out of window and give the longest match.
 		mLen := 0
 		mLen := 0
@@ -262,7 +305,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 		for si, ml := winStart, si+mLen; si < ml; {
 		for si, ml := winStart, si+mLen; si < ml; {
 			match >>= 8
 			match >>= 8
 			match |= uint32(src[si+3]) << 24
 			match |= uint32(src[si+3]) << 24
-			h := blockHash(match)
+			h := blockHashHC(match)
 			chainTable[si&winMask] = hashTable[h]
 			chainTable[si&winMask] = hashTable[h]
 			hashTable[h] = si
 			hashTable[h] = si
 			si++
 			si++

+ 13 - 8
block_test.go

@@ -11,8 +11,11 @@ import (
 	"github.com/pierrec/lz4"
 	"github.com/pierrec/lz4"
 )
 )
 
 
-// Hash table size.
-const htSize = 1 << 16 // 64kb
+const (
+	// Should match values in lz4.go
+	hashLog = 16
+	htSize  = 1 << hashLog
+)
 
 
 type testcase struct {
 type testcase struct {
 	file         string
 	file         string
@@ -22,11 +25,11 @@ type testcase struct {
 
 
 var rawFiles = []testcase{
 var rawFiles = []testcase{
 	// {"testdata/207326ba-36f8-11e7-954a-aca46ba8ca73.png", true, nil},
 	// {"testdata/207326ba-36f8-11e7-954a-aca46ba8ca73.png", true, nil},
-	{"testdata/e.txt", true, nil},
+	{"testdata/e.txt", false, nil},
 	{"testdata/gettysburg.txt", true, nil},
 	{"testdata/gettysburg.txt", true, nil},
 	{"testdata/Mark.Twain-Tom.Sawyer.txt", true, nil},
 	{"testdata/Mark.Twain-Tom.Sawyer.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
-	{"testdata/pi.txt", true, nil},
+	{"testdata/pi.txt", false, nil},
 	{"testdata/random.data", false, nil},
 	{"testdata/random.data", false, nil},
 	{"testdata/repeat.txt", true, nil},
 	{"testdata/repeat.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
@@ -125,10 +128,12 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
 		t.Helper()
 		t.Helper()
 
 
 		// Compress the data.
 		// Compress the data.
-		zbuf := make([]byte, int(float64(len(src))*0.85))
+		// We provide a destination that is too small to trigger an out-of-bounds,
+		// which makes it return the error we want.
+		zbuf := make([]byte, int(float64(len(src))*0.40))
 		_, err := compress(src, zbuf)
 		_, err := compress(src, zbuf)
 		if err != lz4.ErrInvalidSourceShortBuffer {
 		if err != lz4.ErrInvalidSourceShortBuffer {
-			t.Fatal("err should be ErrInvalidSourceShortBuffer")
+			t.Fatal("err should be ErrInvalidSourceShortBuffer, was", err)
 		}
 		}
 	}
 	}
 
 
@@ -154,9 +159,9 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
 }
 }
 
 
 func TestIssue23(t *testing.T) {
 func TestIssue23(t *testing.T) {
-	compressBuf := make([]byte, lz4.CompressBlockBound(htSize))
+	compressBuf := make([]byte, lz4.CompressBlockBound(1<<16))
 	for j := 1; j < 16; j++ {
 	for j := 1; j < 16; j++ {
-		var buf [htSize]byte
+		var buf [1 << 16]byte
 		var ht [htSize]int
 		var ht [htSize]int
 
 
 		for i := 0; i < len(buf); i += j {
 		for i := 0; i < len(buf); i += j {

+ 3 - 3
lz4.go

@@ -30,9 +30,9 @@ const (
 	// hashLog determines the size of the hash table used to quickly find a previous match position.
 	// hashLog determines the size of the hash table used to quickly find a previous match position.
 	// Its value influences the compression speed and memory usage, the lower the faster,
 	// Its value influences the compression speed and memory usage, the lower the faster,
 	// but at the expense of the compression ratio.
 	// but at the expense of the compression ratio.
-	// 16 seems to be the best compromise.
-	hashLog   = 16
-	hashShift = uint((minMatch * 8) - hashLog)
+	// 16 seems to be the best compromise for fast compression.
+	hashLog = 16
+	htSize  = 1 << hashLog
 
 
 	mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.
 	mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.
 )
 )

BIN
testdata/upperbound.data