Browse Source

Rework fast matching

This makes several changes that in combination gives close to the same compression, but with a big speedup in most cases.

We change the hash table to contain hashes of 6 bytes. The speed is about the same, but this usually gives better compression since hashes are of better quality. This typically also makes the content faster to decode since longer matches are preferred.

Hash table size is now defined separately from window size. I found that 16 bits was a good value, especially since the better hash table opens up other optimizations.

We check 3 bytes, then skip one (plus more if data is hard to compress). This gives most of the speedup, but also loses us some compression.

We index 2 bytes before the end of each match. This doesn't impact speed much and gives a nice compression boost.

This combines well with #49 (not included in this benchmark)

Now for the numbers. They are all before/after, best of 2 runs.
```
file	out	level	insize	outsize	millis	mb/s
consensus.db.10gb	lz4	0	10737418240	5057961420	35446	288.88
consensus.db.10gb	lz4	0	10737418240	5077608378	23226	440.87

file	out	level	insize	outsize	millis	mb/s
rawstudio-mint14.tar	lz4	0	8558382592	4568741520	25369	321.73
rawstudio-mint14.tar	lz4	0	8558382592	4592776475	17168	475.41

file	out	level	insize	outsize	millis	mb/s
github-ranks-backup.bin	lz4	0	1862623243	579273817	4074	436.02
github-ranks-backup.bin	lz4	0	1862623243	627056167	3522	504.35

file	out	level	insize	outsize	millis	mb/s
github-june-2days-2019.json	lz4	0	6273951764	1355117284	10763	555.86
github-june-2days-2019.json	lz4	0	6273951764	1293582359	9136	654.91

file	out	level	insize	outsize	millis	mb/s
gob-stream	lz4	0	1911399616	384235547	3481	523.66
gob-stream	lz4	0	1911399616	384292384	2827	644.80

file	out	level	insize	outsize	millis	mb/s
10gb.tar	lz4	0	10065157632	6481808453	23629	406.23
10gb.tar	lz4	0	10065157632	5902162074	22592	424.88

file	out	level	insize	outsize	millis	mb/s
enwik9	lz4	0	1000000000	489160425	3733	255.47
enwik9	lz4	0	1000000000	482276927	3520	270.93

file	out	level	insize	outsize	millis	mb/s
silesia.tar	lz4	0	211947520	99218419	691	292.51
silesia.tar	lz4	0	211947520	96766005	590	342.01

file	out	level	insize	outsize	millis	mb/s
sharnd.out	lz4	0	500000000	500000495	169	2821.52
sharnd.out	lz4	0	500000000	500000495	166	2872.51
```

Only [github-ranks-backup.bin](https://files.klauspost.com/compress/github-ranks-backup.bin.zst) has a significant size increase. The others are very close or better than before.

All show a minor to significant speedup.
Klaus Post 6 years ago
parent
commit
a207029655
5 changed files with 81 additions and 33 deletions
  1. 2 2
      bench_test.go
  2. 63 20
      block.go
  3. 13 8
      block_test.go
  4. 3 3
      lz4.go
  5. BIN
      testdata/upperbound.data

+ 2 - 2
bench_test.go

@@ -10,7 +10,7 @@ import (
 )
 
 func BenchmarkCompress(b *testing.B) {
-	var hashTable [1 << 16]int
+	var hashTable [htSize]int
 	buf := make([]byte, len(pg1661))
 
 	b.ReportAllocs()
@@ -22,7 +22,7 @@ func BenchmarkCompress(b *testing.B) {
 }
 
 func BenchmarkCompressRandom(b *testing.B) {
-	var hashTable [1 << 16]int
+	var hashTable [htSize]int
 	buf := make([]byte, len(randomLZ4))
 
 	b.ReportAllocs()

+ 63 - 20
block.go

@@ -2,13 +2,14 @@ package lz4
 
 import (
 	"encoding/binary"
+	"fmt"
 	"math/bits"
 )
 
-// blockHash hashes 4 bytes into a value < winSize.
-func blockHash(x uint32) uint32 {
-	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
-	return x * hasher >> hashShift
+// blockHash hashes the lower 6 bytes into a value < htSize.
+func blockHash(x uint64) uint32 {
+	const prime6bytes = 227718039650203
+	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
 }
 
 // CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
@@ -46,33 +47,62 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 	// This significantly speeds up incompressible data and usually has very small impact on compresssion.
 	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
 	const adaptSkipLog = 7
-
 	sn, dn := len(src)-mfLimit, len(dst)
 	if sn <= 0 || dn == 0 {
 		return 0, nil
 	}
-	var si int
-
-	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
+	if len(hashTable) < htSize {
+		return 0, fmt.Errorf("hash table too small, should be at least %d in size", htSize)
+	}
+	// Prove to the compiler the table has at least htSize elements.
+	// The compiler can see that "uint32() >> hashShift" cannot be out of bounds.
+	hashTable = hashTable[:htSize]
 
-	anchor := si // Position of the current literals.
+	// si: Current position of the search.
+	// anchor: Position of the current literals.
+	var si, anchor int
 
+	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
 	for si < sn {
-		// Hash the next 4 bytes (sequence)...
-		match := binary.LittleEndian.Uint32(src[si:])
+		// Hash the next 6 bytes (sequence)...
+		match := binary.LittleEndian.Uint64(src[si:])
 		h := blockHash(match)
+		h2 := blockHash(match >> 8)
 
+		// We check a match at s, s+1 and s+2 and pick the first one we get.
+		// Checking 3 only requires us to load the source one.
 		ref := hashTable[h]
+		ref2 := hashTable[h2]
 		hashTable[h] = si
-		if ref >= sn { // Invalid reference (dirty hashtable).
-			si += 1 + (si-anchor)>>adaptSkipLog
-			continue
-		}
+		hashTable[h2] = si + 1
 		offset := si - ref
+
+		// If offset <= 0 we got an old entry in the hash table.
 		if offset <= 0 || offset >= winSize || // Out of window.
-			match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
-			si += 1 + (si-anchor)>>adaptSkipLog
-			continue
+			uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
+			// No match. Start calculating another hash.
+			// The processor can usually do this out-of-order.
+			h = blockHash(match >> 16)
+			ref = hashTable[h]
+
+			// Check the second match at si+1
+			si += 1
+			offset = si - ref2
+
+			if offset <= 0 || offset >= winSize ||
+				uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
+				// No match. Check the third match at si+2
+				si += 1
+				offset = si - ref
+				hashTable[h] = si
+
+				if offset <= 0 || offset >= winSize ||
+					uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) {
+					// Skip one extra byte (at si+3) before we check 3 matches again.
+					si += 2 + (si-anchor)>>adaptSkipLog
+					continue
+				}
+			}
 		}
 
 		// Match found.
@@ -134,6 +164,13 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 			dst[di] = byte(mLen)
 			di++
 		}
+		// Check if we can load next values.
+		if si >= sn {
+			break
+		}
+		// Hash match end-2
+		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
+		hashTable[h] = si - 2
 	}
 
 	if anchor == 0 {
@@ -165,6 +202,12 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 	return di, nil
 }
 
+// blockHash hashes 4 bytes into a value < winSize.
+func blockHashHC(x uint32) uint32 {
+	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
+	return x * hasher >> (32 - winSizeLog)
+}
+
 // CompressBlockHC compresses the source buffer src into the destination dst
 // with max search depth (use 0 or negative value for no max).
 //
@@ -199,7 +242,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 	for si < sn {
 		// Hash the next 4 bytes (sequence).
 		match := binary.LittleEndian.Uint32(src[si:])
-		h := blockHash(match)
+		h := blockHashHC(match)
 
 		// Follow the chain until out of window and give the longest match.
 		mLen := 0
@@ -251,7 +294,7 @@ func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
 		for si, ml := winStart, si+mLen; si < ml; {
 			match >>= 8
 			match |= uint32(src[si+3]) << 24
-			h := blockHash(match)
+			h := blockHashHC(match)
 			chainTable[si&winMask] = hashTable[h]
 			hashTable[h] = si
 			si++

+ 13 - 8
block_test.go

@@ -11,8 +11,11 @@ import (
 	"github.com/pierrec/lz4"
 )
 
-// Hash table size.
-const htSize = 1 << 16 // 64kb
+const (
+	// Should match values in lz4.go
+	hashLog = 16
+	htSize  = 1 << hashLog
+)
 
 type testcase struct {
 	file         string
@@ -22,11 +25,11 @@ type testcase struct {
 
 var rawFiles = []testcase{
 	// {"testdata/207326ba-36f8-11e7-954a-aca46ba8ca73.png", true, nil},
-	{"testdata/e.txt", true, nil},
+	{"testdata/e.txt", false, nil},
 	{"testdata/gettysburg.txt", true, nil},
 	{"testdata/Mark.Twain-Tom.Sawyer.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
-	{"testdata/pi.txt", true, nil},
+	{"testdata/pi.txt", false, nil},
 	{"testdata/random.data", false, nil},
 	{"testdata/repeat.txt", true, nil},
 	{"testdata/pg1661.txt", true, nil},
@@ -125,10 +128,12 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
 		t.Helper()
 
 		// Compress the data.
-		zbuf := make([]byte, int(float64(len(src))*0.85))
+		// We provide a destination that is too small to trigger an out-of-bounds,
+		// which makes it return the error we want.
+		zbuf := make([]byte, int(float64(len(src))*0.40))
 		_, err := compress(src, zbuf)
 		if err != lz4.ErrInvalidSourceShortBuffer {
-			t.Fatal("err should be ErrInvalidSourceShortBuffer")
+			t.Fatal("err should be ErrInvalidSourceShortBuffer, was", err)
 		}
 	}
 
@@ -154,9 +159,9 @@ func TestCompressCornerCase_CopyDstUpperBound(t *testing.T) {
 }
 
 func TestIssue23(t *testing.T) {
-	compressBuf := make([]byte, lz4.CompressBlockBound(htSize))
+	compressBuf := make([]byte, lz4.CompressBlockBound(1<<16))
 	for j := 1; j < 16; j++ {
-		var buf [htSize]byte
+		var buf [1 << 16]byte
 		var ht [htSize]int
 
 		for i := 0; i < len(buf); i += j {

+ 3 - 3
lz4.go

@@ -30,9 +30,9 @@ const (
 	// hashLog determines the size of the hash table used to quickly find a previous match position.
 	// Its value influences the compression speed and memory usage, the lower the faster,
 	// but at the expense of the compression ratio.
-	// 16 seems to be the best compromise.
-	hashLog   = 16
-	hashShift = uint((minMatch * 8) - hashLog)
+	// 16 seems to be the best compromise for fast compression.
+	hashLog = 16
+	htSize  = 1 << hashLog
 
 	mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes.
 )

BIN
testdata/upperbound.data