Browse Source

Match backwards on matches

Do backwards matching into the queued literals.

This seems to gain about 1.5% of compression. I will add benchmarks later.

This could potentially make up for smaller hash tables (15, maybe even 14 bits).

I will add that as a separate PR, since the window size and hash table size are kind of mixed up.
Klaus Post 6 years ago
parent
commit
d307069a28
2 changed files with 27 additions and 7 deletions
  1. 16 5
      block.go
  2. 11 2
      errors.go

+ 16 - 5
block.go

@@ -76,13 +76,24 @@ func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
 		}
 		}
 
 
 		// Match found.
 		// Match found.
-		// acc = accInit
 		lLen := si - anchor // Literal length.
 		lLen := si - anchor // Literal length.
+		// We already matched 4 bytes.
+		mLen := 4
+
+		// Extend backwards if we can, reducing literals.
+		tOff := si - offset - 1
+		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
+			si--
+			tOff--
+			lLen--
+			mLen++
+		}
+
+		// Add the match length, so we continue search at the end.
+		// Use mLen to store the offset base.
+		si, mLen = si+mLen, si+minMatch
 
 
-		// Encode match length part 1.
-		si += minMatch
-		mLen := si // Match length has minMatch already.
-		// Find the longest match, first looking by batches of 8 bytes.
+		// Find the longest match by looking by batches of 8 bytes.
 		for si < sn {
 		for si < sn {
 			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
 			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
 			if x == 0 {
 			if x == 0 {

+ 11 - 2
errors.go

@@ -1,6 +1,11 @@
 package lz4
 package lz4
 
 
-import "errors"
+import (
+	"errors"
+	"fmt"
+	"os"
+	rdebug "runtime/debug"
+)
 
 
 var (
 var (
 	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed
 	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed
@@ -13,7 +18,11 @@ var (
 )
 )
 
 
 func recoverBlock(e *error) {
 func recoverBlock(e *error) {
-	if recover() != nil && *e == nil {
+	if r := recover(); r != nil && *e == nil {
+		if debugFlag {
+			fmt.Fprintln(os.Stderr, r)
+			rdebug.PrintStack()
+		}
 		*e = ErrInvalidSourceShortBuffer
 		*e = ErrInvalidSourceShortBuffer
 	}
 	}
 }
 }