@@ -14,11 +14,18 @@ import (
 // code.
 const maxOffset = 1 << 15
 
-func load32(b []byte, i int32) uint32 {
+func load32(b []byte, i int) uint32 {
 	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
 	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
 }
 
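+// load64 is like load32, but returns the little-endian uint64 of b[i:i+8].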
+func load64(b []byte, i int) uint64 {
+	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
 // emitLiteral writes a literal chunk and returns the number of bytes written.
 func emitLiteral(dst, lit []byte) int {
 	i, n := 0, uint(len(lit)-1)
@@ -58,7 +65,7 @@ func emitLiteral(dst, lit []byte) int {
 }
 
 // emitCopy writes a copy chunk and returns the number of bytes written.
-func emitCopy(dst []byte, offset, length int32) int {
+func emitCopy(dst []byte, offset, length int) int {
 	i := 0
 	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
 	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
@@ -138,8 +145,6 @@ func Encode(dst, src []byte) []byte {
 // that we don't overrun the dst and src buffers.
 //
 // TODO: implement this fast path.
-//
-// TODO: actually use inputMargin inside encodeBlock.
 const inputMargin = 16 - 1
 
 // minBlockSize is the minimum size of the input to encodeBlock. As above, we
@@ -149,6 +154,11 @@ const inputMargin = 16 - 1
 // TODO: can we make this bound a little tighter, raising it by 1 or 2?
 const minBlockSize = inputMargin
 
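+// hash maps the 4 input bytes packed into u to a hash-table index; shift
+// matches the table size.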
+func hash(u, shift uint32) uint32 {
+	return (u * 0x1e35a7bd) >> shift
+}
+
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
@@ -159,19 +170,29 @@ const minBlockSize = inputMargin
 func encodeBlock(dst, src []byte) (d int) {
 	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
 	const maxTableSize = 1 << 14
-	shift, tableSize := uint(32-8), 1<<8
+	shift, tableSize := uint32(32-8), 1<<8
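+	// The shift and table size move in lockstep: tableSize == 1<<(32-shift).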
 	for tableSize < maxTableSize && tableSize < len(src) {
 		shift--
 		tableSize *= 2
 	}
 	var table [maxTableSize]int32
 
-	// Iterate over the source bytes.
-	var (
-		s   int32 // The iterator position.
-		t   int32 // The last position with the same hash as s.
-		lit int32 // The start position of any pending literal bytes.
+	// sLimit is when to stop looking for offset/length copies. The inputMargin
+	// lets us use a fast path for emitLiteral in the main loop, while we are
+	// looking for copies.
+	sLimit := len(src) - inputMargin
 
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := 0
+
+	// The encoded form must start with a literal, as there are no previous
+	// bytes to copy, so we start looking for hash matches at s == 1.
+	s := 1
+	nextHash := hash(load32(src, s), shift)
+
+	for {
 		// Copied from the C++ snappy implementation:
 		//
 		// Heuristic match skipping: If 32 bytes are scanned with no matches
@@ -186,43 +206,78 @@ func encodeBlock(dst, src []byte) (d int) {
 		// The "skip" variable keeps track of how many bytes there are since
 		// the last match; dividing it by 32 (ie. right-shifting by five) gives
 		// the number of bytes to move ahead for each iteration.
-		skip uint32 = 32
-	)
-	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
-		// Update the hash table.
-		h := load32(src, s)
-		p := &table[(h*0x1e35a7bd)>>shift]
-		// We need to to store values in [-1, inf) in table. To save
-		// some initialization time, (re)use the table's zero value
-		// and shift the values against this zero: add 1 on writes,
-		// subtract 1 on reads.
-		t, *p = *p-1, s+1
-		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
-		if t < 0 || s-t >= maxOffset || h != load32(src, t) {
-			s += int32(skip >> 5)
+		skip := 32
+
+		nextS := s
+		candidate := 0
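+		// Scan forward for the next 4-byte match, stepping further ahead the
+		// longer we go without finding one.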
+		for {
+			s = nextS
+			nextS = s + skip>>5
 			skip++
-			continue
-		}
-		skip = 32
-		// Otherwise, we have a match. First, emit any pending literal bytes.
-		if lit != s {
-			d += emitLiteral(dst[d:], src[lit:s])
+			if nextS > sLimit {
+				goto emitRemainder
+			}
+			candidate = int(table[nextHash])
+			table[nextHash] = int32(s)
+			nextHash = hash(load32(src, nextS), shift)
+			if load32(src, s) == load32(src, candidate) {
+				break
+			}
 		}
-		// Extend the match to be as long as possible.
-		s0 := s
-		s, t = s+4, t+4
-		for int(s) < len(src) && src[s] == src[t] {
-			s++
-			t++
+
+		// A 4-byte match has been found. We'll later see if more than 4 bytes
+		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
+		// them as literal bytes.
+		d += emitLiteral(dst[d:], src[nextEmit:s])
+
+		// Call emitCopy, and then see if another emitCopy could be our next
+		// move. Repeat until we find no match for the input immediately after
+		// what was consumed by the last emitCopy call.
+		//
+		// If we exit this loop normally then we need to call emitLiteral next,
+		// though we don't yet know how big the literal will be. We handle that
+		// by proceeding to the next iteration of the main loop. We also can
+		// exit this loop via goto if we get close to exhausting the input.
+		for {
+			// Invariant: we have a 4-byte match at s, and no need to emit any
+			// literal bytes prior to s.
+			base := s
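+			// Extend the 4-byte match as long as possible.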
+			s += 4
+			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
+			}
+			d += emitCopy(dst[d:], base-candidate, s-base)
+			nextEmit = s
+			if s >= sLimit {
+				goto emitRemainder
+			}
+
+			// We could immediately start working at s now, but to improve
+			// compression we first update the hash table at s-1 and at s. If
+			// another emitCopy is not our next move, also calculate nextHash
+			// at s+1. At least on GOARCH=amd64, these three hash calculations
+			// are faster as one load64 call (with some shifts) instead of
+			// three load32 calls.
+			x := load64(src, s-1)
+			prevHash := hash(uint32(x>>0), shift)
+			table[prevHash] = int32(s - 1)
+			currHash := hash(uint32(x>>8), shift)
+			candidate = int(table[currHash])
+			table[currHash] = int32(s)
+			if uint32(x>>8) != load32(src, candidate) {
+				nextHash = hash(uint32(x>>16), shift)
+				s++
+				break
+			}
 		}
-		// Emit the copied bytes.
-		d += emitCopy(dst[d:], s-t, s-s0)
-		lit = s
 	}
 
-	// Emit any final pending literal bytes and return.
-	if int(lit) != len(src) {
-		d += emitLiteral(dst[d:], src[lit:])
+emitRemainder:
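+	// Emit any remaining bytes of src as one final literal.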
+	if nextEmit < len(src) {
+		d += emitLiteral(dst[d:], src[nextEmit:])
 	}
 	return d
 }