@@ -137,6 +137,22 @@ func encode(dst, src []byte) (d int) {
 		s   int32 // The iterator position.
 		t   int32 // The last position with the same hash as s.
 		lit int32 // The start position of any pending literal bytes.
+
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned, look at every third byte, etc.. When a match is found,
+		// immediately go back to looking at every byte. This is a small loss
+		// (~5% performance, ~0.1% density) for compressible data due to more
+		// bookkeeping, but for non-compressible data (such as JPEG) it's a
+		// huge win since the compressor quickly "realizes" the data is
+		// incompressible and doesn't bother looking for matches everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip uint32 = 32
 	)
 	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
 		// Update the hash table.
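To spell out the arithmetic: skip starts at 32, so skip>>5 is 1 and every byte is inspected. Since skip grows by one per miss, 32 consecutive misses bring it to 64 and the step to 2 (every other byte), 32 more bring it to 96 and the step to 3, and so on. The skip = 32 reset in the next hunk returns the step to 1 as soon as a match is found.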
@@ -150,10 +166,11 @@ func encode(dst, src []byte) (d int) {
 		t, *p = *p-1, s+1
 		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
 		if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
-			// Skip multiple bytes if the last match was >= 32 bytes prior.
-			s += 1 + (s-lit)>>5
+			s += int32(skip >> 5)
+			skip++
 			continue
 		}
+		skip = 32
 		// Otherwise, we have a match. First, emit any pending literal bytes.
 		if lit != s {
 			d += emitLiteral(dst[d:], src[lit:s])
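The heuristic can be exercised on its own. Below is a minimal, self-contained sketch, not the package's encoder: the real hash-table probe is replaced by a hypothetical matchAt callback and matches are not actually emitted, so only the skipping behaviour is shown.

package main

import "fmt"

// scan walks src and returns the positions it inspects. It advances by
// skip>>5 bytes after every miss and resets skip to 32 on every match,
// mirroring the heuristic added in the diff above.
func scan(src []byte, matchAt func(int) bool) []int {
	var inspected []int
	skip := uint32(32) // skip>>5 == 1, i.e. start by looking at every byte.
	for s := 0; s+3 < len(src); {
		inspected = append(inspected, s)
		if !matchAt(s) {
			s += int(skip >> 5) // The step grows by 1 for every 32 misses.
			skip++
			continue
		}
		skip = 32 // A match: go back to looking at every byte.
		s++       // Simplified; the real encoder extends and emits the match here.
	}
	return inspected
}

func main() {
	src := make([]byte, 1<<10)
	never := func(int) bool { return false } // Models incompressible input.
	fmt.Printf("inspected %d of %d positions\n", len(scan(src, never)), len(src))
}

Run on input where matchAt never fires, the walk inspects only a fraction of the positions, which is the win the comment above describes for incompressible data.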