@@ -137,6 +137,22 @@ func encode(dst, src []byte) (d int) {
 		s   int32 // The iterator position.
 		t   int32 // The last position with the same hash as s.
 		lit int32 // The start position of any pending literal bytes.
+
+		// Copied from the C++ snappy implementation:
+		//
+		// Heuristic match skipping: If 32 bytes are scanned with no matches
+		// found, start looking only at every other byte. If 32 more bytes are
+		// scanned, look at every third byte, etc.. When a match is found,
+		// immediately go back to looking at every byte. This is a small loss
+		// (~5% performance, ~0.1% density) for compressible data due to more
+		// bookkeeping, but for non-compressible data (such as JPEG) it's a
+		// huge win since the compressor quickly "realizes" the data is
+		// incompressible and doesn't bother looking for matches everywhere.
+		//
+		// The "skip" variable keeps track of how many bytes there are since
+		// the last match; dividing it by 32 (ie. right-shifting by five) gives
+		// the number of bytes to move ahead for each iteration.
+		skip uint32 = 32
 	)
 	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
 		// Update the hash table.
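To spell out the arithmetic: skip starts at 32, so skip>>5 is 1 and every byte is inspected. Since skip grows by one per miss, 32 consecutive misses bring it to 64 and the step to 2 (every other byte), 32 more bring it to 96 and the step to 3, and so on. The skip = 32 reset in the next hunk returns the step to 1 as soon as a match is found.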
@@ -150,10 +166,11 @@ func encode(dst, src []byte) (d int) {
 		t, *p = *p-1, s+1
 		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
 		if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
-			// Skip multiple bytes if the last match was >= 32 bytes prior.
-			s += 1 + (s-lit)>>5
+			s += int32(skip >> 5)
+			skip++
 			continue
 		}
+		skip = 32
 		// Otherwise, we have a match. First, emit any pending literal bytes.
 		if lit != s {
 			d += emitLiteral(dst[d:], src[lit:s])
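The heuristic can be exercised on its own. Below is a minimal, self-contained sketch, not the package's encoder: the real hash-table probe is replaced by a hypothetical matchAt callback and matches are not actually emitted, so only the skipping behaviour is shown.

package main

import "fmt"

// scan walks src and returns the positions it inspects. It advances by
// skip>>5 bytes after every miss and resets skip to 32 on every match,
// mirroring the heuristic added in the diff above.
func scan(src []byte, matchAt func(int) bool) []int {
	var inspected []int
	skip := uint32(32) // skip>>5 == 1, i.e. start by looking at every byte.
	for s := 0; s+3 < len(src); {
		inspected = append(inspected, s)
		if !matchAt(s) {
			s += int(skip >> 5) // The step grows by 1 for every 32 misses.
			skip++
			continue
		}
		skip = 32 // A match: go back to looking at every byte.
		s++       // Simplified; the real encoder extends and emits the match here.
	}
	return inspected
}

func main() {
	src := make([]byte, 1<<10)
	never := func(int) bool { return false } // Models incompressible input.
	fmt.Printf("inspected %d of %d positions\n", len(scan(src, never)), len(src))
}

Run on input where matchAt never fires, the walk inspects only a fraction of the positions, which is the win the comment above describes for incompressible data.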