Browse Source

Uncompress(): added shortcut optimizations, as per the asm implementation.

benchmark                       old ns/op     new ns/op     delta
BenchmarkUncompress-2           196           197           +0.51%
BenchmarkUncompressPg1661-2     2335937       1884413       -19.33%
BenchmarkUncompressDigits-2     144170        137391        -4.70%
BenchmarkUncompressTwain-2      1494622       1205248       -19.36%
BenchmarkUncompressRand-2       6138          6107          -0.51%

benchmark                       old MB/s     new MB/s     speedup
BenchmarkUncompressPg1661-2     254.69       315.71       1.24x
BenchmarkUncompressDigits-2     693.64       727.87       1.05x
BenchmarkUncompressTwain-2      259.50       321.80       1.24x
BenchmarkUncompressRand-2       2669.11      2682.43      1.00x

benchmark                       old allocs     new allocs     delta
BenchmarkUncompress-2           0              0              +0.00%
BenchmarkUncompressPg1661-2     0              0              +0.00%
BenchmarkUncompressDigits-2     0              0              +0.00%
BenchmarkUncompressTwain-2      0              0              +0.00%
BenchmarkUncompressRand-2       0              0              +0.00%

benchmark                       old bytes     new bytes     delta
BenchmarkUncompress-2           0             0             +0.00%
BenchmarkUncompressPg1661-2     0             0             +0.00%
BenchmarkUncompressDigits-2     0             0             +0.00%
BenchmarkUncompressTwain-2      0             0             +0.00%
BenchmarkUncompressRand-2       0             0             +0.00%
Pierre Curto 6 years ago
parent
commit
d8d93984d9
1 changed files with 39 additions and 16 deletions
  1. 39 16
      decode_other.go

+ 39 - 16
decode_other.go

@@ -19,27 +19,51 @@ func decodeBlock(dst, src []byte) (ret int) {
 
 		// Literals.
 		if lLen := b >> 4; lLen > 0 {
-			if lLen == 0xF {
+			switch {
+			case lLen < 0xF && di+18 < len(dst) && si+16 < len(src):
+				// Shortcut 1
+				// if we have enough room in src and dst, and the literals length
+				// is small enough (0..14) then copy all 16 bytes, even if not all
+				// are part of the literals.
+				copy(dst[di:], src[si:si+16])
+				si += lLen
+				di += lLen
+				if mLen := b & 0xF; mLen < 0xF {
+					// Shortcut 2
+					// if the match length (4..18) fits within the literals, then copy
+					// all 18 bytes, even if not all are part of the literals.
+					mLen += 4
+					if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset {
+						i := di - offset
+						copy(dst[di:], dst[i:i+18])
+						si += 2
+						di += mLen
+						continue
+					}
+				}
+			case lLen == 0xF:
 				for src[si] == 0xFF {
 					lLen += 0xFF
 					si++
 				}
 				lLen += int(src[si])
 				si++
+				fallthrough
+			default:
+				copy(dst[di:di+lLen], src[si:si+lLen])
+				si += lLen
+				di += lLen
 			}
-			i := si
-			si += lLen
-			di += copy(dst[di:di+si-i], src[i:si])
-
-			if si >= len(src) {
-				return di
-			}
+		}
+		if si >= len(src) {
+			return di
 		}
 
-		si++
-		_ = src[si] // Bound check elimination.
-		offset := int(src[si-1]) | int(src[si])<<8
-		si++
+		offset := int(src[si]) | int(src[si+1])<<8
+		if offset == 0 {
+			return -2
+		}
+		si += 2
 
 		// Match.
 		mLen := b & 0xF
@@ -54,18 +78,17 @@ func decodeBlock(dst, src []byte) (ret int) {
 		mLen += minMatch
 
 		// Copy the match.
-		i := di - offset
-		if offset > 0 && mLen >= offset {
+		expanded := dst[di-offset:]
+		if mLen > offset {
 			// Efficiently copy the match dst[di-offset:di] into the dst slice.
 			bytesToCopy := offset * (mLen / offset)
-			expanded := dst[i:]
 			for n := offset; n <= bytesToCopy+offset; n *= 2 {
 				copy(expanded[n:], expanded[:n])
 			}
 			di += bytesToCopy
 			mLen -= bytesToCopy
 		}
-		di += copy(dst[di:di+mLen], dst[i:i+mLen])
+		di += copy(dst[di:di+mLen], expanded[:mLen])
 	}
 
 	return di