Browse Source

lz4block: Use unsigned indexes

This allows the compiler to simplify some bounds checks.

Benchmark results on amd64 with -tags noasm:

name                old time/op    new time/op    delta
Uncompress-8           499ns ± 3%     511ns ± 2%  +2.42%  (p=0.000 n=20+20)
UncompressPg1661-8    1.13ms ± 1%    1.10ms ± 1%  -2.20%  (p=0.000 n=20+20)
UncompressDigits-8    77.4µs ± 1%    75.9µs ± 1%  -1.92%  (p=0.000 n=20+20)
UncompressTwain-8      724µs ± 1%     702µs ± 1%  -3.02%  (p=0.000 n=20+19)
UncompressRand-8      3.67µs ± 2%    3.61µs ± 2%  -1.55%  (p=0.000 n=20+20)

name                old speed      new speed      delta
UncompressPg1661-8   527MB/s ± 1%   539MB/s ± 1%  +2.25%  (p=0.000 n=20+20)
UncompressDigits-8  1.29GB/s ± 1%  1.32GB/s ± 1%  +1.96%  (p=0.000 n=20+20)
UncompressTwain-8    536MB/s ± 1%   553MB/s ± 1%  +3.11%  (p=0.000 n=20+19)
UncompressRand-8    4.46GB/s ± 2%  4.53GB/s ± 2%  +1.57%  (p=0.000 n=20+20)

(Allocation rates are all the same.)
greatroar 5 years ago
parent
commit
6690aab2a4
1 changed file with 12 additions and 12 deletions
  1. 12 12
      internal/lz4block/decode_other.go

+ 12 - 12
internal/lz4block/decode_other.go

@@ -10,16 +10,16 @@ func decodeBlock(dst, src []byte) (ret int) {
 		}
 	}()
 
-	var si, di int
+	var si, di uint
 	for {
 		// Literals and match lengths (token).
-		b := int(src[si])
+		b := uint(src[si])
 		si++
 
 		// Literals.
 		if lLen := b >> 4; lLen > 0 {
 			switch {
-			case lLen < 0xF && si+16 < len(src):
+			case lLen < 0xF && si+16 < uint(len(src)):
 				// Shortcut 1
 				// if we have enough room in src and dst, and the literals length
 				// is small enough (0..14) then copy all 16 bytes, even if not all
@@ -32,13 +32,13 @@ func decodeBlock(dst, src []byte) (ret int) {
 					// if the match length (4..18) fits within the literals, then copy
 					// all 18 bytes, even if not all are part of the literals.
 					mLen += 4
-					if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset {
+					if offset := uint(src[si]) | uint(src[si+1])<<8; mLen <= offset {
 						i := di - offset
 						end := i + 18
-						if end > len(dst) {
+						if end > uint(len(dst)) {
 							// The remaining buffer may not hold 18 bytes.
 							// See https://github.com/pierrec/lz4/issues/51.
-							end = len(dst)
+							end = uint(len(dst))
 						}
 						copy(dst[di:], dst[i:end])
 						si += 2
@@ -51,7 +51,7 @@ func decodeBlock(dst, src []byte) (ret int) {
 					lLen += 0xFF
 					si++
 				}
-				lLen += int(src[si])
+				lLen += uint(src[si])
 				si++
 				fallthrough
 			default:
@@ -60,11 +60,11 @@ func decodeBlock(dst, src []byte) (ret int) {
 				di += lLen
 			}
 		}
-		if si >= len(src) {
-			return di
+		if si >= uint(len(src)) {
+			return int(di)
 		}
 
-		offset := int(src[si]) | int(src[si+1])<<8
+		offset := uint(src[si]) | uint(src[si+1])<<8
 		if offset == 0 {
 			return hasError
 		}
@@ -77,7 +77,7 @@ func decodeBlock(dst, src []byte) (ret int) {
 				mLen += 0xFF
 				si++
 			}
-			mLen += int(src[si])
+			mLen += uint(src[si])
 			si++
 		}
 		mLen += minMatch
@@ -93,6 +93,6 @@ func decodeBlock(dst, src []byte) (ret int) {
 			di += bytesToCopy
 			mLen -= bytesToCopy
 		}
-		di += copy(dst[di:di+mLen], expanded[:mLen])
+		di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
 	}
 }