Sfoglia il codice sorgente

Eliminate some bounds checks in the encoder.

As per
https://groups.google.com/d/msg/golang-dev/jVP6h21OyL8/Syhfot9XBQAJ,
recent versions of the gc compiler can eliminate the bounds checks on the
individual byte loads in a helper of this shape:

func load32(b []byte, i int32) uint32 {
  b = b[i : i+4 : len(b)]
  return uint32(b[0]) | etc | uint32(b[3])<<24
}

benchmark                     old MB/s     new MB/s     speedup
BenchmarkWordsEncode1e1-8     5.78         5.77         1.00x
BenchmarkWordsEncode1e2-8     47.22        47.96        1.02x
BenchmarkWordsEncode1e3-8     183.53       190.33       1.04x
BenchmarkWordsEncode1e4-8     198.95       190.25       0.96x
BenchmarkWordsEncode1e5-8     144.60       150.65       1.04x
BenchmarkWordsEncode1e6-8     172.11       180.11       1.05x
BenchmarkRandomEncode-8       4547.98      4782.70      1.05x
Benchmark_ZFlat0-8            359.18       372.49       1.04x
Benchmark_ZFlat1-8            181.57       186.49       1.03x
Benchmark_ZFlat2-8            4566.75      4979.47      1.09x
Benchmark_ZFlat3-8            86.00        85.76        1.00x
Benchmark_ZFlat4-8            558.08       566.31       1.01x
Benchmark_ZFlat5-8            354.18       366.01       1.03x
Benchmark_ZFlat6-8            156.20       162.13       1.04x
Benchmark_ZFlat7-8            147.76       153.69       1.04x
Benchmark_ZFlat8-8            162.49       167.91       1.03x
Benchmark_ZFlat9-8            142.33       147.71       1.04x
Benchmark_ZFlat10-8           401.93       414.06       1.03x
Benchmark_ZFlat11-8           235.94       248.87       1.05x
Nigel Tao 9 anni fa
parent
commit
7ede8d1b13
1 ha cambiato i file con 7 aggiunte e 3 eliminazioni
  1. 7 3
      encode.go

+ 7 - 3
encode.go

@@ -14,6 +14,11 @@ import (
 // code.
 const maxOffset = 1 << 15
 
+func load32(b []byte, i int32) uint32 {
+	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
 // emitLiteral writes a literal chunk and returns the number of bytes written.
 func emitLiteral(dst, lit []byte) int {
 	i, n := 0, uint(len(lit)-1)
@@ -165,8 +170,7 @@ func encodeBlock(dst, src []byte) (d int) {
 	)
 	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
 		// Update the hash table.
-		b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
-		h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
+		h := load32(src, s)
 		p := &table[(h*0x1e35a7bd)>>shift]
 		// We need to store values in [-1, inf) in table. To save
 		// some initialization time, (re)use the table's zero value
@@ -174,7 +178,7 @@ func encodeBlock(dst, src []byte) (d int) {
 		// subtract 1 on reads.
 		t, *p = *p-1, s+1
 		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
-		if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
+		if t < 0 || s-t >= maxOffset || h != load32(src, t) {
 			s += int32(skip >> 5)
 			skip++
 			continue