
Revert "Increase general compression efficiency"

Pierre Curto, 8 years ago
parent commit f20ad86db2
4 changed files with 85 additions and 151 deletions:
  1. block.go     (+6 -35)
  2. lz4.go       (+2 -16)
  3. lz4_test.go  (+0 -10)
  4. writer.go    (+77 -90)

block.go (+6 -35)

@@ -3,7 +3,6 @@ package lz4
 import (
 	"encoding/binary"
 	"errors"
-	"unsafe"
 )
 
 // block represents a frame data block.
@@ -111,11 +110,6 @@ func UncompressBlock(src, dst []byte, di int) (int, error) {
 	}
 }
 
-type hashEntry struct {
-	generation uint
-	value      int
-}
-
 // CompressBlock compresses the source buffer starting at soffset into the destination one.
 // This is the fast version of LZ4 compression and also the default one.
 //
@@ -123,27 +117,6 @@ type hashEntry struct {
 //
 // An error is returned if the destination buffer is too small.
 func CompressBlock(src, dst []byte, soffset int) (int, error) {
-	var hashTable [hashTableSize]hashEntry
-	return compressGenerationalBlock(src, dst, soffset, 0, hashTable[:])
-}
-
-// getUint32 is a despicably evil function (well, for Go!) that takes advantage
-// of the machine's byte order to save some operations. This may look
-// inefficient but it is significantly faster on littleEndian machines,
-// which include x86, amd64, and some ARM processors.
-func getUint32(b []byte) uint32 {
-	_ = b[3]
-	if isLittleEndian {
-	return *(*uint32)(unsafe.Pointer(&b[0]))
-	}
-
-	return uint32(b[0]) |
-		uint32(b[1])<<8 |
-		uint32(b[2])<<16 |
-		uint32(b[3])<<24
-}
-
-func compressGenerationalBlock(src, dst []byte, soffset int, generation uint, hashTable []hashEntry) (int, error) {
 	sn, dn := len(src)-mfLimit, len(dst)
 	if sn <= 0 || dn == 0 || soffset >= sn {
 		return 0, nil
@@ -152,28 +125,26 @@ func compressGenerationalBlock(src, dst []byte, soffset int, generation uint, ha
 
 	// fast scan strategy:
 	// we only need a hash table to store the last sequences (4 bytes)
+	var hashTable [1 << hashLog]int
 	var hashShift = uint((minMatch * 8) - hashLog)
 
 	// Initialise the hash table with the first 64Kb of the input buffer
 	// (used when compressing dependent blocks)
 	for si < soffset {
-		h := getUint32(src[si:]) * hasher >> hashShift
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
 		si++
-		hashTable[h] = hashEntry{generation, si}
+		hashTable[h] = si
 	}
 
 	anchor := si
 	fma := 1 << skipStrength
 	for si < sn-minMatch {
 		// hash the next 4 bytes (sequence)...
-		h := getUint32(src[si:]) * hasher >> hashShift
-		if hashTable[h].generation != generation {
-			hashTable[h] = hashEntry{generation, 0}
-		}
+		h := binary.LittleEndian.Uint32(src[si:]) * hasher >> hashShift
 		// -1 to separate existing entries from new ones
-		ref := hashTable[h].value - 1
+		ref := hashTable[h] - 1
 	// ...and store the position of the hash in the hash table (+1 to compensate for the -1 upon saving)
-		hashTable[h].value = si + 1
+		hashTable[h] = si + 1
 		// no need to check the last 3 bytes in the first literal 4 bytes as
 		// this guarantees that the next match, if any, is compressed with
 		// a lower size, since to have some compression we must have:
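
The hunk above restores the simple fast-scan table: a [1 << hashLog]int array
indexed by a multiplicative hash of the next four input bytes. A minimal
standalone sketch of that hash, assuming the constants from lz4.go (the hasher
value below is the usual Knuth multiplicative prime, an assumption, not copied
from the source):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const (
        minMatch = 4          // sequences are hashed four bytes at a time
        hashLog  = 16         // the table has 1 << 16 entries
        hasher   = 2654435761 // assumed Knuth prime; the real value is defined in lz4.go
    )

    func main() {
        src := []byte("an example input buffer")
        hashShift := uint(minMatch*8 - hashLog)
        // Same expression as the fast scan loop: read four bytes little-endian,
        // multiply, and keep the top hashLog bits as the table index.
        h := binary.LittleEndian.Uint32(src) * hasher >> hashShift
        fmt.Printf("table index: %d (< %d)\n", h, 1<<hashLog)
    }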

lz4.go (+2 -16)

@@ -20,7 +20,6 @@ package lz4
 import (
 	"hash"
 	"sync"
-	"unsafe"
 
 	"github.com/pierrec/xxHash/xxHash32"
 )
@@ -44,9 +43,8 @@ const (
 	// Its value influences the compression speed and memory usage, the lower the faster,
 	// but at the expense of the compression ratio.
 	// 16 seems to be the best compromise.
-	hashLog       = 16
-	hashTableSize = 1 << hashLog
-	hashShift     = uint((minMatch * 8) - hashLog)
+	hashLog   = 16
+	hashShift = uint((minMatch * 8) - hashLog)
 
 	mfLimit      = 8 + minMatch // The last match cannot start within the last 12 bytes.
 	skipStrength = 6            // variable step for fast scan
@@ -65,18 +63,6 @@ func init() {
 	}
 }
 
-var isLittleEndian = getIsLittleEndian()
-
-func getIsLittleEndian() (ret bool) {
-	var i int = 0x1
-	bs := (*[1]byte)(unsafe.Pointer(&i))
-	if bs[0] == 0 {
-		return false
-	}
-
-	return true
-}
-
 // Header describes the various flags that can be set on a Writer or obtained from a Reader.
 // The default values match those of the LZ4 frame format definition (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
 //
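
With getUint32 and the isLittleEndian probe gone, all multi-byte reads go
through encoding/binary. A small sketch contrasting the two reads (the unsafe
variant mirrors the deleted helper via an element pointer and only yields the
same value on little-endian hardware such as x86/amd64):

    package main

    import (
        "encoding/binary"
        "fmt"
        "unsafe"
    )

    // getUint32Unsafe reinterprets the first four bytes in native order,
    // like the deleted helper. Correct only on little-endian machines.
    func getUint32Unsafe(b []byte) uint32 {
        _ = b[3] // bounds check hint
        return *(*uint32)(unsafe.Pointer(&b[0]))
    }

    func main() {
        b := []byte{0x01, 0x02, 0x03, 0x04}
        fmt.Printf("%#x\n", binary.LittleEndian.Uint32(b)) // 0x4030201 everywhere
        fmt.Printf("%#x\n", getUint32Unsafe(b))            // 0x4030201 on x86/amd64 only
    }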

lz4_test.go (+0 -10)

@@ -6,7 +6,6 @@ import (
 	"encoding/binary"
 	"fmt"
 	"io"
-	"io/ioutil"
 	"math/big"
 	"reflect"
 	"testing"
@@ -304,15 +303,6 @@ func BenchmarkCompressBlockHC(b *testing.B) {
 		lz4.CompressBlockHC(d, z, 0)
 	}
 }
-func BenchmarkCompressEndToEnd(b *testing.B) {
-	w := lz4.NewWriter(ioutil.Discard)
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if _, err := w.Write(lorem); err != nil {
-			b.Fatal(err)
-		}
-	}
-}
 
 // TestNoWrite compresses without any call to Write() (empty frame).
 // It does so checking all possible headers.
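
For reference, a throughput-reporting variant of the benchmark deleted above
could look like the sketch below; b.SetBytes makes `go test -bench` print
MB/s. It assumes the package-level lorem sample used by the surrounding
benchmarks, and would need the io/ioutil import back:

    func BenchmarkCompressEndToEnd(b *testing.B) {
        w := lz4.NewWriter(ioutil.Discard)
        b.SetBytes(int64(len(lorem)))
        b.ResetTimer()
        for i := 0; i < b.N; i++ {
            if _, err := w.Write(lorem); err != nil {
                b.Fatal(err)
            }
        }
    }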

writer.go (+77 -90)

@@ -6,20 +6,17 @@ import (
 	"hash"
 	"io"
 	"runtime"
+	"sync"
 )
 
 // Writer implements the LZ4 frame encoder.
 type Writer struct {
 	Header
 	dst      io.Writer
-	checksum hash.Hash32 // frame checksum
-	data     []byte      // data to be compressed, only used when dealing with block dependency as we need 64Kb to work with
-	window   []byte      // last 64KB of decompressed data (block dependency) + blockMaxSize buffer
-
-	zbCompressBuf     []byte // buffer for compressing lz4 blocks
-	writeSizeBuf      []byte // four-byte slice for writing checksums and sizes in writeblock
-	hashTable         []hashEntry
-	currentGeneration uint
+	checksum hash.Hash32    // frame checksum
+	wg       sync.WaitGroup // compressing goroutine wait group
+	data     []byte         // data to be compressed, only used when dealing with block dependency as we need 64Kb to work with
+	window   []byte         // last 64KB of decompressed data (block dependency) + blockMaxSize buffer
 }
 
 // NewWriter returns a new LZ4 frame encoder.
@@ -33,8 +30,6 @@ func NewWriter(dst io.Writer) *Writer {
 		Header: Header{
 			BlockMaxSize: 4 << 20,
 		},
-		hashTable:    make([]hashEntry, hashTableSize),
-		writeSizeBuf: make([]byte, 4),
 	}
 }
 
@@ -69,9 +64,9 @@ func (z *Writer) writeHeader() error {
 	if !z.Header.NoChecksum {
 		flg |= 1 << 2
 	}
-	//  if z.Header.Dict {
-	//      flg |= 1
-	//  }
+	// 	if z.Header.Dict {
+	// 		flg |= 1
+	// 	}
 	buf[4] = flg
 	buf[5] = bSize << 4
 
@@ -82,10 +77,10 @@ func (z *Writer) writeHeader() error {
 		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
 		n += 8
 	}
-	//  if z.Header.Dict {
-	//      binary.LittleEndian.PutUint32(buf[n:], z.Header.DictID)
-	//      n += 4
-	//  }
+	// 	if z.Header.Dict {
+	// 		binary.LittleEndian.PutUint32(buf[n:], z.Header.DictID)
+	// 		n += 4
+	// 	}
 
 	// header checksum includes the flags, block max size and optional Size and DictID
 	z.checksum.Write(buf[4:n])
@@ -98,9 +93,6 @@ func (z *Writer) writeHeader() error {
 	}
 	z.Header.done = true
 
-	// initialize buffers dependent on header info
-	z.zbCompressBuf = make([]byte, winSize+z.BlockMaxSize)
-
 	return nil
 }
 
@@ -124,7 +116,11 @@ func (z *Writer) Write(buf []byte) (n int, err error) {
 	}
 
 	if !z.NoChecksum {
-		z.checksum.Write(buf)
+		z.wg.Add(1)
+		go func(b []byte) {
+			z.checksum.Write(b)
+			z.wg.Done()
+		}(buf)
 	}
 
 	// with block dependency, require at least 64Kb of data to work with
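
The hunk above moves the frame checksum onto its own goroutine so hashing can
overlap with block compression; the later z.wg.Wait() guarantees the hash has
consumed the buffer before it is reused. A standalone sketch of the pattern,
using the xxHash32 package lz4.go already imports:

    package main

    import (
        "fmt"
        "sync"

        "github.com/pierrec/xxHash/xxHash32"
    )

    func main() {
        h := xxHash32.New(0) // seed 0, as for the frame checksum
        var wg sync.WaitGroup
        buf := []byte("payload")

        wg.Add(1)
        go func(b []byte) {
            defer wg.Done()
            h.Write(b) // hash.Hash32 writes never fail
        }(buf)

        // ...block compression could overlap here...

        wg.Wait() // buf must not be reused before this point
        fmt.Printf("xxh32: %#x\n", h.Sum32())
    }
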
@@ -134,6 +130,7 @@ func (z *Writer) Write(buf []byte) (n int, err error) {
 		bl = len(z.data)
 		z.data = append(z.data, buf...)
 		if len(z.data) < winSize {
+			z.wg.Wait()
 			return len(buf), nil
 		}
 		buf = z.data
@@ -142,16 +139,11 @@ func (z *Writer) Write(buf []byte) (n int, err error) {
 
 	// Break up the input buffer into BlockMaxSize blocks, provisioning the left over block.
 	// Then compress into each of them concurrently if possible (no dependency).
-	var (
-		zb       block
-		wbuf     = buf
-		zn       = len(wbuf) / z.BlockMaxSize
-		zi       = 0
-		leftover = len(buf) % z.BlockMaxSize
-	)
-
-loop:
-	for zi < zn {
+	wbuf := buf
+	zn := len(wbuf) / z.BlockMaxSize
+	zblocks := make([]block, zn, zn+1)
+	for zi := 0; zi < zn; zi++ {
+		zb := &zblocks[zi]
 		if z.BlockDependency {
 			if zi == 0 {
 				// first block does not have the window
@@ -169,12 +161,14 @@ loop:
 			wbuf = wbuf[z.BlockMaxSize:]
 		}
 
-		goto write
+		z.wg.Add(1)
+		go z.compressBlock(zb)
 	}
 
 	// left over
-	if leftover > 0 {
-		zb = block{data: wbuf}
+	if len(buf)%z.BlockMaxSize > 0 {
+		zblocks = append(zblocks, block{data: wbuf})
+		zb := &zblocks[zn]
 		if z.BlockDependency {
 			if zn == 0 {
 				zb.data = append(z.window, zb.data...)
@@ -183,9 +177,38 @@ loop:
 				zb.offset = winSize
 			}
 		}
+		z.wg.Add(1)
+		go z.compressBlock(zb)
+	}
+	z.wg.Wait()
+
+	// outputs the compressed data
+	for zi, zb := range zblocks {
+		_, err = z.writeBlock(&zb)
+
+		written := len(zb.data)
+		if bl > 0 {
+			if written >= bl {
+				written -= bl
+				bl = 0
+			} else {
+				bl -= written
+				written = 0
+			}
+		}
 
-		leftover = 0
-		goto write
+		n += written
+		// remove the window in zb.data
+		if z.BlockDependency {
+			if zi == 0 {
+				n -= len(z.window)
+			} else {
+				n -= winSize
+			}
+		}
+		if err != nil {
+			return
+		}
 	}
 
 	if z.BlockDependency {
@@ -202,54 +225,20 @@ loop:
 	}
 
 	return
-
-write:
-	zb = z.compressBlock(zb)
-	_, err = z.writeBlock(zb)
-
-	written := len(zb.data)
-	if bl > 0 {
-		if written >= bl {
-			written -= bl
-			bl = 0
-		} else {
-			bl -= written
-			written = 0
-		}
-	}
-
-	n += written
-	// remove the window in zb.data
-	if z.BlockDependency {
-		if zi == 0 {
-			n -= len(z.window)
-		} else {
-			n -= winSize
-		}
-	}
-	if err != nil {
-		return
-	}
-	zi++
-	goto loop
 }
 
 // compressBlock compresses a block.
-func (z *Writer) compressBlock(zb block) block {
+func (z *Writer) compressBlock(zb *block) {
 	// compressed block size cannot exceed the input's
+	zbuf := make([]byte, len(zb.data)-zb.offset)
 	var (
-		n    int
-		err  error
-		zbuf = z.zbCompressBuf
+		n   int
+		err error
 	)
 	if z.HighCompression {
 		n, err = CompressBlockHC(zb.data, zbuf, zb.offset)
 	} else {
-		n, err = compressGenerationalBlock(zb.data, zbuf, zb.offset, z.currentGeneration, z.hashTable)
-		z.currentGeneration++
-		if z.currentGeneration == 0 { // wrapped around, reset table
-			z.hashTable = make([]hashEntry, hashTableSize)
-		}
+		n, err = CompressBlock(zb.data, zbuf, zb.offset)
 	}
 
 	// compressible and compressed size smaller than decompressed: ok!
@@ -267,23 +256,21 @@ func (z *Writer) compressBlock(zb block) block {
 		hashPool.Put(xxh)
 	}
 
-	return zb
+	z.wg.Done()
 }
 
 // writeBlock writes a frame block to the underlying io.Writer (size, data).
-func (z *Writer) writeBlock(zb block) (int, error) {
+func (z *Writer) writeBlock(zb *block) (int, error) {
 	bLen := uint32(len(zb.zdata))
 	if !zb.compressed {
 		bLen |= 1 << 31
 	}
 
 	n := 0
-
-	binary.LittleEndian.PutUint32(z.writeSizeBuf, bLen)
-	n, err := z.dst.Write(z.writeSizeBuf)
-	if err != nil {
+	if err := binary.Write(z.dst, binary.LittleEndian, bLen); err != nil {
 		return n, err
 	}
+	n += 4
 
 	m, err := z.dst.Write(zb.zdata)
 	n += m
@@ -292,13 +279,10 @@ func (z *Writer) writeBlock(zb block) (int, error) {
 	}
 
 	if z.BlockChecksum {
-		binary.LittleEndian.PutUint32(z.writeSizeBuf, zb.checksum)
-		m, err := z.dst.Write(z.writeSizeBuf)
-		n += m
-
-		if err != nil {
+		if err := binary.Write(z.dst, binary.LittleEndian, zb.checksum); err != nil {
 			return n, err
 		}
+		n += 4
 	}
 
 	return n, nil
@@ -314,9 +298,10 @@ func (z *Writer) Flush() error {
 	if len(z.data) == 0 {
 		return nil
 	}
-
-	zb := z.compressBlock(block{data: z.data})
-	if _, err := z.writeBlock(zb); err != nil {
+	zb := block{data: z.data}
+	z.wg.Add(1)
+	z.compressBlock(&zb)
+	if _, err := z.writeBlock(&zb); err != nil {
 		return err
 	}
 	return nil
@@ -333,7 +318,9 @@ func (z *Writer) Close() error {
 	// buffered data for the block dependency window
 	if z.BlockDependency && len(z.data) > 0 {
 		zb := block{data: z.data}
-		if _, err := z.writeBlock(z.compressBlock(zb)); err != nil {
+		z.wg.Add(1)
+		z.compressBlock(&zb)
+		if _, err := z.writeBlock(&zb); err != nil {
 			return err
 		}
 	}
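
End to end, the reverted Writer is driven the same way as before. A minimal
usage sketch with a hypothetical input:

    package main

    import (
        "bytes"
        "fmt"

        "github.com/pierrec/lz4"
    )

    func main() {
        var out bytes.Buffer
        w := lz4.NewWriter(&out)
        if _, err := w.Write([]byte("some data to compress")); err != nil {
            panic(err)
        }
        // Close flushes any buffered block and writes the frame end mark.
        if err := w.Close(); err != nil {
            panic(err)
        }
        fmt.Println("compressed frame size:", out.Len())
    }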