
Initial release

Pierre Curto 10 years ago
commit
9cbcbabc0a
7 changed files with 1925 additions and 0 deletions
  1. block.go (+432 -0)
  2. export_test.go (+13 -0)
  3. lz4.go (+103 -0)
  4. lz4_test.go (+567 -0)
  5. lz4c/main.go (+105 -0)
  6. reader.go (+351 -0)
  7. writer.go (+354 -0)

+ 432 - 0
block.go

@@ -0,0 +1,432 @@
+package lz4
+
+import "errors"
+
+// block represents a frame data block.
+// Used when compressing or decompressing frame blocks concurrently.
+type block struct {
+	compressed bool
+	zdata      []byte // compressed data
+	data       []byte // decompressed data
+	offset     int    // offset within the data: with block dependency, the 64Kb window is prepended to it
+	checksum   uint32 // compressed data checksum
+	err        error  // error while [de]compressing
+}
+
+var (
+	// ErrInvalidSource is returned by UncompressBlock when a compressed block is corrupted.
+	ErrInvalidSource = errors.New("lz4: invalid source")
+	// ErrShortBuffer is returned by UncompressBlock, CompressBlock or CompressBlockHC when
+	// the supplied buffer for [de]compression is too small.
+	ErrShortBuffer = errors.New("lz4: short buffer")
+)
+
+// CompressBlockBound returns the maximum size of the compressed data for an input buffer of size n
+// (the worst case, when the data is not compressible).
+func CompressBlockBound(n int) int {
+	return n + n/255 + 16
+}
+
+// UncompressBlock decompresses the source buffer into the destination one,
+// starting at the di index and returning the decompressed size.
+//
+// The destination buffer must be sized appropriately.
+//
+// An error is returned if the source data is invalid or the destination buffer is too small.
+func UncompressBlock(src, dst []byte, di int) (int, error) {
+	si, sn, di0 := 0, len(src), di
+	if sn == 0 {
+		return 0, nil
+	}
+
+	for {
+		// literals and match lengths (token)
+		lLen := int(src[si] >> 4)
+		mLen := int(src[si] & 0xF)
+		if si++; si == sn {
+			return di - di0, ErrInvalidSource
+		}
+
+		// literals
+		if lLen > 0 {
+			if lLen == 0xF {
+				for src[si] == 0xFF {
+					lLen += 0xFF
+					if si++; si == sn {
+						return di - di0, ErrInvalidSource
+					}
+				}
+				lLen += int(src[si])
+				if si++; si == sn {
+					return di - di0, ErrInvalidSource
+				}
+			}
+			if len(dst)-di < lLen || si+lLen > sn {
+				return di - di0, ErrShortBuffer
+			}
+			di += copy(dst[di:], src[si:si+lLen])
+
+			if si += lLen; si >= sn {
+				return di - di0, nil
+			}
+		}
+
+		if si += 2; si >= sn {
+			return di - di0, ErrInvalidSource
+		}
+		offset := int(src[si-2]) | int(src[si-1])<<8
+		if di-offset < 0 {
+			return di - di0, ErrInvalidSource
+		}
+
+		// match
+		if mLen == 0xF {
+			for src[si] == 0xFF {
+				mLen += 0xFF
+				if si++; si == sn {
+					return di - di0, ErrInvalidSource
+				}
+			}
+			mLen += int(src[si])
+			if si++; si == sn {
+				return di - di0, ErrInvalidSource
+			}
+		}
+		// minimum match length is 4
+		mLen += 4
+		if len(dst)-di <= mLen {
+			return di - di0, ErrShortBuffer
+		}
+
+		// copy the match (NB. match is at least 4 bytes long)
+		// NB. when the match overlaps the not-yet-written part of dst (offset < mLen),
+		// copy() would read stale bytes instead of the ones just written,
+		// so do the copy in chunks no larger than the offset.
+		for ; mLen >= offset; mLen -= offset {
+			di += copy(dst[di:], dst[di-offset:di])
+		}
+		di += copy(dst[di:], dst[di-offset:di-offset+mLen])
+	}
+}
+
+// CompressBlock compresses the source buffer starting at soffset into the destination one.
+// This is the fast version of LZ4 compression and also the default one.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlock(src, dst []byte, soffset int) (int, error) {
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 || soffset >= sn {
+		return 0, nil
+	}
+	var si, di int
+
+	// fast scan strategy:
+	// we only need a hash table to store the last sequences (4 bytes)
+	var hashTable [1 << hashLog]int
+	var hashShift = uint((minMatch * 8) - hashLog)
+
+	// Initialise the hash table with the first 64Kb of the input buffer
+	// (used when compressing dependent blocks)
+	for si < soffset {
+		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
+		si++
+		hashTable[h] = si
+	}
+
+	anchor := si
+	fma := 1<<skipStrength + 3
+	for si < sn-minMatch {
+		// hash the next 4 bytes (sequence)...
+		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
+		// -1 to separate existing entries from new ones
+		ref := hashTable[h] - 1
+		// ...and store the position of the hash in the hash table (+1 to compensate the -1 upon saving)
+		hashTable[h] = si + 1
+		// the sequence is new, out of the 64Kb window, or does not match: try the next sequence
+		if ref < 0 ||
+			(si-ref)>>winSizeLog > 0 ||
+			src[ref] != src[si] ||
+			src[ref+1] != src[si+1] ||
+			src[ref+2] != src[si+2] ||
+			src[ref+3] != src[si+3] {
+			// variable step: improves performance on non-compressible data
+			si += fma >> skipStrength
+			fma++
+			continue
+		}
+		// match found
+		fma = 1<<skipStrength + 3
+		lLen := si - anchor
+		offset := si - ref
+
+		// encode match length part 1
+		si += minMatch
+		mLen := si // match length has minMatch already
+		for si <= sn && src[si] == src[si-offset] {
+			si++
+		}
+		mLen = si - mLen
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// encode literals length
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(l)
+		}
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+
+		// literals
+		if di+lLen >= dn {
+			return di, ErrShortBuffer
+		}
+		di += copy(dst[di:], src[anchor:anchor+lLen])
+		anchor = si
+
+		// encode offset
+		if di += 2; di >= dn {
+			return di, ErrShortBuffer
+		}
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// encode match length part 2
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(mLen)
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+	}
+
+	if anchor == 0 {
+		// incompressible
+		return 0, nil
+	}
+
+	// last literals
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+		dst[di] = byte(lLen)
+	}
+	if di++; di == dn {
+		return di, ErrShortBuffer
+	}
+
+	// write literals
+	src = src[anchor:]
+	switch n := di + len(src); {
+	case n > dn:
+		return di, ErrShortBuffer
+	case n >= sn:
+		// incompressible
+		return 0, nil
+	}
+	di += copy(dst[di:], src)
+	return di, nil
+}
+
+// CompressBlockHC compresses the source buffer starting at soffset into the destination one.
+// Its compression ratio is better than CompressBlock's but it is also slower.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlockHC(src, dst []byte, soffset int) (int, error) {
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 || soffset >= sn {
+		return 0, nil
+	}
+	var si, di int
+
+	// Hash Chain strategy:
+	// we need a hash table and a chain table
+	// the chain table cannot contain more entries than the window size (64Kb entries)
+	var hashTable [1 << hashLog]int
+	var chainTable [winSize]int
+	var hashShift = uint((minMatch * 8) - hashLog)
+
+	// Initialise the hash table with the first 64Kb of the input buffer
+	// (used when compressing dependent blocks)
+	for si < soffset {
+		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
+		chainTable[si&winMask] = hashTable[h]
+		si++
+		hashTable[h] = si
+	}
+
+	anchor := si
+	for si < sn-minMatch {
+		// hash the next 4 bytes (sequence)...
+		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
+
+		// follow the chain until out of window and give the longest match
+		mLen := 0
+		offset := 0
+		for next := hashTable[h] - 1; next > 0 && next > si-winSize; next = chainTable[next&winMask] - 1 {
+			// the first byte (mLen==0), or the byte at the current match length (mLen>=minMatch),
+			// must match to improve on the match length
+			if src[next+mLen] == src[si+mLen] {
+				for ml := 0; ; ml++ {
+					if src[next+ml] != src[si+ml] || si+ml > sn {
+						// found a longer match, keep its position and length
+						if mLen < ml && ml >= minMatch {
+							mLen = ml
+							offset = si - next
+						}
+						break
+					}
+				}
+			}
+		}
+		chainTable[si&winMask] = hashTable[h]
+		hashTable[h] = si + 1
+
+		// no match found
+		if mLen == 0 {
+			si++
+			continue
+		}
+
+		// match found
+	// update hash/chain tables with overlapping bytes:
+		// si already hashed, add everything from si+1 up to the match length
+		for si, ml := si+1, si+mLen; si < ml; {
+			h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
+			chainTable[si&winMask] = hashTable[h]
+			si++
+			hashTable[h] = si
+		}
+
+		lLen := si - anchor
+		si += mLen
+		mLen -= minMatch // match length does not include minMatch
+
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// encode literals length
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(l)
+		}
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+
+		// literals
+		if di+lLen >= dn {
+			return di, ErrShortBuffer
+		}
+		di += copy(dst[di:], src[anchor:anchor+lLen])
+		anchor = si
+
+		// encode offset
+		if di += 2; di >= dn {
+			return di, ErrShortBuffer
+		}
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// encode match length part 2
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				if di++; di == dn {
+					return di, ErrShortBuffer
+				}
+			}
+			dst[di] = byte(mLen)
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+	}
+
+	if anchor == 0 {
+		// incompressible
+		return 0, nil
+	}
+
+	// last literals
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		if di++; di == dn {
+			return di, ErrShortBuffer
+		}
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			if di++; di == dn {
+				return di, ErrShortBuffer
+			}
+		}
+		dst[di] = byte(lLen)
+	}
+	if di++; di == dn {
+		return di, ErrShortBuffer
+	}
+
+	// write literals
+	src = src[anchor:]
+	switch n := di + len(src); {
+	case n > dn:
+		return di, ErrShortBuffer
+	case n >= sn:
+		// incompressible
+		return 0, nil
+	}
+	di += copy(dst[di:], src)
+	return di, nil
+}

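A minimal sketch of a round trip through the block-level API above; the sample payload, the buffer sizing and the zero soffset/di arguments are illustrative assumptions, while the signatures are those defined in block.go:

package main

import (
	"bytes"
	"fmt"

	"github.com/pierrec/lz4"
)

func main() {
	data := bytes.Repeat([]byte("compressible pattern "), 100)

	// destination sized for the worst (incompressible) case
	zbuf := make([]byte, lz4.CompressBlockBound(len(data)))
	zn, err := lz4.CompressBlock(data, zbuf, 0) // soffset=0: no prepended window
	if err != nil {
		panic(err)
	}
	if zn == 0 {
		fmt.Println("incompressible")
		return
	}

	// the destination must be sized appropriately (here: the known original size)
	dbuf := make([]byte, len(data))
	dn, err := lz4.UncompressBlock(zbuf[:zn], dbuf, 0) // di=0: write from the start
	if err != nil {
		panic(err)
	}
	fmt.Println(bytes.Equal(data, dbuf[:dn])) // true
}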
+ 13 - 0
export_test.go

@@ -0,0 +1,13 @@
+// Expose some internals for testing purposes
+package lz4
+
+// expose the possible block max sizes
+var BlockMaxSizeItems []int
+
+func init() {
+	for s := range bsMapValue {
+		BlockMaxSizeItems = append(BlockMaxSizeItems, s)
+	}
+}
+
+var FrameSkipMagic = frameSkipMagic

+ 103 - 0
lz4.go

@@ -0,0 +1,103 @@
+// Package lz4 implements reading and writing lz4 compressed data (a frame),
+// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
+// using an io.Reader (decompression) and io.Writer (compression).
+// It is designed to minimize memory usage while maximizing throughput by being able to
+// [de]compress data concurrently.
+//
+// The Reader and the Writer support concurrent processing provided the supplied buffers are
+// large enough (in multiples of BlockMaxSize) and there is no block dependency.
+// Reader.WriteTo and Writer.ReadFrom do leverage the concurrency transparently.
+// The runtime.GOMAXPROCS() value determines how much concurrency is applied, if any.
+//
+// Although the block level compression and decompression functions are exposed and are fully compatible
+// with the lz4 block format definition, they are low level and should not be used directly.
+// For a complete description of an lz4 compressed block, see:
+// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+//
+// See https://github.com/Cyan4973/lz4 for the reference C implementation.
+package lz4
+
+import (
+	"hash"
+	"sync"
+
+	"github.com/pierrec/xxHash/xxHash32"
+)
+
+const (
+	// Extension is the LZ4 frame file name extension
+	Extension = ".lz4"
+	// Version is the LZ4 frame format version
+	Version = 1
+
+	frameMagic     = uint32(0x184D2204)
+	frameSkipMagic = uint32(0x184D2A50)
+
+	// The following constants are used to setup the compression algorithm.
+	minMatch   = 4  // the minimum length of a match (4 bytes)
+	winSizeLog = 16 // LZ4 64Kb window size limit
+	winSize    = 1 << winSizeLog
+	winMask    = winSize - 1 // 64Kb window of previous data for dependent blocks
+
+	// hashLog determines the size of the hash table used to quickly find a previous match position.
+	// Its value influences the compression speed and memory usage, the lower the faster,
+	// but at the expense of the compression ratio.
+	// 16 seems to be the best compromise.
+	hashLog = 16
+
+	mfLimit      = 8 + minMatch // The last match cannot start within the last 12 bytes.
+	skipStrength = 6            // variable step for fast scan
+
+	hasher = uint32(2654435761) // prime number used to hash minMatch
+)
+
+// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
+var bsMapID = map[byte]int{4: 64 << 10, 5: 256 << 10, 6: 1 << 20, 7: 4 << 20}
+var bsMapValue = map[int]byte{}
+
+// init reverses bsMapID into bsMapValue, mapping block max size values back to their IDs.
+func init() {
+	for i, v := range bsMapID {
+		bsMapValue[v] = i
+	}
+}
+
+// Header describes the various flags that can be set on a Writer or obtained from a Reader.
+// The default values match those of the LZ4 frame format definition (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
+//
+// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
+// It is the caller's responsibility to check them if necessary (typically when using the Reader concurrently).
+type Header struct {
+	BlockDependency bool   // compressed blocks are dependent (one block depends on the last 64Kb of the previous one)
+	BlockChecksum   bool   // compressed blocks are checksummed
+	NoChecksum      bool   // disable the frame checksum
+	BlockMaxSize    int    // the size of the decompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
+	Size            uint64 // the frame total size. It is _not_ computed by the Writer.
+	HighCompression bool   // use high compression (only for the Writer)
+	done            bool   // whether the descriptor was processed (Read or Write and checked)
+	// Removed as not supported
+	// 	Dict            bool   // a dictionary id is to be used
+	// 	DictID          uint32 // the dictionary id read from the frame, if any.
+}
+
+// xxhPool wraps the standard pool for xxHash items.
+// Putting items back in the pool automatically resets them.
+type xxhPool struct {
+	sync.Pool
+}
+
+func (p *xxhPool) Get() hash.Hash32 {
+	return p.Pool.Get().(hash.Hash32)
+}
+
+func (p *xxhPool) Put(h hash.Hash32) {
+	h.Reset()
+	p.Pool.Put(h)
+}
+
+// hashPool is used by readers and writers and contains xxHash items.
+var hashPool = xxhPool{
+	Pool: sync.Pool{
+		New: func() interface{} { return xxHash32.New(0) },
+	},
+}

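A minimal sketch of the frame-level API described in the package documentation, compressing into a buffer and reading it back; the Header option values are illustrative only, and the Writer/Reader types are defined in writer.go and reader.go below:

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/pierrec/lz4"
)

func main() {
	var zbuf bytes.Buffer

	w := lz4.NewWriter(&zbuf)
	// BlockMaxSize must be one of 64Kb, 256Kb, 1Mb or 4Mb
	w.Header = lz4.Header{BlockMaxSize: 64 << 10, BlockChecksum: true}
	if _, err := w.Write([]byte("hello, lz4 frame")); err != nil {
		panic(err)
	}
	// Close writes the end mark and, unless NoChecksum is set, the frame checksum
	if err := w.Close(); err != nil {
		panic(err)
	}

	r := lz4.NewReader(&zbuf)
	var out bytes.Buffer
	if _, err := io.Copy(&out, r); err != nil {
		panic(err)
	}
	fmt.Println(out.String())
}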
+ 567 - 0
lz4_test.go

@@ -0,0 +1,567 @@
+package lz4_test
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"math/big"
+	"reflect"
+	"testing"
+
+	"github.com/pierrec/lz4"
+)
+
+// testBuffer wraps bytes.Buffer to remove the WriteTo() and ReadFrom() methods.
+type testBuffer struct {
+	buf *bytes.Buffer
+}
+
+func (b *testBuffer) Read(buf []byte) (int, error) {
+	return b.buf.Read(buf)
+}
+
+func (b *testBuffer) Write(buf []byte) (int, error) {
+	return b.buf.Write(buf)
+}
+
+func (b *testBuffer) Len() int {
+	return b.buf.Len()
+}
+
+func (b *testBuffer) Bytes() []byte {
+	return b.buf.Bytes()
+}
+
+// testData represents a test data item. Its purpose is to attach a human-readable label to a slice of bytes.
+type testData struct {
+	label string
+	data  []byte
+}
+
+// testHeader represents a test data item. Its purpose is to attach a human-readable label to an LZ4 header.
+type testHeader struct {
+	label  string
+	header lz4.Header
+}
+
+// compareHeaders compares two lz4 headers.
+func compareHeaders(h, hh lz4.Header, t *testing.T) {
+	ok := true
+	if h.BlockDependency != hh.BlockDependency {
+		t.Errorf("BlockDependency: expected %v, got %v", h.BlockDependency, hh.BlockDependency)
+		ok = false
+	}
+	if h.BlockChecksum != hh.BlockChecksum {
+		t.Errorf("BlockChecksum: expected %v, got %v", h.BlockChecksum, hh.BlockChecksum)
+		ok = false
+	}
+	if h.NoChecksum != hh.NoChecksum {
+		t.Errorf("NoChecksum: expected %v, got %v", h.NoChecksum, hh.NoChecksum)
+		ok = false
+	}
+	if h.BlockMaxSize != hh.BlockMaxSize {
+		t.Errorf("BlockMaxSize: expected %d, got %d", h.BlockMaxSize, hh.BlockMaxSize)
+		ok = false
+	}
+	if h.Size != hh.Size {
+		t.Errorf("Size: expected %d, got %d", h.Size, hh.Size)
+		ok = false
+	}
+	// 	if h.Dict != hh.Dict {
+	// 		t.Errorf("Dict: expected %d, got %d", h.Dict, hh.Dict)
+	// 		ok = false
+	// 	}
+	// 	if h.DictID != hh.DictID {
+	// 		t.Errorf("DictID: expected %d, got %d", h.DictID, hh.DictID)
+	// 		ok = false
+	// 	}
+	if !ok {
+		t.FailNow()
+	}
+}
+
+var (
+	lorem = []byte("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.")
+	// Initial data items used for testing. More are added with random and other kinds of data.
+	testDataItems = []testData{
+		testData{"empty", nil},
+		testData{
+			"small pattern",
+			[]byte("aaaaaaaaaaaaaaaaaaa"),
+		},
+		testData{
+			"small pattern long",
+			[]byte("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+		},
+		testData{
+			"medium pattern",
+			[]byte("abcdefghijklmnopqabcdefghijklmnopq"),
+		},
+		testData{
+			"lorem",
+			lorem,
+		},
+	}
+	testHeaderItems = []testHeader{}
+)
+
+// Build the list of all possible headers with the default values + the ones defined in the map.
+func buildHeaders(options map[string][]interface{}) []testHeader {
+	testHeaderItems := make([]testHeader, 1)
+	for fieldName, fieldData := range options {
+		for _, o := range fieldData {
+			for _, d := range testHeaderItems {
+				s := reflect.ValueOf(&d.header).Elem()
+				t := s.Type()
+				for i := 0; i < s.NumField(); i++ {
+					if t.Field(i).Name == fieldName {
+						switch f := s.Field(i); f.Kind() {
+						case reflect.Bool:
+							f.SetBool(o.(bool))
+						case reflect.Int:
+							f.SetInt(int64(o.(int)))
+						case reflect.Int64:
+							switch o.(type) {
+							case int:
+								f.SetInt(int64(o.(int)))
+							default:
+								f.SetInt(o.(int64))
+							}
+						case reflect.Uint32:
+							switch o.(type) {
+							case int:
+								f.SetUint(uint64(o.(int)))
+							default:
+								f.SetUint(uint64(o.(uint32)))
+							}
+						case reflect.Uint64:
+							switch o.(type) {
+							case int:
+								f.SetUint(uint64(o.(int)))
+							default:
+								f.SetUint(o.(uint64))
+							}
+						default:
+							panic(fmt.Sprintf("unsupported type: %v", f.Kind()))
+						}
+						d.label = fmt.Sprintf("%+v", d.header)
+						testHeaderItems = append(testHeaderItems, d)
+						break
+					}
+				}
+			}
+		}
+	}
+
+	for i, n := 0, len(testHeaderItems); i < n; {
+		testHeaderItem := testHeaderItems[i]
+		// remove the 0 BlockMaxSize value as it is invalid and we have provisioned all possible values already.
+		if testHeaderItem.header.BlockMaxSize == 0 {
+			n--
+			testHeaderItems[i], testHeaderItems = testHeaderItems[n], testHeaderItems[:n]
+		} else {
+			testHeaderItem.label = fmt.Sprintf("%+v", testHeaderItem)
+			i++
+		}
+	}
+
+	return testHeaderItems
+}
+
+// Generate all possible LZ4 headers.
+func init() {
+	// Only set the relevant headers having an impact on the compression.
+	seed := map[string][]interface{}{
+		"BlockDependency": []interface{}{true},
+		"BlockChecksum":   []interface{}{true},
+		"NoChecksum":      []interface{}{true},
+		// "Dict":            []interface{}{true},
+		// Enabling this substantially increases the testing time.
+		// As this test is not really required it is disabled.
+		// "HighCompression": []interface{}{true},
+	}
+	for _, bms := range lz4.BlockMaxSizeItems {
+		seed["BlockMaxSize"] = append(seed["BlockMaxSize"], bms)
+	}
+	testHeaderItems = buildHeaders(seed)
+}
+
+// Initialize the test data with various sizes of uncompressible and compressible data.
+func init() {
+	maxSize := 10 << 20 // > max block max size of 4Mb
+
+	// repeated data with very high compression ratio
+	repeat := make([]byte, maxSize)
+	for i := copy(repeat, lorem); i < len(repeat); {
+		i += copy(repeat[i:], repeat[:i])
+	}
+
+	// repeated data with small compression ratio
+	repeatlow := make([]byte, maxSize)
+	for i := 0; i < len(repeatlow); {
+		i += copy(repeatlow[i:], lorem)
+		// randomly skip some bytes to make sure the pattern does not repeat too much
+		n, _ := rand.Int(rand.Reader, big.NewInt(int64(10)))
+		i += int(n.Int64())
+	}
+
+	// random data: low to no compression
+	random := make([]byte, maxSize)
+	if _, err := rand.Read(random); err != nil {
+		panic(fmt.Sprintf("cannot initialize random data for size %d", maxSize))
+	}
+
+	// generate some test data with various sizes and kind of data: all valid block max sizes + others
+	for _, size := range lz4.BlockMaxSizeItems {
+		testDataItems = append(
+			testDataItems,
+			testData{fmt.Sprintf("random %d", size), random[:size]},
+			testData{fmt.Sprintf("random < %d", size), random[:size/3]},
+			testData{fmt.Sprintf("repeated %d", size), repeat[:size]},
+			testData{fmt.Sprintf("repeated < %d", size), repeat[:size/3]},
+		)
+	}
+}
+
+// Test low levels core functions:
+// a. compress and compare with supplied data if any
+// b. decompress the previous data and compare it with the original one
+func TestBlock(t *testing.T) {
+	for _, compress := range []func([]byte, []byte, int) (int, error){
+		lz4.CompressBlock,
+		lz4.CompressBlockHC,
+	} {
+		for _, item := range testDataItems {
+			data := item.data
+			z := make([]byte, lz4.CompressBlockBound(len(data)))
+			n, err := compress(data, z, 0)
+			if n == 0 { // not compressible
+				continue
+			}
+			if err != nil {
+				t.Errorf("CompressBlock: %s", err)
+				t.FailNow()
+			}
+			z = z[:n]
+			d := make([]byte, len(data))
+			n, err = lz4.UncompressBlock(z, d, 0)
+			if err != nil {
+				t.Errorf("UncompressBlock: %s", err)
+				t.FailNow()
+			}
+			d = d[:n]
+			if !bytes.Equal(d, data) {
+				t.Errorf("invalid decompressed data: %s: %s", item.label, string(d))
+				t.FailNow()
+			}
+		}
+	}
+}
+
+func BenchmarkUncompressBlock(b *testing.B) {
+	d := make([]byte, len(lorem))
+	z := make([]byte, len(lorem))
+	n, err := lz4.CompressBlock(lorem, z, 0)
+	if err != nil {
+		b.Errorf("CompressBlock: %s", err)
+		b.FailNow()
+	}
+	z = z[:n]
+	for i := 0; i < b.N; i++ {
+		lz4.UncompressBlock(z, d, 0)
+	}
+}
+
+func BenchmarkCompressBlock(b *testing.B) {
+	d := make([]byte, len(lorem))
+	z := make([]byte, len(lorem))
+	n, err := lz4.CompressBlock(lorem, z, 0)
+	if err != nil {
+		b.Errorf("CompressBlock: %s", err)
+		b.FailNow()
+	}
+	z = z[:n]
+	for i := 0; i < b.N; i++ {
+		lz4.CompressBlock(z, d, 0)
+	}
+}
+
+func BenchmarkCompressBlockHC(b *testing.B) {
+	d := make([]byte, len(lorem))
+	z := make([]byte, len(lorem))
+	n, err := lz4.CompressBlockHC(lorem, z, 0)
+	if err != nil {
+		b.Errorf("CompressBlock: %s", err)
+		b.FailNow()
+	}
+	z = z[:n]
+	for i := 0; i < b.N; i++ {
+		lz4.CompressBlockHC(z, d, 0)
+	}
+}
+
+// TestNoWrite compresses without any call to Write() (empty frame).
+// It does so while checking all possible headers.
+func TestNoWrite(t *testing.T) {
+	// that is 2*2*2*2*2*2^4 = 512 headers!
+	seed := map[string][]interface{}{
+		"BlockDependency": []interface{}{true},
+		"BlockChecksum":   []interface{}{true},
+		"NoChecksum":      []interface{}{true},
+		"Size":            []interface{}{999},
+		// "Dict":            []interface{}{true},
+		// Enabling this substantially increases the testing time.
+		// As this test is not really required it is disabled.
+		// "HighCompression": []interface{}{true},
+	}
+	for _, bms := range lz4.BlockMaxSizeItems {
+		seed["BlockMaxSize"] = append(seed["BlockMaxSize"], bms)
+	}
+	testHeaderItems := buildHeaders(seed)
+
+	for _, h := range testHeaderItems {
+		rw := bytes.NewBuffer(nil)
+
+		w := lz4.NewWriter(rw)
+		w.Header = h.header
+		if err := w.Close(); err != nil {
+			t.Errorf("Close(): unexpected error: %v", err)
+			t.FailNow()
+		}
+
+		r := lz4.NewReader(rw)
+		n, err := r.Read(nil)
+		if err != nil {
+			t.Errorf("Read(): unexpected error: %v", err)
+			t.FailNow()
+		}
+		if n != 0 {
+			t.Errorf("expected 0 bytes read, got %d", n)
+			t.FailNow()
+		}
+
+		buf := make([]byte, 16)
+		n, err = r.Read(buf)
+		if err != nil && err != io.EOF {
+			t.Errorf("Read(): unexpected error: %v", err)
+			t.FailNow()
+		}
+		if n != 0 {
+			t.Errorf("expected 0 bytes read, got %d", n)
+			t.FailNow()
+		}
+	}
+}
+
+// TestReset tests that the Reset() method resets the header on the Reader and Writer.
+func TestReset(t *testing.T) {
+	h := lz4.Header{
+		BlockDependency: true,
+		BlockChecksum:   true,
+		NoChecksum:      true,
+		BlockMaxSize:    123,
+		Size:            999,
+		// Dict:            true,
+		// DictID:          555,
+	}
+	dh := lz4.Header{}
+
+	w := lz4.NewWriter(nil)
+	w.Header = h
+	w.Reset(nil)
+	compareHeaders(w.Header, dh, t)
+
+	r := lz4.NewReader(nil)
+	r.Header = h
+	r.Reset(nil)
+	compareHeaders(r.Header, dh, t)
+}
+
+// TestFrame compresses and decompresses LZ4 streams with various input data and options.
+func TestFrame(t *testing.T) {
+	for _, tdata := range testDataItems {
+		data := tdata.data
+		// test various options
+		for _, headerItem := range testHeaderItems {
+			tag := tdata.label + ": " + headerItem.label
+			rw := bytes.NewBuffer(nil)
+
+			// Set all options to non default values and compress
+			w := lz4.NewWriter(rw)
+			w.Header = headerItem.header
+
+			n, err := w.Write(data)
+			if err != nil {
+				t.Errorf("%s: Write(): unexpected error: %v", tag, err)
+				t.FailNow()
+			}
+			if n != len(data) {
+				t.Errorf("%s: Write(): expected %d bytes written, got %d", tag, len(data), n)
+				t.FailNow()
+			}
+			if err = w.Close(); err != nil {
+				t.Errorf("%s: Close(): unexpected error: %v", tag, err)
+				t.FailNow()
+			}
+
+			// Decompress
+			r := lz4.NewReader(rw)
+			n, err = r.Read(nil)
+			if err != nil {
+				t.Errorf("%s: Read(): unexpected error: %v", tag, err)
+				t.FailNow()
+			}
+			if n != 0 {
+				t.Errorf("%s: Read(): expected 0 bytes read, got %d", tag, n)
+			}
+
+			buf := make([]byte, len(data))
+			n, err = r.Read(buf)
+			if err != nil && err != io.EOF {
+				t.Errorf("%s: Read(): unexpected error: %v", tag, err)
+				t.FailNow()
+			}
+			if n != len(data) {
+				t.Errorf("%s: Read(): expected %d bytes read, got %d", tag, len(data), n)
+			}
+			buf = buf[:n]
+			if !bytes.Equal(buf, data) {
+				t.Errorf("%s: decompress(compress(data)) != data (%d/%d)", tag, len(buf), len(data))
+				t.FailNow()
+			}
+
+			compareHeaders(w.Header, r.Header, t)
+		}
+	}
+}
+
+// TestReadFromWriteTo tests the Reader.WriteTo() and Writer.ReadFrom() methods.
+func TestReadFromWriteTo(t *testing.T) {
+	for _, tdata := range testDataItems {
+		data := tdata.data
+
+		// test various options
+		for _, headerItem := range testHeaderItems {
+			tag := "ReadFromWriteTo: " + tdata.label + ": " + headerItem.label
+			dbuf := bytes.NewBuffer(data)
+
+			zbuf := bytes.NewBuffer(nil)
+			w := lz4.NewWriter(zbuf)
+			w.Header = headerItem.header
+			if _, err := w.ReadFrom(dbuf); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			if err := w.Close(); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			buf := bytes.NewBuffer(nil)
+			r := lz4.NewReader(zbuf)
+			if _, err := r.WriteTo(buf); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			if !bytes.Equal(buf.Bytes(), data) {
+				t.Errorf("%s: decompress(compress(data)) != data (%d/%d)", tag, buf.Len(), len(data))
+				t.FailNow()
+			}
+		}
+	}
+}
+
+// TestCopy will use io.Copy and avoid using Reader.WriteTo() and Writer.ReadFrom().
+func TestCopy(t *testing.T) {
+	w := lz4.NewWriter(nil)
+	r := lz4.NewReader(nil)
+	for _, tdata := range testDataItems {
+		data := tdata.data
+
+		// test various options
+		for _, headerItem := range testHeaderItems {
+			tag := "io.Copy: " + tdata.label + ": " + headerItem.label
+			dbuf := &testBuffer{bytes.NewBuffer(data)}
+
+			zbuf := bytes.NewBuffer(nil)
+			w.Reset(zbuf)
+			w.Header = headerItem.header
+			if _, err := io.Copy(w, dbuf); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			if err := w.Close(); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			buf := &testBuffer{bytes.NewBuffer(nil)}
+			r.Reset(zbuf)
+			if _, err := io.Copy(buf, r); err != nil {
+				t.Errorf("%s: unexpected error: %s", tag, err)
+				t.FailNow()
+			}
+
+			if !bytes.Equal(buf.Bytes(), data) {
+				t.Errorf("%s: decompress(compress(data)) != data (%d/%d)", tag, buf.Len(), len(data))
+				t.FailNow()
+			}
+		}
+	}
+}
+
+func TestSkippable(t *testing.T) {
+	w := lz4.NewWriter(nil)
+	r := lz4.NewReader(nil)
+
+	skippable := make([]byte, 1<<20)
+	binary.LittleEndian.PutUint32(skippable, lz4.FrameSkipMagic)
+	binary.LittleEndian.PutUint32(skippable[4:], uint32(len(skippable)-8))
+
+	buf := make([]byte, len(lorem))
+
+	tag := "skippable first"
+	zbuf := bytes.NewBuffer(skippable)
+	w.Reset(zbuf)
+	w.Write(lorem)
+	w.Close()
+
+	r.Reset(zbuf)
+	if _, err := r.Read(buf); err != nil {
+		t.Errorf("%s: unexpected error: %s", tag, err)
+		t.FailNow()
+	}
+
+	tag = "skippable last"
+	zbuf = bytes.NewBuffer(nil)
+	w.Reset(zbuf)
+	w.Write(lorem)
+	w.Close()
+	zbuf.Write(skippable)
+
+	r.Reset(zbuf)
+	if _, err := r.Read(buf); err != nil {
+		t.Errorf("%s: unexpected error: %s", tag, err)
+		t.FailNow()
+	}
+
+	tag = "skippable middle"
+	zbuf = bytes.NewBuffer(nil)
+	w.Reset(zbuf)
+	w.Write(lorem)
+	zbuf.Write(skippable)
+	w.Write(lorem)
+	w.Close()
+
+	r.Reset(zbuf)
+	if _, err := r.Read(buf); err != nil {
+		t.Errorf("%s: unexpected error: %s", tag, err)
+		t.FailNow()
+	}
+}

+ 105 - 0
lz4c/main.go

@@ -0,0 +1,105 @@
+// Command line utility for the lz4 package.
+package main
+
+import (
+	// 	"bytes"
+
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path"
+	"runtime"
+	"strings"
+
+	"github.com/pierrec/lz4"
+)
+
+func main() {
+	// Process command line arguments
+	var (
+		blockMaxSizeDefault = 4 << 20
+		flagStdout          = flag.Bool("c", false, "output to stdout")
+		flagDecompress      = flag.Bool("d", false, "decompress flag")
+		flagBlockMaxSize    = flag.Int("B", blockMaxSizeDefault, "block max size [64Kb,256Kb,1Mb,4Mb]")
+		flagBlockDependency = flag.Bool("BD", false, "enable block dependency")
+		flagBlockChecksum   = flag.Bool("BX", false, "enable block checksum")
+		flagStreamChecksum  = flag.Bool("Sx", false, "disable stream checksum")
+		flagHighCompression = flag.Bool("9", false, "enable high compression")
+	)
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage:\n\t%s [arg] [input]...\n\tNo input means [de]compress stdin to stdout\n\n", os.Args[0])
+		flag.PrintDefaults()
+	}
+	flag.Parse()
+
+	// Use all CPUs
+	runtime.GOMAXPROCS(runtime.NumCPU())
+
+	zr := lz4.NewReader(nil)
+	zw := lz4.NewWriter(nil)
+	zh := lz4.Header{
+		BlockDependency: *flagBlockDependency,
+		BlockChecksum:   *flagBlockChecksum,
+		BlockMaxSize:    *flagBlockMaxSize,
+		NoChecksum:      *flagStreamChecksum,
+		HighCompression: *flagHighCompression,
+	}
+
+	worker := func(in io.Reader, out io.Writer) {
+		if *flagDecompress {
+			zr.Reset(in)
+			if _, err := io.Copy(out, zr); err != nil {
+				log.Fatalf("Error while decompressing input: %v", err)
+			}
+		} else {
+			zw.Reset(out)
+			zw.Header = zh
+			if _, err := io.Copy(zw, in); err != nil {
+				log.Fatalf("Error while compressing input: %v", err)
+			}
+		}
+	}
+
+	// No input means [de]compress stdin to stdout
+	if len(flag.Args()) == 0 {
+		worker(os.Stdin, os.Stdout)
+		os.Exit(0)
+	}
+
+	// Compress or decompress all input files
+	for _, inputFileName := range flag.Args() {
+		outputFileName := path.Clean(inputFileName)
+
+		if !*flagStdout {
+			if *flagDecompress {
+				outputFileName = strings.TrimSuffix(outputFileName, lz4.Extension)
+				if outputFileName == inputFileName {
+					log.Fatalf("Invalid output file name: same as input: %s", inputFileName)
+				}
+			} else {
+				outputFileName += lz4.Extension
+			}
+		}
+
+		inputFile, err := os.Open(inputFileName)
+		if err != nil {
+			log.Fatalf("Error while opening input: %v", err)
+		}
+
+		outputFile := os.Stdout
+		if !*flagStdout {
+			outputFile, err = os.Create(outputFileName)
+			if err != nil {
+				log.Fatalf("Error while opening output: %v", err)
+			}
+		}
+		worker(inputFile, outputFile)
+
+		inputFile.Close()
+		if !*flagStdout {
+			outputFile.Close()
+		}
+	}
+}

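Assuming the command is built as lz4c, an illustrative invocation based on the flags above would be "lz4c -9 -BX file.txt", writing file.txt.lz4 with high compression and block checksums; "lz4c -d file.txt.lz4" restores the original, and with no file arguments the tool [de]compresses stdin to stdout.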
+ 351 - 0
reader.go

@@ -0,0 +1,351 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash"
+	"io"
+	"io/ioutil"
+	"runtime"
+	"sync"
+	"sync/atomic"
+)
+
+// errEndOfBlock is returned by readBlock when it has reached the last block of the frame.
+// It is not an error.
+var errEndOfBlock = errors.New("end of block")
+
+// Reader implements the LZ4 frame decoder.
+// The Header is set after the first call to Read().
+// The Header may change between Read() calls in case of concatenated frames.
+type Reader struct {
+	Header
+	Pos      int64 // position within the source
+	src      io.Reader
+	checksum hash.Hash32    // frame hash
+	wg       sync.WaitGroup // decompressing goroutines wait group
+	data     []byte         // buffered decompressed data
+	window   []byte         // 64Kb decompressed data window
+}
+
+// NewReader returns a new LZ4 frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReader(src io.Reader) *Reader {
+	return &Reader{
+		src:      src,
+		checksum: hashPool.Get(),
+	}
+}
+
+// readHeader checks the frame magic number and parses the frame descriptor.
+// Skippable frames are supported even as a first frame, although the LZ4
+// specification recommends that skippable frames not be used as first frames.
+func (z *Reader) readHeader(first bool) error {
+	defer z.checksum.Reset()
+
+	for {
+		var magic uint32
+		if err := binary.Read(z.src, binary.LittleEndian, &magic); err != nil {
+			if !first && err == io.ErrUnexpectedEOF {
+				return io.EOF
+			}
+			return err
+		}
+		z.Pos += 4
+		if magic>>8 == frameSkipMagic>>8 {
+			var skipSize uint32
+			if err := binary.Read(z.src, binary.LittleEndian, &skipSize); err != nil {
+				return err
+			}
+			z.Pos += 4
+			m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
+			z.Pos += m
+			if err != nil {
+				return err
+			}
+			continue
+		}
+		if magic != frameMagic {
+			return fmt.Errorf("lz4.Read: invalid frame magic number: got %x expected %x", magic, frameMagic)
+		}
+		break
+	}
+
+	// header
+	var buf [8]byte
+	if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
+		return err
+	}
+	z.Pos += 2
+
+	b := buf[0]
+	if b>>6 != Version {
+		return fmt.Errorf("lz4.Read: invalid version: got %d expected %d", b>>6, Version)
+	}
+	z.BlockDependency = b>>5&1 == 0
+	z.BlockChecksum = b>>4&1 > 0
+	frameSize := b>>3&1 > 0
+	z.NoChecksum = b>>2&1 == 0
+	// 	z.Dict = b&1 > 0
+
+	bmsID := buf[1] >> 4 & 0x7
+	bSize, ok := bsMapID[bmsID]
+	if !ok {
+		return fmt.Errorf("lz4.Read: invalid block max size: %d", bmsID)
+	}
+	z.BlockMaxSize = bSize
+
+	z.checksum.Write(buf[0:2])
+
+	if frameSize {
+		if err := binary.Read(z.src, binary.LittleEndian, &z.Size); err != nil {
+			return err
+		}
+		z.Pos += 8
+		binary.LittleEndian.PutUint64(buf[:], z.Size)
+		z.checksum.Write(buf[0:8])
+	}
+
+	// 	if z.Dict {
+	// 		if err := binary.Read(z.src, binary.LittleEndian, &z.DictID); err != nil {
+	// 			return err
+	// 		}
+	// 		z.Pos += 4
+	// 		binary.LittleEndian.PutUint32(buf[:], z.DictID)
+	// 		z.checksum.Write(buf[0:4])
+	// 	}
+
+	// header checksum
+	if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
+		return err
+	}
+	z.Pos++
+	if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
+		return fmt.Errorf("lz4.Read: invalid header checksum: got %v expected %v", buf[0], h)
+	}
+
+	z.Header.done = true
+
+	return nil
+}
+
+// Read decompresses data from the underlying source into the supplied buffer.
+//
+// Since there can be multiple streams concatenated, Header values may
+// change between calls to Read(). If that is the case, no data is actually read from
+// the underlying io.Reader, to allow for potential input buffer resizing.
+//
+// Data is buffered if the input buffer is too small, and exhausted upon successive calls.
+//
+// If the buffer is large enough (typically in multiples of BlockMaxSize) and there is
+// no block dependency, then the data will be decompressed concurrently based on the GOMAXPROCS value.
+func (z *Reader) Read(buf []byte) (n int, err error) {
+	if !z.Header.done {
+		if err = z.readHeader(true); err != nil {
+			return
+		}
+	}
+
+	if len(buf) == 0 {
+		return
+	}
+
+	// exhaust remaining data from previous Read()
+	if len(z.data) > 0 {
+		n = copy(buf, z.data)
+		z.data = z.data[n:]
+		if len(z.data) == 0 {
+			z.data = nil
+		}
+		return
+	}
+
+	// Break up the input buffer into BlockMaxSize blocks with at least one block.
+	// Then decompress into each of them concurrently if possible (no dependency).
+	// In case of dependency, the first block will be missing the window (except on the
+	// very first call), the rest will have it already since it comes from the previous block.
+	wbuf := buf
+	zn := (len(wbuf) + z.BlockMaxSize - 1) / z.BlockMaxSize
+	zblocks := make([]block, zn)
+	for zi, abort := 0, uint32(0); zi < zn && atomic.LoadUint32(&abort) == 0; zi++ {
+		zb := &zblocks[zi]
+		// last block may be too small
+		if len(wbuf) < z.BlockMaxSize+len(z.window) {
+			wbuf = make([]byte, z.BlockMaxSize+len(z.window))
+		}
+		copy(wbuf, z.window)
+		if zb.err = z.readBlock(wbuf, zb); zb.err != nil {
+			break
+		}
+		wbuf = wbuf[z.BlockMaxSize:]
+		if !z.BlockDependency {
+			z.wg.Add(1)
+			go z.decompressBlock(zb, &abort)
+			continue
+		}
+		// cannot decompress concurrently when dealing with block dependency
+		z.decompressBlock(zb, nil)
+		// the last block may not contain enough data
+		if len(zb.data) >= winSize {
+			if len(z.window) == 0 {
+				z.window = make([]byte, winSize)
+			}
+			copy(z.window, zb.data[len(zb.data)-winSize:])
+		}
+	}
+	z.wg.Wait()
+
+	// since a block size may be less than BlockMaxSize, trim the decompressed buffers
+	for _, zb := range zblocks {
+		if zb.err != nil {
+			if zb.err == errEndOfBlock {
+				return n, z.close()
+			}
+			return n, zb.err
+		}
+		bLen := len(zb.data)
+		z.checksum.Write(zb.data)
+		m := copy(buf[n:], zb.data)
+		// buffer the remaining data (this is necessarily the last block)
+		if m < bLen {
+			z.data = zb.data[m:]
+		}
+		n += m
+	}
+
+	return
+}
+
+// readBlock reads an entire frame block from the frame.
+// The input buffer is the one that will receive the decompressed data.
+// If the end of the frame is detected, it returns the errEndOfBlock error.
+func (z *Reader) readBlock(buf []byte, b *block) error {
+	var bLen int32
+	if err := binary.Read(z.src, binary.LittleEndian, &bLen); err != nil {
+		return err
+	}
+	z.Pos += 4
+
+	switch {
+	case bLen == 0:
+		return errEndOfBlock
+	case bLen > 0:
+		b.compressed = true
+		b.data = buf
+		b.zdata = make([]byte, bLen)
+	default:
+		b.data = buf[:-bLen]
+		b.zdata = buf[:-bLen]
+	}
+	if _, err := io.ReadFull(z.src, b.zdata); err != nil {
+		return err
+	}
+
+	if z.BlockChecksum {
+		if err := binary.Read(z.src, binary.LittleEndian, &b.checksum); err != nil {
+			return err
+		}
+		xxh := hashPool.Get()
+		defer hashPool.Put(xxh)
+		xxh.Write(b.zdata)
+		if h := xxh.Sum32(); h != b.checksum {
+			return fmt.Errorf("lz4.Read: invalid block checksum: got %x expected %x", h, b.checksum)
+		}
+	}
+
+	return nil
+}
+
+// decompressBlock decompresses a frame block.
+// In case of an error, the block err is set with it and abort is set to 1.
+func (z *Reader) decompressBlock(b *block, abort *uint32) {
+	if abort != nil {
+		defer z.wg.Done()
+	}
+	if b.compressed {
+		n := len(z.window)
+		m, err := UncompressBlock(b.zdata, b.data, n)
+		if err != nil {
+			if abort != nil {
+				atomic.StoreUint32(abort, 1)
+			}
+			b.err = err
+			return
+		}
+		b.data = b.data[n : n+m]
+	}
+	z.Pos += int64(len(b.data))
+}
+
+// close validates the frame checksum (if any) and checks the next frame (if any).
+func (z *Reader) close() error {
+	if !z.NoChecksum {
+		var checksum uint32
+		if err := binary.Read(z.src, binary.LittleEndian, &checksum); err != nil {
+			return err
+		}
+		if checksum != z.checksum.Sum32() {
+			return fmt.Errorf("lz4.Read: invalid frame checksum: got %x expected %x", z.checksum.Sum32(), checksum)
+		}
+	}
+
+	// get ready for the next concatenated frame, but do not change the position
+	pos := z.Pos
+	z.Reset(z.src)
+	z.Pos = pos
+
+	// since multiple frames can be concatenated, check for another one
+	return z.readHeader(false)
+}
+
+// Reset discards the Reader's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) {
+	z.Header = Header{}
+	z.Pos = 0
+	z.src = r
+	z.checksum.Reset()
+	z.data = nil
+	z.window = nil
+}
+
+// WriteTo decompresses the data from the underlying io.Reader and writes it to the io.Writer.
+// Returns the number of bytes written.
+func (z *Reader) WriteTo(w io.Writer) (n int64, err error) {
+	cpus := runtime.GOMAXPROCS(0)
+	var buf []byte
+
+	// Since the initial buffer is nil, the first Read only reads the compressed frame options.
+	// The buffer can then be sized appropriately to support maximally concurrent decompression.
+	// If multiple frames are concatenated, Read() will return with no data decompressed but with
+	// potentially changed options. The buffer will be resized accordingly, always trying to
+	// maximize concurrency.
+	for {
+		nsize := 0
+		// the block max size can change if multiple streams are concatenated.
+		// Check it after every Read().
+		if z.BlockDependency {
+			// in case of dependency, we cannot decompress concurrently,
+			// so allocate the minimum buffer + window size
+			nsize = len(z.window) + z.BlockMaxSize
+		} else {
+			// if no dependency, allocate a buffer large enough for concurrent decompression
+			nsize = cpus * z.BlockMaxSize
+		}
+		if nsize != len(buf) {
+			buf = make([]byte, nsize)
+		}
+
+		m, er := z.Read(buf)
+		if er != nil && er != io.EOF {
+			return n, er
+		}
+		m, err = w.Write(buf[:m])
+		n += int64(m)
+		if err != nil || er == io.EOF {
+			return
+		}
+	}
+}

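A minimal sketch of driving Reader.Read directly rather than via WriteTo: a first Read(nil) only parses the frame header (the pattern exercised in lz4_test.go above), after which the buffer can be sized from the populated Header; the stdin/stdout wiring is an assumption:

package main

import (
	"io"
	"os"

	"github.com/pierrec/lz4"
)

func main() {
	r := lz4.NewReader(os.Stdin)
	// a nil buffer makes Read parse the frame header only and populate r.Header
	if _, err := r.Read(nil); err != nil {
		panic(err)
	}
	buf := make([]byte, r.Header.BlockMaxSize)
	for {
		n, err := r.Read(buf)
		if n > 0 {
			os.Stdout.Write(buf[:n])
		}
		if err == io.EOF {
			return
		}
		if err != nil {
			panic(err)
		}
	}
}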
+ 354 - 0
writer.go

@@ -0,0 +1,354 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"hash"
+	"io"
+	"runtime"
+	"sync"
+)
+
+// Writer implements the LZ4 frame encoder.
+type Writer struct {
+	Header
+	dst      io.Writer
+	checksum hash.Hash32    // frame checksum
+	wg       sync.WaitGroup // compressing goroutines wait group
+	data     []byte         // data to be compressed, only used when dealing with block dependency as we need 64Kb to work with
+	window   []byte         // last 64KB of decompressed data (block dependency) + blockMaxSize buffer
+}
+
+// NewWriter returns a new LZ4 frame encoder.
+// No access to the underlying io.Writer is performed.
+// The supplied Header is checked at the first Write.
+// The Header may be changed before the first Write but not afterwards, until Reset() is called.
+func NewWriter(dst io.Writer) *Writer {
+	return &Writer{
+		dst:      dst,
+		checksum: hashPool.Get(),
+		Header: Header{
+			BlockMaxSize: 4 << 20,
+		},
+	}
+}
+
+// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
+func (z *Writer) writeHeader() error {
+	// Default to 4Mb if BlockMaxSize is not set
+	if z.Header.BlockMaxSize == 0 {
+		z.Header.BlockMaxSize = 4 << 20
+	}
+	// the only option that needs to be validated
+	bSize, ok := bsMapValue[z.Header.BlockMaxSize]
+	if !ok {
+		return fmt.Errorf("lz4: invalid block max size: %d", z.Header.BlockMaxSize)
+	}
+
+	// magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
+	// Size and DictID are optional
+	var buf [19]byte
+
+	// set the fixed size data: magic number, block max size and flags
+	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
+	flg := byte(Version << 6)
+	if !z.Header.BlockDependency {
+		flg |= 1 << 5
+	}
+	if z.Header.BlockChecksum {
+		flg |= 1 << 4
+	}
+	if z.Header.Size > 0 {
+		flg |= 1 << 3
+	}
+	if !z.Header.NoChecksum {
+		flg |= 1 << 2
+	}
+	// 	if z.Header.Dict {
+	// 		flg |= 1
+	// 	}
+	buf[4] = flg
+	buf[5] = bSize << 4
+
+	// current buffer size: magic(4) + flags(1) + block max size (1)
+	n := 6
+	// optional items
+	if z.Header.Size > 0 {
+		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
+		n += 8
+	}
+	// 	if z.Header.Dict {
+	// 		binary.LittleEndian.PutUint32(buf[n:], z.Header.DictID)
+	// 		n += 4
+	// 	}
+
+	// header checksum includes the flags, block max size and optional Size and DictID
+	z.checksum.Write(buf[4:n])
+	buf[n] = byte(z.checksum.Sum32() >> 8 & 0xFF)
+	z.checksum.Reset()
+
+	// header ready, write it out
+	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
+		return err
+	}
+	z.Header.done = true
+
+	return nil
+}
+
+// Write compresses data from the supplied buffer into the underlying io.Writer.
+// Write does not return until the data has been written.
+//
+// If the input buffer is large enough (typically in multiples of BlockMaxSize)
+// the data will be compressed concurrently.
+//
+// Write never buffers data except in BlockDependency mode, where it buffers
+// until it holds 64Kb of data, after which it no longer buffers any.
+func (z *Writer) Write(buf []byte) (n int, err error) {
+	if !z.Header.done {
+		if err = z.writeHeader(); err != nil {
+			return
+		}
+	}
+
+	if len(buf) == 0 {
+		return
+	}
+
+	if !z.NoChecksum {
+		z.wg.Add(1)
+		go func(b []byte) {
+			z.checksum.Write(b)
+			z.wg.Done()
+		}(buf)
+	}
+
+	// with block dependency, require at least 64Kb of data to work with
+	// not having 64Kb only matters initially, to set up the first window
+	if z.BlockDependency && len(z.window) == 0 {
+		z.data = append(z.data, buf...)
+		if len(z.data) < winSize {
+			z.wg.Wait()
+			return len(buf), nil
+		}
+		buf = z.data
+		z.data = nil
+	}
+
+	// Break up the input buffer into BlockMaxSize blocks, provisioning the left over block.
+	// Then compress into each of them concurrently if possible (no dependency).
+	wbuf := buf
+	zn := len(wbuf) / z.BlockMaxSize
+	zblocks := make([]block, zn, zn+1)
+	for zi := 0; zi < zn; zi++ {
+		zb := &zblocks[zi]
+		if z.BlockDependency {
+			if zi == 0 {
+				// first block does not have the window
+				zb.data = append(z.window, wbuf[:z.BlockMaxSize]...)
+				zb.offset = len(z.window)
+				wbuf = wbuf[z.BlockMaxSize-winSize:]
+			} else {
+				// set the uncompressed data including the window from previous block
+				zb.data = wbuf[:z.BlockMaxSize+winSize]
+				zb.offset = winSize
+				wbuf = wbuf[z.BlockMaxSize:]
+			}
+		} else {
+			zb.data = wbuf[:z.BlockMaxSize]
+			wbuf = wbuf[z.BlockMaxSize:]
+		}
+
+		z.wg.Add(1)
+		go z.compressBlock(zb)
+	}
+
+	// left over
+	if len(buf)%z.BlockMaxSize > 0 {
+		zblocks = append(zblocks, block{data: wbuf})
+		zb := &zblocks[zn]
+		if z.BlockDependency {
+			if zn == 0 {
+				zb.data = append(z.window, zb.data...)
+				zb.offset = len(z.window)
+			} else {
+				zb.offset = winSize
+			}
+		}
+		z.wg.Add(1)
+		go z.compressBlock(zb)
+	}
+	z.wg.Wait()
+
+	// outputs the compressed data
+	for zi, zb := range zblocks {
+		_, err = z.writeBlock(&zb)
+
+		n += len(zb.data)
+		// remove the window in zb.data
+		if z.BlockDependency {
+			if zi == 0 {
+				n -= len(z.window)
+			} else {
+				n -= winSize
+			}
+		}
+		if err != nil {
+			return
+		}
+	}
+
+	if z.BlockDependency {
+		if len(z.window) == 0 {
+			z.window = make([]byte, winSize)
+		}
+		// last buffer may be shorter than the window
+		if len(buf) > winSize {
+			copy(z.window, buf[len(buf)-winSize:])
+		}
+	}
+
+	return
+}
+
+// compressBlock compresses a block.
+func (z *Writer) compressBlock(zb *block) {
+	// compressed block size cannot exceed the input's
+	zbuf := make([]byte, len(zb.data)-zb.offset)
+	var (
+		n   int
+		err error
+	)
+	if z.HighCompression {
+		n, err = CompressBlockHC(zb.data, zbuf, zb.offset)
+	} else {
+		n, err = CompressBlock(zb.data, zbuf, zb.offset)
+	}
+
+	// compressible and compressed size smaller than decompressed: ok!
+	if err == nil && n > 0 && n < len(zb.data)-zb.offset {
+		zb.compressed = true
+		zb.zdata = zbuf[:n]
+	} else {
+		zb.zdata = zb.data[zb.offset:]
+	}
+
+	if z.BlockChecksum {
+		xxh := hashPool.Get()
+		xxh.Write(zb.zdata)
+		zb.checksum = xxh.Sum32()
+		hashPool.Put(xxh)
+	}
+
+	z.wg.Done()
+}
+
+// writeBlock writes a frame block to the underlying io.Writer (size, data).
+func (z *Writer) writeBlock(zb *block) (int, error) {
+	bLen := int32(len(zb.zdata))
+	if !zb.compressed {
+		bLen = -bLen
+	}
+
+	n := 0
+	if err := binary.Write(z.dst, binary.LittleEndian, bLen); err != nil {
+		return n, err
+	}
+	n += 4
+
+	m, err := z.dst.Write(zb.zdata)
+	n += m
+	if err != nil {
+		return n, err
+	}
+
+	if z.BlockChecksum {
+		if err := binary.Write(z.dst, binary.LittleEndian, zb.checksum); err != nil {
+			return n, err
+		}
+		n += 4
+	}
+
+	return n, nil
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+// Flush does not return until the data has been written.
+// If the underlying writer returns an error, Flush returns that error.
+//
+// Flush is only required when in BlockDependency mode and the total amount of
+// data written is less than 64Kb.
+func (z *Writer) Flush() error {
+	if len(z.data) == 0 {
+		return nil
+	}
+	zb := block{data: z.data}
+	z.wg.Add(1)
+	z.compressBlock(&zb)
+	if _, err := z.writeBlock(&zb); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
+func (z *Writer) Close() error {
+	if !z.Header.done {
+		if err := z.writeHeader(); err != nil {
+			return err
+		}
+	}
+
+	// buffered data for the block dependency window
+	if z.BlockDependency && len(z.data) > 0 {
+		zb := block{data: z.data}
+		z.wg.Add(1)
+		z.compressBlock(&zb)
+		if _, err := z.writeBlock(&zb); err != nil {
+			return err
+		}
+	}
+
+	if err := binary.Write(z.dst, binary.LittleEndian, uint32(0)); err != nil {
+		return err
+	}
+	if !z.NoChecksum {
+		if err := binary.Write(z.dst, binary.LittleEndian, z.checksum.Sum32()); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Reset clears the state of the Writer z such that it is equivalent to its
+// initial state from NewWriter, but instead writing to w.
+// No access to the underlying io.Writer is performed.
+func (z *Writer) Reset(w io.Writer) {
+	z.Header = Header{}
+	z.dst = w
+	z.checksum.Reset()
+	z.data = nil
+	z.window = nil
+}
+
+// ReadFrom compresses the data read from the io.Reader and writes it to the underlying io.Writer.
+// Returns the number of bytes read.
+// It does not close the Writer.
+func (z *Writer) ReadFrom(r io.Reader) (n int64, err error) {
+	cpus := runtime.GOMAXPROCS(0)
+	buf := make([]byte, cpus*z.BlockMaxSize)
+	for {
+		m, er := io.ReadFull(r, buf)
+		n += int64(m)
+		if er == nil || er == io.ErrUnexpectedEOF || er == io.EOF {
+			if _, err = z.Write(buf[:m]); err != nil {
+				return
+			}
+			if er == nil {
+				continue
+			}
+			return
+		}
+		return n, er
+	}
+}
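
A minimal sketch pairing Writer.ReadFrom and Reader.WriteTo, the two entry points that leverage concurrency transparently per the package documentation; the -d flag handling is an assumption mirroring lz4c:

package main

import (
	"os"

	"github.com/pierrec/lz4"
)

// compress streams stdin to stdout as an LZ4 frame using Writer.ReadFrom.
func compress() error {
	w := lz4.NewWriter(os.Stdout)
	if _, err := w.ReadFrom(os.Stdin); err != nil {
		return err
	}
	return w.Close() // writes the end mark and the frame checksum
}

// decompress streams an LZ4 frame from stdin to stdout using Reader.WriteTo.
func decompress() error {
	r := lz4.NewReader(os.Stdin)
	_, err := r.WriteTo(os.Stdout)
	return err
}

func main() {
	if len(os.Args) > 1 && os.Args[1] == "-d" {
		if err := decompress(); err != nil {
			panic(err)
		}
		return
	}
	if err := compress(); err != nil {
		panic(err)
	}
}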