@@ -1,408 +1,171 @@
 package lz4
 
-import (
-	"encoding/binary"
-	"fmt"
-	"github.com/pierrec/lz4/internal/xxh32"
-	"io"
-	"runtime"
-)
-
-// zResult contains the results of compressing a block.
-type zResult struct {
-	size     uint32 // Block header
-	data     []byte // Compressed data
-	checksum uint32 // Data checksum
-}
-
-// Writer implements the LZ4 frame encoder.
-type Writer struct {
-	Header
-	// Handler called when a block has been successfully written out.
-	// It provides the number of bytes written.
-	OnBlockDone func(size int)
-
-	buf       [19]byte      // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
-	dst       io.Writer     // Destination.
-	checksum  xxh32.XXHZero // Frame checksum.
-	data      []byte        // Data to be compressed + buffer for compressed data.
-	idx       int           // Index into data.
-	hashtable [winSize]int  // Hash table used in CompressBlock().
-
-	// For concurrency.
-	c   chan chan zResult // Channel for block compression goroutines and writer goroutine.
-	err error             // Any error encountered while writing to the underlying destination.
+import "io"
|
|
|
+
|
|
|
+var writerStates = []aState{
|
|
|
+ noState: newState,
|
|
|
+ newState: headerState,
|
|
|
+ headerState: writeState,
|
|
|
+ writeState: closedState,
|
|
|
+ closedState: newState,
|
|
|
+ errorState: newState,
|
|
|
}
|
|
|
|
|
|
 // NewWriter returns a new LZ4 frame encoder.
-// No access to the underlying io.Writer is performed.
-// The supplied Header is checked at the first Write.
-// It is ok to change it before the first Write but then not until a Reset() is performed.
-func NewWriter(dst io.Writer) *Writer {
-	z := new(Writer)
-	z.Reset(dst)
-	return z
-}
-
-// WithConcurrency sets the number of concurrent go routines used for compression.
-// A negative value sets the concurrency to GOMAXPROCS.
-func (z *Writer) WithConcurrency(n int) *Writer {
-	switch {
-	case n == 0 || n == 1:
-		z.c = nil
-		return z
-	case n < 0:
-		n = runtime.GOMAXPROCS(0)
-	}
-	z.c = make(chan chan zResult, n)
-	// Writer goroutine managing concurrent block compression goroutines.
-	go func() {
-		// Process next block compression item.
-		for c := range z.c {
-			// Read the next compressed block result.
-			// Waiting here ensures that the blocks are output in the order they were sent.
-			// The incoming channel is always closed as it indicates to the caller that
-			// the block has been processed.
-			res := <-c
-			n := len(res.data)
-			if n == 0 {
-				// Notify the block compression routine that we are done with its result.
-				// This is used when a sentinel block is sent to terminate the compression.
-				close(c)
-				return
-			}
-			// Write the block.
-			if err := z.writeUint32(res.size); err != nil && z.err == nil {
-				z.err = err
-			}
-			if _, err := z.dst.Write(res.data); err != nil && z.err == nil {
-				z.err = err
-			}
-			if z.BlockChecksum {
-				if err := z.writeUint32(res.checksum); err != nil && z.err == nil {
-					z.err = err
-				}
-			}
-			if isCompressed := res.size&compressedBlockFlag == 0; isCompressed {
-				// It is now safe to release the buffer as no longer in use by any goroutine.
-				putBuffer(cap(res.data), res.data)
-			}
-			if h := z.OnBlockDone; h != nil {
-				h(n)
-			}
-			close(c)
-		}
-	}()
-	return z
+func NewWriter(w io.Writer, options ...Option) (io.WriteCloser, error) {
+	zw := new(_Writer)
+	_ = defaultBlockSizeOption(zw)
+	_ = defaultChecksumOption(zw)
+	_ = defaultConcurrency(zw)
+	if err := zw.Reset(w, options...); err != nil {
+		return nil, err
+	}
+	return zw, nil
 }
 
-// newBuffers instantiates new buffers which size matches the one in Header.
-// The returned buffers are for decompression and compression respectively.
-func (z *Writer) newBuffers() {
-	bSize := z.Header.BlockMaxSize
-	buf := getBuffer(bSize)
-	z.data = buf[:bSize] // Uncompressed buffer is the first half.
+type _Writer struct {
+	state _State
+	buf   [11]byte         // frame descriptor needs at most 4+8+1=11 bytes
+	src   io.Writer        // destination writer
+	level CompressionLevel // how hard to try
+	num   int              // concurrency level
+	frame Frame            // frame being built
+	ht    []int            // hash table (set if no concurrency)
+	data  []byte           // pending data
+	idx   int              // size of pending data
 }
 
-// freeBuffers puts the writer's buffers back to the pool.
-func (z *Writer) freeBuffers() {
-	// Put the buffer back into the pool, if any.
-	putBuffer(z.Header.BlockMaxSize, z.data)
-	z.data = nil
+func (w *_Writer) isNotConcurrent() bool {
+	return w.num == 1
 }
 
-// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
-func (z *Writer) writeHeader() error {
-	// Default to 4Mb if BlockMaxSize is not set.
-	if z.Header.BlockMaxSize == 0 {
-		z.Header.BlockMaxSize = blockSize4M
-	}
-	// The only option that needs to be validated.
-	bSize := z.Header.BlockMaxSize
-	if !isValidBlockSize(z.Header.BlockMaxSize) {
-		return fmt.Errorf("lz4: invalid block max size: %d", bSize)
-	}
-	// Allocate the compressed/uncompressed buffers.
-	// The compressed buffer cannot exceed the uncompressed one.
-	z.newBuffers()
-	z.idx = 0
-
-	// Size is optional.
-	buf := z.buf[:]
-
-	// Set the fixed size data: magic number, block max size and flags.
-	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
-	flg := byte(Version << 6)
-	flg |= 1 << 5 // No block dependency.
-	if z.Header.BlockChecksum {
-		flg |= 1 << 4
-	}
-	if z.Header.Size > 0 {
-		flg |= 1 << 3
-	}
-	if !z.Header.NoChecksum {
-		flg |= 1 << 2
-	}
-	buf[4] = flg
-	buf[5] = blockSizeValueToIndex(z.Header.BlockMaxSize) << 4
-
-	// Current buffer size: magic(4) + flags(1) + block max size (1).
-	n := 6
-	// Optional items.
-	if z.Header.Size > 0 {
-		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
-		n += 8
-	}
-
-	// The header checksum includes the flags, block max size and optional Size.
-	buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
-	z.checksum.Reset()
-
-	// Header ready, write it out.
-	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
-		return err
-	}
-	z.Header.done = true
-	if debugFlag {
-		debug("wrote header %v", z.Header)
-	}
-
-	return nil
-}
-
-// Write compresses data from the supplied buffer into the underlying io.Writer.
-// Write does not return until the data has been written.
-func (z *Writer) Write(buf []byte) (int, error) {
-	if !z.Header.done {
-		if err := z.writeHeader(); err != nil {
-			return 0, err
+func (w *_Writer) Write(buf []byte) (n int, err error) {
+	defer w.state.check(&err)
+	switch w.state.state {
+	case closedState, errorState:
+		return 0, w.state.err
+	case newState:
+		w.state.next(nil)
+		if err = w.frame.Descriptor.write(w); w.state.next(err) {
+			return
 		}
-	}
-	if debugFlag {
-		debug("input buffer len=%d index=%d", len(buf), z.idx)
+	default:
+		return 0, w.state.fail()
 	}
 
-	zn := len(z.data)
-	var n int
+	zn := len(w.data)
 	for len(buf) > 0 {
-		if z.idx == 0 && len(buf) >= zn {
+		if w.idx == 0 && len(buf) >= zn {
 			// Avoid a copy as there is enough data for a block.
-			if err := z.compressBlock(buf[:zn]); err != nil {
-				return n, err
+			if err = w.write(); err != nil {
+				return
 			}
 			n += zn
 			buf = buf[zn:]
 			continue
 		}
 		// Accumulate the data to be compressed.
-		m := copy(z.data[z.idx:], buf)
+		m := copy(w.data[w.idx:], buf)
 		n += m
-		z.idx += m
+		w.idx += m
 		buf = buf[m:]
-		if debugFlag {
-			debug("%d bytes copied to buf, current index %d", n, z.idx)
-		}
 
-		if z.idx < len(z.data) {
+		if w.idx < len(w.data) {
 			// Buffer not filled.
-			if debugFlag {
-				debug("need more data for compression")
-			}
-			return n, nil
+			return
 		}
 
 		// Buffer full.
-		if err := z.compressBlock(z.data); err != nil {
-			return n, err
+		if err = w.write(); err != nil {
+			return
 		}
-		z.idx = 0
+		w.idx = 0
 	}
-
-	return n, nil
+	return
 }
 
-// compressBlock compresses a block.
-func (z *Writer) compressBlock(data []byte) error {
-	if !z.NoChecksum {
-		_, _ = z.checksum.Write(data)
-	}
-
-	if z.c != nil {
-		c := make(chan zResult)
-		z.c <- c // Send now to guarantee order
-		go writerCompressBlock(c, z.Header, data)
-		return nil
-	}
-
-	zdata := z.data[z.Header.BlockMaxSize:cap(z.data)]
-	// The compressed block size cannot exceed the input's.
-	var zn int
-
-	if level := z.Header.CompressionLevel; level != 0 {
-		zn, _ = CompressBlockHC(data, zdata, level)
-	} else {
-		zn, _ = CompressBlock(data, zdata, z.hashtable[:])
-	}
-
-	var bLen uint32
-	if debugFlag {
-		debug("block compression %d => %d", len(data), zn)
-	}
-	if zn > 0 && zn < len(data) {
-		// Compressible and compressed size smaller than uncompressed: ok!
-		bLen = uint32(zn)
-		zdata = zdata[:zn]
-	} else {
-		// Uncompressed block.
-		bLen = uint32(len(data)) | compressedBlockFlag
-		zdata = data
-	}
-	if debugFlag {
-		debug("block compression to be written len=%d data len=%d", bLen, len(zdata))
-	}
-
-	// Write the block.
-	if err := z.writeUint32(bLen); err != nil {
-		return err
-	}
-	written, err := z.dst.Write(zdata)
-	if err != nil {
-		return err
-	}
-	if h := z.OnBlockDone; h != nil {
-		h(written)
-	}
-
-	if !z.BlockChecksum {
-		if debugFlag {
-			debug("current frame checksum %x", z.checksum.Sum32())
+func (w *_Writer) write() error {
+	if w.isNotConcurrent() {
+		return w.frame.Blocks.Block.compress(w, w.data, w.ht).write(w)
+	}
+	size := w.frame.Descriptor.Flags.BlockSizeIndex()
+	c := make(chan *FrameDataBlock)
+	w.frame.Blocks.Blocks <- c
+	go func(c chan *FrameDataBlock, data []byte, size BlockSizeIndex) {
+		b := newFrameDataBlock(size)
+		zdata := b.Data
+		c <- b.compress(w, data, nil)
+		// Wait for the compressed or uncompressed data to no longer be in use
+		// and free the allocated buffers
+		if !b.Size.compressed() {
+			zdata, data = data, zdata
 		}
-		return nil
-	}
-	checksum := xxh32.ChecksumZero(zdata)
-	if debugFlag {
-		debug("block checksum %x", checksum)
-		defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }()
-	}
-	return z.writeUint32(checksum)
-}
+		size.put(data)
+		<-c
+		size.put(zdata)
+	}(c, w.data, size)
 
-// Flush flushes any pending compressed data to the underlying writer.
-// Flush does not return until the data has been written.
-// If the underlying writer returns an error, Flush returns that error.
-func (z *Writer) Flush() error {
-	if debugFlag {
-		debug("flush with index %d", z.idx)
-	}
-	if z.idx == 0 {
-		return nil
+	if w.idx > 0 {
+		// Not closed.
+		w.data = size.get()
 	}
+	w.idx = 0
 
-	data := z.data[:z.idx]
-	z.idx = 0
-	if z.c == nil {
-		return z.compressBlock(data)
-	}
-	if !z.NoChecksum {
-		_, _ = z.checksum.Write(data)
-	}
-	c := make(chan zResult)
-	z.c <- c
-	writerCompressBlock(c, z.Header, data)
 	return nil
 }
 
-func (z *Writer) close() error {
-	if z.c == nil {
+// Close closes the Writer, flushing any unwritten data to the underlying io.Writer,
+// but does not close the underlying io.Writer.
+func (w *_Writer) Close() error {
+	switch w.state.state {
+	case writeState:
+	case errorState:
+		return w.state.err
+	default:
 		return nil
 	}
-	// Send a sentinel block (no data to compress) to terminate the writer main goroutine.
-	c := make(chan zResult)
-	z.c <- c
-	c <- zResult{}
-	// Wait for the main goroutine to complete.
-	<-c
-	// At this point the main goroutine has shut down or is about to return.
-	z.c = nil
-	return z.err
-}
-
-// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
-func (z *Writer) Close() error {
-	if !z.Header.done {
-		if err := z.writeHeader(); err != nil {
+	var err error
+	defer func() { w.state.next(err) }()
+	if idx := w.idx; idx > 0 {
+		// Flush pending data.
+		w.data = w.data[:idx]
+		w.idx = 0
+		if err = w.write(); err != nil {
 			return err
 		}
+		w.data = nil
 	}
-	if err := z.Flush(); err != nil {
-		return err
-	}
-	if err := z.close(); err != nil {
-		return err
-	}
-	z.freeBuffers()
-
-	if debugFlag {
-		debug("writing last empty block")
-	}
-	if err := z.writeUint32(0); err != nil {
-		return err
+	if w.isNotConcurrent() {
+		htPool.Put(w.ht)
+		size := w.frame.Descriptor.Flags.BlockSizeIndex()
+		size.put(w.data)
 	}
-	if z.NoChecksum {
-		return nil
-	}
-	checksum := z.checksum.Sum32()
-	if debugFlag {
-		debug("stream checksum %x", checksum)
-	}
-	return z.writeUint32(checksum)
-}
-
-// Reset clears the state of the Writer z such that it is equivalent to its
-// initial state from NewWriter, but instead writing to w.
-// No access to the underlying io.Writer is performed.
-func (z *Writer) Reset(w io.Writer) {
-	n := cap(z.c)
-	_ = z.close()
-	z.freeBuffers()
-	z.Header.Reset()
-	z.dst = w
-	z.checksum.Reset()
-	z.idx = 0
-	z.err = nil
-	z.WithConcurrency(n)
+	return w.frame.closeW(w)
 }
 
-// writeUint32 writes a uint32 to the underlying writer.
-func (z *Writer) writeUint32(x uint32) error {
-	buf := z.buf[:4]
-	binary.LittleEndian.PutUint32(buf, x)
-	_, err := z.dst.Write(buf)
-	return err
-}
-
-// writerCompressBlock compresses data into a pooled buffer and writes its result
-// out to the input channel.
-func writerCompressBlock(c chan zResult, header Header, data []byte) {
-	zdata := getBuffer(header.BlockMaxSize)
-	// The compressed block size cannot exceed the input's.
-	var zn int
-	if level := header.CompressionLevel; level != 0 {
-		zn, _ = CompressBlockHC(data, zdata, level)
-	} else {
-		var hashTable [winSize]int
-		zn, _ = CompressBlock(data, zdata, hashTable[:])
+// Reset clears the state of the Writer w such that it is equivalent to its
+// initial state from NewWriter, but instead writing to writer.
+// Reset keeps the previous options unless overwritten by the supplied ones.
+// No access to writer is performed.
+//
+// w.Close must be called before Reset.
+func (w *_Writer) Reset(writer io.Writer, options ...Option) (err error) {
+	for _, o := range options {
+		if err = o(w); err != nil {
+			break
+		}
 	}
-	var res zResult
-	if zn > 0 && zn < len(data) {
-		res.size = uint32(zn)
-		res.data = zdata[:zn]
-	} else {
-		res.size = uint32(len(data)) | compressedBlockFlag
-		res.data = data
+	w.state.state = noState
+	if w.state.next(err) {
+		return
 	}
-	if header.BlockChecksum {
-		res.checksum = xxh32.ChecksumZero(res.data)
+	w.src = writer
+	w.frame.initW(w)
+	size := w.frame.Descriptor.Flags.BlockSizeIndex()
+	w.data = size.get()
+	w.idx = 0
+	if w.isNotConcurrent() {
+		w.ht = htPool.Get().([]int)
 	}
-	c <- res
+	return nil
 }
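
Taken as a whole, the hunk replaces the Header-driven v3 writer (explicit writeHeader, Flush, close and the goroutine fan-in in WithConcurrency) with a state-machine-driven writer configured through functional options. Based only on the signatures shown above, here is a minimal caller sketch of the new API; it assumes the import path github.com/pierrec/lz4 from the removed v3 import block and relies solely on the defaults applied inside NewWriter, since no exported Option constructors appear in this hunk.

package main

import (
	"io"
	"log"
	"os"

	"github.com/pierrec/lz4" // assumed import path, taken from the removed v3 imports
)

func main() {
	// NewWriter now takes variadic Options and returns (io.WriteCloser, error)
	// instead of a bare *Writer; passing no options keeps the defaults.
	zw, err := lz4.NewWriter(os.Stdout)
	if err != nil {
		log.Fatal(err)
	}
	// Stream stdin through the frame encoder; Write buffers input into block-sized chunks.
	if _, err := io.Copy(zw, os.Stdin); err != nil {
		log.Fatal(err)
	}
	// Close flushes pending data and finalizes the frame; it does not close os.Stdout.
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
}

Per the new doc comment on Reset, Close must be called before the writer is reused via Reset with another destination.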