123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- package lz4
- import (
- "encoding/binary"
- "fmt"
- "io"
- "io/ioutil"
- "github.com/pierrec/lz4/internal/xxh32"
- )
- // Reader implements the LZ4 frame decoder.
- // The Header is set after the first call to Read().
- // The Header may change between Read() calls in case of concatenated frames.
- type Reader struct {
- Header
- // Handler called when a block has been successfully read.
- // It provides the number of bytes read.
- OnBlockDone func(size int)
- buf [8]byte // Scrap buffer.
- pos int64 // Current position in src.
- src io.Reader // Source.
- zdata []byte // Compressed data.
- data []byte // Uncompressed data.
- idx int // Index of unread bytes into data.
- checksum xxh32.XXHZero // Frame hash.
- skip int64 // Bytes to skip before next read.
- dpos int64 // Position in dest
- }
- // NewReader returns a new LZ4 frame decoder.
- // No access to the underlying io.Reader is performed.
- func NewReader(src io.Reader) *Reader {
- r := &Reader{src: src}
- return r
- }
- // readHeader checks the frame magic number and parses the frame descriptoz.
- // Skippable frames are supported even as a first frame although the LZ4
- // specifications recommends skippable frames not to be used as first frames.
- func (z *Reader) readHeader(first bool) error {
- defer z.checksum.Reset()
- buf := z.buf[:]
- for {
- magic, err := z.readUint32()
- if err != nil {
- z.pos += 4
- if !first && err == io.ErrUnexpectedEOF {
- return io.EOF
- }
- return err
- }
- if magic == frameMagic {
- break
- }
- if magic>>8 != frameSkipMagic>>8 {
- return ErrInvalid
- }
- skipSize, err := z.readUint32()
- if err != nil {
- return err
- }
- z.pos += 4
- m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
- if err != nil {
- return err
- }
- z.pos += m
- }
- // Header.
- if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
- return err
- }
- z.pos += 8
- b := buf[0]
- if v := b >> 6; v != Version {
- return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
- }
- if b>>5&1 == 0 {
- return ErrBlockDependency
- }
- z.BlockChecksum = b>>4&1 > 0
- frameSize := b>>3&1 > 0
- z.NoChecksum = b>>2&1 == 0
- bmsID := buf[1] >> 4 & 0x7
- if bmsID < 4 || bmsID > 7 {
- return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
- }
- bSize := blockSizeIndexToValue(bmsID - 4)
- z.BlockMaxSize = bSize
- // Allocate the compressed/uncompressed buffers.
- // The compressed buffer cannot exceed the uncompressed one.
- if n := 2 * bSize; cap(z.zdata) < n {
- z.zdata = make([]byte, n, n)
- }
- if debugFlag {
- debug("header block max size id=%d size=%d", bmsID, bSize)
- }
- z.zdata = z.zdata[:bSize]
- z.data = z.zdata[:cap(z.zdata)][bSize:]
- z.idx = len(z.data)
- _, _ = z.checksum.Write(buf[0:2])
- if frameSize {
- buf := buf[:8]
- if _, err := io.ReadFull(z.src, buf); err != nil {
- return err
- }
- z.Size = binary.LittleEndian.Uint64(buf)
- z.pos += 8
- _, _ = z.checksum.Write(buf)
- }
- // Header checksum.
- if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
- return err
- }
- z.pos++
- if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
- return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
- }
- z.Header.done = true
- if debugFlag {
- debug("header read: %v", z.Header)
- }
- return nil
- }
- // Read decompresses data from the underlying source into the supplied buffer.
- //
- // Since there can be multiple streams concatenated, Header values may
- // change between calls to Read(). If that is the case, no data is actually read from
- // the underlying io.Reader, to allow for potential input buffer resizing.
- func (z *Reader) Read(buf []byte) (int, error) {
- if debugFlag {
- debug("Read buf len=%d", len(buf))
- }
- if !z.Header.done {
- if err := z.readHeader(true); err != nil {
- return 0, err
- }
- if debugFlag {
- debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
- len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
- }
- }
- if len(buf) == 0 {
- return 0, nil
- }
- if z.idx == len(z.data) {
- // No data ready for reading, process the next block.
- if debugFlag {
- debug("reading block from writer")
- }
- // Reset uncompressed buffer
- z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
- // Block length: 0 = end of frame, highest bit set: uncompressed.
- bLen, err := z.readUint32()
- if err != nil {
- return 0, err
- }
- z.pos += 4
- if bLen == 0 {
- // End of frame reached.
- if !z.NoChecksum {
- // Validate the frame checksum.
- checksum, err := z.readUint32()
- if err != nil {
- return 0, err
- }
- if debugFlag {
- debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
- }
- z.pos += 4
- if h := z.checksum.Sum32(); checksum != h {
- return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
- }
- }
- // Get ready for the next concatenated frame and keep the position.
- pos := z.pos
- z.Reset(z.src)
- z.pos = pos
- // Since multiple frames can be concatenated, check for more.
- return 0, z.readHeader(false)
- }
- if debugFlag {
- debug("raw block size %d", bLen)
- }
- if bLen&compressedBlockFlag > 0 {
- // Uncompressed block.
- bLen &= compressedBlockMask
- if debugFlag {
- debug("uncompressed block size %d", bLen)
- }
- if int(bLen) > cap(z.data) {
- return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
- }
- z.data = z.data[:bLen]
- if _, err := io.ReadFull(z.src, z.data); err != nil {
- return 0, err
- }
- z.pos += int64(bLen)
- if z.OnBlockDone != nil {
- z.OnBlockDone(int(bLen))
- }
- if z.BlockChecksum {
- checksum, err := z.readUint32()
- if err != nil {
- return 0, err
- }
- z.pos += 4
- if h := xxh32.ChecksumZero(z.data); h != checksum {
- return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
- }
- }
- } else {
- // Compressed block.
- if debugFlag {
- debug("compressed block size %d", bLen)
- }
- if int(bLen) > cap(z.data) {
- return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
- }
- zdata := z.zdata[:bLen]
- if _, err := io.ReadFull(z.src, zdata); err != nil {
- return 0, err
- }
- z.pos += int64(bLen)
- if z.BlockChecksum {
- checksum, err := z.readUint32()
- if err != nil {
- return 0, err
- }
- z.pos += 4
- if h := xxh32.ChecksumZero(zdata); h != checksum {
- return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
- }
- }
- n, err := UncompressBlock(zdata, z.data)
- if err != nil {
- return 0, err
- }
- z.data = z.data[:n]
- if z.OnBlockDone != nil {
- z.OnBlockDone(n)
- }
- }
- if !z.NoChecksum {
- _, _ = z.checksum.Write(z.data)
- if debugFlag {
- debug("current frame checksum %x", z.checksum.Sum32())
- }
- }
- z.idx = 0
- }
- if z.skip > int64(len(z.data[z.idx:])) {
- z.skip -= int64(len(z.data[z.idx:]))
- z.dpos += int64(len(z.data[z.idx:]))
- z.idx = len(z.data)
- return 0, nil
- }
- z.idx += int(z.skip)
- z.dpos += z.skip
- z.skip = 0
- n := copy(buf, z.data[z.idx:])
- z.idx += n
- z.dpos += int64(n)
- if debugFlag {
- debug("copied %d bytes to input", n)
- }
- return n, nil
- }
- // Seek implements io.Seeker, but supports seeking forward from the current
- // position only. Any other seek will return an error. Allows skipping output
- // bytes which aren't needed, which in some scenarios is faster than reading
- // and discarding them.
- // Note this may cause future calls to Read() to read 0 bytes if all of the
- // data they would have returned is skipped.
- func (z *Reader) Seek(offset int64, whence int) (int64, error) {
- if offset < 0 || whence != io.SeekCurrent {
- return z.dpos + z.skip, ErrUnsupportedSeek
- }
- z.skip += offset
- return z.dpos + z.skip, nil
- }
- // Reset discards the Reader's state and makes it equivalent to the
- // result of its original state from NewReader, but reading from r instead.
- // This permits reusing a Reader rather than allocating a new one.
- func (z *Reader) Reset(r io.Reader) {
- z.Header = Header{}
- z.pos = 0
- z.src = r
- z.zdata = z.zdata[:0]
- z.data = z.data[:0]
- z.idx = 0
- z.checksum.Reset()
- }
- // readUint32 reads an uint32 into the supplied buffer.
- // The idea is to make use of the already allocated buffers avoiding additional allocations.
- func (z *Reader) readUint32() (uint32, error) {
- buf := z.buf[:4]
- _, err := io.ReadFull(z.src, buf)
- x := binary.LittleEndian.Uint32(buf)
- return x, err
- }
|