reader.go 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. package lz4
  2. import (
  3. "encoding/binary"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "github.com/pierrec/lz4/internal/xxh32"
  8. )
  9. // Reader implements the LZ4 frame decoder.
  10. // The Header is set after the first call to Read().
  11. // The Header may change between Read() calls in case of concatenated frames.
  12. type Reader struct {
  13. Header
  14. // Handler called when a block has been successfully read.
  15. // It provides the number of bytes read.
  16. OnBlockDone func(size int)
  17. buf [8]byte // Scrap buffer.
  18. pos int64 // Current position in src.
  19. src io.Reader // Source.
  20. zdata []byte // Compressed data.
  21. data []byte // Uncompressed data.
  22. idx int // Index of unread bytes into data.
  23. checksum xxh32.XXHZero // Frame hash.
  24. }
  25. // NewReader returns a new LZ4 frame decoder.
  26. // No access to the underlying io.Reader is performed.
  27. func NewReader(src io.Reader) *Reader {
  28. r := &Reader{src: src}
  29. return r
  30. }
  31. // readHeader checks the frame magic number and parses the frame descriptoz.
  32. // Skippable frames are supported even as a first frame although the LZ4
  33. // specifications recommends skippable frames not to be used as first frames.
  34. func (z *Reader) readHeader(first bool) error {
  35. defer z.checksum.Reset()
  36. buf := z.buf[:]
  37. for {
  38. magic, err := z.readUint32()
  39. if err != nil {
  40. z.pos += 4
  41. if !first && err == io.ErrUnexpectedEOF {
  42. return io.EOF
  43. }
  44. return err
  45. }
  46. if magic == frameMagic {
  47. break
  48. }
  49. if magic>>8 != frameSkipMagic>>8 {
  50. return ErrInvalid
  51. }
  52. skipSize, err := z.readUint32()
  53. if err != nil {
  54. return err
  55. }
  56. z.pos += 4
  57. m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
  58. if err != nil {
  59. return err
  60. }
  61. z.pos += m
  62. }
  63. // Header.
  64. if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
  65. return err
  66. }
  67. z.pos += 8
  68. b := buf[0]
  69. if v := b >> 6; v != Version {
  70. return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
  71. }
  72. if b>>5&1 == 0 {
  73. return fmt.Errorf("lz4: block dependency not supported")
  74. }
  75. z.BlockChecksum = b>>4&1 > 0
  76. frameSize := b>>3&1 > 0
  77. z.NoChecksum = b>>2&1 == 0
  78. bmsID := buf[1] >> 4 & 0x7
  79. bSize, ok := bsMapID[bmsID]
  80. if !ok {
  81. return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
  82. }
  83. z.BlockMaxSize = bSize
  84. // Allocate the compressed/uncompressed buffers.
  85. // The compressed buffer cannot exceed the uncompressed one.
  86. if n := 2 * bSize; cap(z.zdata) < n {
  87. z.zdata = make([]byte, n, n)
  88. }
  89. if debugFlag {
  90. debug("header block max size id=%d size=%d", bmsID, bSize)
  91. }
  92. z.zdata = z.zdata[:bSize]
  93. z.data = z.zdata[:cap(z.zdata)][bSize:]
  94. z.idx = len(z.data)
  95. _, _ = z.checksum.Write(buf[0:2])
  96. if frameSize {
  97. buf := buf[:8]
  98. if _, err := io.ReadFull(z.src, buf); err != nil {
  99. return err
  100. }
  101. z.Size = binary.LittleEndian.Uint64(buf)
  102. z.pos += 8
  103. _, _ = z.checksum.Write(buf)
  104. }
  105. // Header checksum.
  106. if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
  107. return err
  108. }
  109. z.pos++
  110. if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
  111. return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
  112. }
  113. z.Header.done = true
  114. if debugFlag {
  115. debug("header read: %v", z.Header)
  116. }
  117. return nil
  118. }
  119. // Read decompresses data from the underlying source into the supplied buffer.
  120. //
  121. // Since there can be multiple streams concatenated, Header values may
  122. // change between calls to Read(). If that is the case, no data is actually read from
  123. // the underlying io.Reader, to allow for potential input buffer resizing.
  124. func (z *Reader) Read(buf []byte) (int, error) {
  125. if debugFlag {
  126. debug("Read buf len=%d", len(buf))
  127. }
  128. if !z.Header.done {
  129. if err := z.readHeader(true); err != nil {
  130. return 0, err
  131. }
  132. if debugFlag {
  133. debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
  134. len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
  135. }
  136. }
  137. if len(buf) == 0 {
  138. return 0, nil
  139. }
  140. if z.idx == len(z.data) {
  141. // No data ready for reading, process the next block.
  142. if debugFlag {
  143. debug("reading block from writer")
  144. }
  145. // Reset uncompressed buffer
  146. z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
  147. // Block length: 0 = end of frame, highest bit set: uncompressed.
  148. bLen, err := z.readUint32()
  149. if err != nil {
  150. return 0, err
  151. }
  152. z.pos += 4
  153. if bLen == 0 {
  154. // End of frame reached.
  155. if !z.NoChecksum {
  156. // Validate the frame checksum.
  157. checksum, err := z.readUint32()
  158. if err != nil {
  159. return 0, err
  160. }
  161. if debugFlag {
  162. debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
  163. }
  164. z.pos += 4
  165. if h := z.checksum.Sum32(); checksum != h {
  166. return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
  167. }
  168. }
  169. // Get ready for the next concatenated frame and keep the position.
  170. pos := z.pos
  171. z.Reset(z.src)
  172. z.pos = pos
  173. // Since multiple frames can be concatenated, check for more.
  174. return 0, z.readHeader(false)
  175. }
  176. if debugFlag {
  177. debug("raw block size %d", bLen)
  178. }
  179. if bLen&compressedBlockFlag > 0 {
  180. // Uncompressed block.
  181. bLen &= compressedBlockMask
  182. if debugFlag {
  183. debug("uncompressed block size %d", bLen)
  184. }
  185. if int(bLen) > cap(z.data) {
  186. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  187. }
  188. z.data = z.data[:bLen]
  189. if _, err := io.ReadFull(z.src, z.data); err != nil {
  190. return 0, err
  191. }
  192. z.pos += int64(bLen)
  193. if z.OnBlockDone != nil {
  194. z.OnBlockDone(int(bLen))
  195. }
  196. if z.BlockChecksum {
  197. checksum, err := z.readUint32()
  198. if err != nil {
  199. return 0, err
  200. }
  201. z.pos += 4
  202. if h := xxh32.ChecksumZero(z.data); h != checksum {
  203. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  204. }
  205. }
  206. } else {
  207. // Compressed block.
  208. if debugFlag {
  209. debug("compressed block size %d", bLen)
  210. }
  211. if int(bLen) > cap(z.data) {
  212. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  213. }
  214. zdata := z.zdata[:bLen]
  215. if _, err := io.ReadFull(z.src, zdata); err != nil {
  216. return 0, err
  217. }
  218. z.pos += int64(bLen)
  219. if z.BlockChecksum {
  220. checksum, err := z.readUint32()
  221. if err != nil {
  222. return 0, err
  223. }
  224. z.pos += 4
  225. if h := xxh32.ChecksumZero(zdata); h != checksum {
  226. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  227. }
  228. }
  229. n, err := UncompressBlock(zdata, z.data)
  230. if err != nil {
  231. return 0, err
  232. }
  233. z.data = z.data[:n]
  234. if z.OnBlockDone != nil {
  235. z.OnBlockDone(n)
  236. }
  237. }
  238. if !z.NoChecksum {
  239. _, _ = z.checksum.Write(z.data)
  240. if debugFlag {
  241. debug("current frame checksum %x", z.checksum.Sum32())
  242. }
  243. }
  244. z.idx = 0
  245. }
  246. n := copy(buf, z.data[z.idx:])
  247. z.idx += n
  248. if debugFlag {
  249. debug("copied %d bytes to input", n)
  250. }
  251. return n, nil
  252. }
  253. // Reset discards the Reader's state and makes it equivalent to the
  254. // result of its original state from NewReader, but reading from r instead.
  255. // This permits reusing a Reader rather than allocating a new one.
  256. func (z *Reader) Reset(r io.Reader) {
  257. z.Header = Header{}
  258. z.pos = 0
  259. z.src = r
  260. z.zdata = z.zdata[:0]
  261. z.data = z.data[:0]
  262. z.idx = 0
  263. z.checksum.Reset()
  264. }
  265. // readUint32 reads an uint32 into the supplied buffer.
  266. // The idea is to make use of the already allocated buffers avoiding additional allocations.
  267. func (z *Reader) readUint32() (uint32, error) {
  268. buf := z.buf[:4]
  269. _, err := io.ReadFull(z.src, buf)
  270. x := binary.LittleEndian.Uint32(buf)
  271. return x, err
  272. }