reader.go 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. package lz4
  2. import (
  3. "encoding/binary"
  4. "fmt"
  5. "io"
  6. "io/ioutil"
  7. "github.com/pierrec/lz4/internal/xxh32"
  8. )
  9. // Reader implements the LZ4 frame decoder.
  10. // The Header is set after the first call to Read().
  11. // The Header may change between Read() calls in case of concatenated frames.
  12. type Reader struct {
  13. Header
  14. // Handler called when a block has been successfully read.
  15. // It provides the number of bytes read.
  16. OnBlockDone func(size int)
  17. buf [8]byte // Scrap buffer.
  18. pos int64 // Current position in src.
  19. src io.Reader // Source.
  20. zdata []byte // Compressed data.
  21. data []byte // Uncompressed data.
  22. idx int // Index of unread bytes into data.
  23. checksum xxh32.XXHZero // Frame hash.
  24. skip int64 // Bytes to skip before next read.
  25. dpos int64 // Position in dest
  26. }
  27. // NewReader returns a new LZ4 frame decoder.
  28. // No access to the underlying io.Reader is performed.
  29. func NewReader(src io.Reader) *Reader {
  30. r := &Reader{src: src}
  31. return r
  32. }
  33. // readHeader checks the frame magic number and parses the frame descriptoz.
  34. // Skippable frames are supported even as a first frame although the LZ4
  35. // specifications recommends skippable frames not to be used as first frames.
  36. func (z *Reader) readHeader(first bool) error {
  37. defer z.checksum.Reset()
  38. buf := z.buf[:]
  39. for {
  40. magic, err := z.readUint32()
  41. if err != nil {
  42. z.pos += 4
  43. if !first && err == io.ErrUnexpectedEOF {
  44. return io.EOF
  45. }
  46. return err
  47. }
  48. if magic == frameMagic {
  49. break
  50. }
  51. if magic>>8 != frameSkipMagic>>8 {
  52. return ErrInvalid
  53. }
  54. skipSize, err := z.readUint32()
  55. if err != nil {
  56. return err
  57. }
  58. z.pos += 4
  59. m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
  60. if err != nil {
  61. return err
  62. }
  63. z.pos += m
  64. }
  65. // Header.
  66. if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
  67. return err
  68. }
  69. z.pos += 8
  70. b := buf[0]
  71. if v := b >> 6; v != Version {
  72. return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
  73. }
  74. if b>>5&1 == 0 {
  75. return ErrBlockDependency
  76. }
  77. z.BlockChecksum = b>>4&1 > 0
  78. frameSize := b>>3&1 > 0
  79. z.NoChecksum = b>>2&1 == 0
  80. bmsID := buf[1] >> 4 & 0x7
  81. if bmsID < 4 || bmsID > 7 {
  82. return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
  83. }
  84. bSize := blockSizeIndexToValue(bmsID - 4)
  85. z.BlockMaxSize = bSize
  86. // Allocate the compressed/uncompressed buffers.
  87. // The compressed buffer cannot exceed the uncompressed one.
  88. if n := 2 * bSize; cap(z.zdata) < n {
  89. z.zdata = make([]byte, n, n)
  90. }
  91. if debugFlag {
  92. debug("header block max size id=%d size=%d", bmsID, bSize)
  93. }
  94. z.zdata = z.zdata[:bSize]
  95. z.data = z.zdata[:cap(z.zdata)][bSize:]
  96. z.idx = len(z.data)
  97. _, _ = z.checksum.Write(buf[0:2])
  98. if frameSize {
  99. buf := buf[:8]
  100. if _, err := io.ReadFull(z.src, buf); err != nil {
  101. return err
  102. }
  103. z.Size = binary.LittleEndian.Uint64(buf)
  104. z.pos += 8
  105. _, _ = z.checksum.Write(buf)
  106. }
  107. // Header checksum.
  108. if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
  109. return err
  110. }
  111. z.pos++
  112. if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
  113. return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
  114. }
  115. z.Header.done = true
  116. if debugFlag {
  117. debug("header read: %v", z.Header)
  118. }
  119. return nil
  120. }
  121. // Read decompresses data from the underlying source into the supplied buffer.
  122. //
  123. // Since there can be multiple streams concatenated, Header values may
  124. // change between calls to Read(). If that is the case, no data is actually read from
  125. // the underlying io.Reader, to allow for potential input buffer resizing.
  126. func (z *Reader) Read(buf []byte) (int, error) {
  127. if debugFlag {
  128. debug("Read buf len=%d", len(buf))
  129. }
  130. if !z.Header.done {
  131. if err := z.readHeader(true); err != nil {
  132. return 0, err
  133. }
  134. if debugFlag {
  135. debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
  136. len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
  137. }
  138. }
  139. if len(buf) == 0 {
  140. return 0, nil
  141. }
  142. if z.idx == len(z.data) {
  143. // No data ready for reading, process the next block.
  144. if debugFlag {
  145. debug("reading block from writer")
  146. }
  147. // Reset uncompressed buffer
  148. z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
  149. // Block length: 0 = end of frame, highest bit set: uncompressed.
  150. bLen, err := z.readUint32()
  151. if err != nil {
  152. return 0, err
  153. }
  154. z.pos += 4
  155. if bLen == 0 {
  156. // End of frame reached.
  157. if !z.NoChecksum {
  158. // Validate the frame checksum.
  159. checksum, err := z.readUint32()
  160. if err != nil {
  161. return 0, err
  162. }
  163. if debugFlag {
  164. debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
  165. }
  166. z.pos += 4
  167. if h := z.checksum.Sum32(); checksum != h {
  168. return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
  169. }
  170. }
  171. // Get ready for the next concatenated frame and keep the position.
  172. pos := z.pos
  173. z.Reset(z.src)
  174. z.pos = pos
  175. // Since multiple frames can be concatenated, check for more.
  176. return 0, z.readHeader(false)
  177. }
  178. if debugFlag {
  179. debug("raw block size %d", bLen)
  180. }
  181. if bLen&compressedBlockFlag > 0 {
  182. // Uncompressed block.
  183. bLen &= compressedBlockMask
  184. if debugFlag {
  185. debug("uncompressed block size %d", bLen)
  186. }
  187. if int(bLen) > cap(z.data) {
  188. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  189. }
  190. z.data = z.data[:bLen]
  191. if _, err := io.ReadFull(z.src, z.data); err != nil {
  192. return 0, err
  193. }
  194. z.pos += int64(bLen)
  195. if z.OnBlockDone != nil {
  196. z.OnBlockDone(int(bLen))
  197. }
  198. if z.BlockChecksum {
  199. checksum, err := z.readUint32()
  200. if err != nil {
  201. return 0, err
  202. }
  203. z.pos += 4
  204. if h := xxh32.ChecksumZero(z.data); h != checksum {
  205. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  206. }
  207. }
  208. } else {
  209. // Compressed block.
  210. if debugFlag {
  211. debug("compressed block size %d", bLen)
  212. }
  213. if int(bLen) > cap(z.data) {
  214. return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
  215. }
  216. zdata := z.zdata[:bLen]
  217. if _, err := io.ReadFull(z.src, zdata); err != nil {
  218. return 0, err
  219. }
  220. z.pos += int64(bLen)
  221. if z.BlockChecksum {
  222. checksum, err := z.readUint32()
  223. if err != nil {
  224. return 0, err
  225. }
  226. z.pos += 4
  227. if h := xxh32.ChecksumZero(zdata); h != checksum {
  228. return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
  229. }
  230. }
  231. n, err := UncompressBlock(zdata, z.data)
  232. if err != nil {
  233. return 0, err
  234. }
  235. z.data = z.data[:n]
  236. if z.OnBlockDone != nil {
  237. z.OnBlockDone(n)
  238. }
  239. }
  240. if !z.NoChecksum {
  241. _, _ = z.checksum.Write(z.data)
  242. if debugFlag {
  243. debug("current frame checksum %x", z.checksum.Sum32())
  244. }
  245. }
  246. z.idx = 0
  247. }
  248. if z.skip > int64(len(z.data[z.idx:])) {
  249. z.skip -= int64(len(z.data[z.idx:]))
  250. z.dpos += int64(len(z.data[z.idx:]))
  251. z.idx = len(z.data)
  252. return 0, nil
  253. }
  254. z.idx += int(z.skip)
  255. z.dpos += z.skip
  256. z.skip = 0
  257. n := copy(buf, z.data[z.idx:])
  258. z.idx += n
  259. z.dpos += int64(n)
  260. if debugFlag {
  261. debug("copied %d bytes to input", n)
  262. }
  263. return n, nil
  264. }
  265. // Seek implements io.Seeker, but supports seeking forward from the current
  266. // position only. Any other seek will return an error. Allows skipping output
  267. // bytes which aren't needed, which in some scenarios is faster than reading
  268. // and discarding them.
  269. // Note this may cause future calls to Read() to read 0 bytes if all of the
  270. // data they would have returned is skipped.
  271. func (z *Reader) Seek(offset int64, whence int) (int64, error) {
  272. if offset < 0 || whence != io.SeekCurrent {
  273. return z.dpos + z.skip, ErrUnsupportedSeek
  274. }
  275. z.skip += offset
  276. return z.dpos + z.skip, nil
  277. }
  278. // Reset discards the Reader's state and makes it equivalent to the
  279. // result of its original state from NewReader, but reading from r instead.
  280. // This permits reusing a Reader rather than allocating a new one.
  281. func (z *Reader) Reset(r io.Reader) {
  282. z.Header = Header{}
  283. z.pos = 0
  284. z.src = r
  285. z.zdata = z.zdata[:0]
  286. z.data = z.data[:0]
  287. z.idx = 0
  288. z.checksum.Reset()
  289. }
  290. // readUint32 reads an uint32 into the supplied buffer.
  291. // The idea is to make use of the already allocated buffers avoiding additional allocations.
  292. func (z *Reader) readUint32() (uint32, error) {
  293. buf := z.buf[:4]
  294. _, err := io.ReadFull(z.src, buf)
  295. x := binary.LittleEndian.Uint32(buf)
  296. return x, err
  297. }