frame.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. // Package lz4stream provides the types that support reading and writing LZ4 data streams.
  2. package lz4stream
  3. import (
  4. "encoding/binary"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "github.com/pierrec/lz4/v4/internal/lz4block"
  9. "github.com/pierrec/lz4/v4/internal/lz4errors"
  10. "github.com/pierrec/lz4/v4/internal/xxh32"
  11. )
  12. //go:generate go run gen.go
  13. const (
  14. frameMagic uint32 = 0x184D2204
  15. frameSkipMagic uint32 = 0x184D2A50
  16. )
  17. func NewFrame() *Frame {
  18. return &Frame{}
  19. }
  20. type Frame struct {
  21. buf [15]byte // frame descriptor needs at most 4(magic)+4+8+1=11 bytes
  22. Magic uint32
  23. Descriptor FrameDescriptor
  24. Blocks Blocks
  25. Checksum uint32
  26. checksum xxh32.XXHZero
  27. }
  28. // Reset allows reusing the Frame.
  29. // The Descriptor configuration is not modified.
  30. func (f *Frame) Reset(num int) {
  31. f.Magic = 0
  32. f.Descriptor.Checksum = 0
  33. f.Descriptor.ContentSize = 0
  34. _ = f.Blocks.closeW(f, num)
  35. f.Checksum = 0
  36. }
  37. func (f *Frame) InitW(dst io.Writer, num int) {
  38. f.Magic = frameMagic
  39. f.Descriptor.initW()
  40. f.Blocks.initW(f, dst, num)
  41. f.checksum.Reset()
  42. }
  43. func (f *Frame) CloseW(dst io.Writer, num int) error {
  44. if err := f.Blocks.closeW(f, num); err != nil {
  45. return err
  46. }
  47. buf := f.buf[:0]
  48. // End mark (data block size of uint32(0)).
  49. buf = append(buf, 0, 0, 0, 0)
  50. if f.Descriptor.Flags.ContentChecksum() {
  51. buf = f.checksum.Sum(buf)
  52. }
  53. _, err := dst.Write(buf)
  54. return err
  55. }
  56. func (f *Frame) InitR(src io.Reader) error {
  57. if f.Magic > 0 {
  58. // Header already read.
  59. return nil
  60. }
  61. newFrame:
  62. var err error
  63. if f.Magic, err = f.readUint32(src); err != nil {
  64. return err
  65. }
  66. switch m := f.Magic; {
  67. case m == frameMagic:
  68. // All 16 values of frameSkipMagic are valid.
  69. case m>>8 == frameSkipMagic>>8:
  70. var skip uint32
  71. if err := binary.Read(src, binary.LittleEndian, &skip); err != nil {
  72. return err
  73. }
  74. if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil {
  75. return err
  76. }
  77. goto newFrame
  78. default:
  79. return lz4errors.ErrInvalidFrame
  80. }
  81. if err := f.Descriptor.initR(f, src); err != nil {
  82. return err
  83. }
  84. f.Blocks.initR(f)
  85. f.checksum.Reset()
  86. return nil
  87. }
  88. func (f *Frame) CloseR(src io.Reader) (err error) {
  89. if !f.Descriptor.Flags.ContentChecksum() {
  90. return nil
  91. }
  92. if f.Checksum, err = f.readUint32(src); err != nil {
  93. return err
  94. }
  95. if c := f.checksum.Sum32(); c != f.Checksum {
  96. return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, c, f.Checksum)
  97. }
  98. return nil
  99. }
  100. type FrameDescriptor struct {
  101. Flags DescriptorFlags
  102. ContentSize uint64
  103. Checksum uint8
  104. }
  105. func (fd *FrameDescriptor) initW() {
  106. fd.Flags.VersionSet(1)
  107. fd.Flags.BlockIndependenceSet(true)
  108. }
  109. func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error {
  110. if fd.Checksum > 0 {
  111. // Header already written.
  112. return nil
  113. }
  114. buf := f.buf[:4+2]
  115. // Write the magic number here even though it belongs to the Frame.
  116. binary.LittleEndian.PutUint32(buf, f.Magic)
  117. binary.LittleEndian.PutUint16(buf[4:], uint16(fd.Flags))
  118. if fd.Flags.Size() {
  119. buf = buf[:4+2+8]
  120. binary.LittleEndian.PutUint64(buf[4+2:], fd.ContentSize)
  121. }
  122. fd.Checksum = descriptorChecksum(buf[4:])
  123. buf = append(buf, fd.Checksum)
  124. _, err := dst.Write(buf)
  125. return err
  126. }
  127. func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error {
  128. // Read the flags and the checksum, hoping that there is not content size.
  129. buf := f.buf[:3]
  130. if _, err := io.ReadFull(src, buf); err != nil {
  131. return err
  132. }
  133. descr := binary.LittleEndian.Uint16(buf)
  134. fd.Flags = DescriptorFlags(descr)
  135. if fd.Flags.Size() {
  136. // Append the 8 missing bytes.
  137. buf = buf[:3+8]
  138. if _, err := io.ReadFull(src, buf[3:]); err != nil {
  139. return err
  140. }
  141. fd.ContentSize = binary.LittleEndian.Uint64(buf[2:])
  142. }
  143. fd.Checksum = buf[len(buf)-1] // the checksum is the last byte
  144. buf = buf[:len(buf)-1] // all descriptor fields except checksum
  145. if c := descriptorChecksum(buf); fd.Checksum != c {
  146. return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum)
  147. }
  148. // Validate the elements that can be.
  149. if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() {
  150. return lz4errors.ErrOptionInvalidBlockSize
  151. }
  152. return nil
  153. }
  154. func descriptorChecksum(buf []byte) byte {
  155. return byte(xxh32.ChecksumZero(buf) >> 8)
  156. }
  157. type Blocks struct {
  158. Block *FrameDataBlock
  159. Blocks chan chan *FrameDataBlock
  160. err error
  161. }
  162. func (b *Blocks) initW(f *Frame, dst io.Writer, num int) {
  163. size := f.Descriptor.Flags.BlockSizeIndex()
  164. if num == 1 {
  165. b.Blocks = nil
  166. b.Block = NewFrameDataBlock(size)
  167. return
  168. }
  169. b.Block = nil
  170. if cap(b.Blocks) != num {
  171. b.Blocks = make(chan chan *FrameDataBlock, num)
  172. }
  173. // goroutine managing concurrent block compression goroutines.
  174. go func() {
  175. // Process next block compression item.
  176. for c := range b.Blocks {
  177. // Read the next compressed block result.
  178. // Waiting here ensures that the blocks are output in the order they were sent.
  179. // The incoming channel is always closed as it indicates to the caller that
  180. // the block has been processed.
  181. block := <-c
  182. if block == nil {
  183. // Notify the block compression routine that we are done with its result.
  184. // This is used when a sentinel block is sent to terminate the compression.
  185. close(c)
  186. return
  187. }
  188. // Do not attempt to write the block upon any previous failure.
  189. if b.err == nil {
  190. // Write the block.
  191. if err := block.Write(f, dst); err != nil && b.err == nil {
  192. // Keep the first error.
  193. b.err = err
  194. // All pending compression goroutines need to shut down, so we need to keep going.
  195. }
  196. }
  197. close(c)
  198. }
  199. }()
  200. }
  201. func (b *Blocks) closeW(f *Frame, num int) error {
  202. if num == 1 {
  203. if b.Block == nil {
  204. // Not initialized yet.
  205. return nil
  206. }
  207. b.Block.CloseW(f)
  208. return nil
  209. }
  210. if b.Blocks == nil {
  211. // Not initialized yet.
  212. return nil
  213. }
  214. c := make(chan *FrameDataBlock)
  215. b.Blocks <- c
  216. c <- nil
  217. <-c
  218. err := b.err
  219. b.err = nil
  220. return err
  221. }
  222. func (b *Blocks) initR(f *Frame) {
  223. size := f.Descriptor.Flags.BlockSizeIndex()
  224. b.Block = NewFrameDataBlock(size)
  225. }
  226. func NewFrameDataBlock(size lz4block.BlockSizeIndex) *FrameDataBlock {
  227. buf := size.Get()
  228. return &FrameDataBlock{Data: buf, data: buf}
  229. }
  230. type FrameDataBlock struct {
  231. Size DataBlockSize
  232. Data []byte // compressed or uncompressed data (.data or .src)
  233. Checksum uint32
  234. data []byte // buffer for compressed data
  235. src []byte // uncompressed data
  236. }
  237. func (b *FrameDataBlock) CloseW(f *Frame) {
  238. if b.data != nil {
  239. // Block was not already closed.
  240. size := f.Descriptor.Flags.BlockSizeIndex()
  241. size.Put(b.data)
  242. b.Data = nil
  243. b.data = nil
  244. b.src = nil
  245. }
  246. }
  247. // Block compression errors are ignored since the buffer is sized appropriately.
  248. func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock {
  249. data := b.data[:len(src)] // trigger the incompressible flag in CompressBlock
  250. var n int
  251. switch level {
  252. case lz4block.Fast:
  253. n, _ = lz4block.CompressBlock(src, data, nil)
  254. default:
  255. n, _ = lz4block.CompressBlockHC(src, data, level, nil, nil)
  256. }
  257. if n == 0 {
  258. b.Size.UncompressedSet(true)
  259. b.Data = src
  260. } else {
  261. b.Size.UncompressedSet(false)
  262. b.Data = data[:n]
  263. }
  264. b.Size.sizeSet(len(b.Data))
  265. b.src = src // keep track of the source for content checksum
  266. if f.Descriptor.Flags.BlockChecksum() {
  267. b.Checksum = xxh32.ChecksumZero(src)
  268. }
  269. return b
  270. }
  271. func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error {
  272. if f.Descriptor.Flags.ContentChecksum() {
  273. _, _ = f.checksum.Write(b.src)
  274. }
  275. buf := f.buf[:]
  276. binary.LittleEndian.PutUint32(buf, uint32(b.Size))
  277. if _, err := dst.Write(buf[:4]); err != nil {
  278. return err
  279. }
  280. if _, err := dst.Write(b.Data); err != nil {
  281. return err
  282. }
  283. if b.Checksum == 0 {
  284. return nil
  285. }
  286. binary.LittleEndian.PutUint32(buf, b.Checksum)
  287. _, err := dst.Write(buf[:4])
  288. return err
  289. }
  290. func (b *FrameDataBlock) Uncompress(f *Frame, src io.Reader, dst []byte) (int, error) {
  291. x, err := f.readUint32(src)
  292. if err != nil {
  293. return 0, err
  294. }
  295. b.Size = DataBlockSize(x)
  296. if b.Size == 0 {
  297. // End of frame reached.
  298. return 0, io.EOF
  299. }
  300. isCompressed := !b.Size.Uncompressed()
  301. size := b.Size.size()
  302. var data []byte
  303. if isCompressed {
  304. // Data is first copied into b.Data and then it will get uncompressed into dst.
  305. data = b.Data
  306. } else {
  307. // Data is directly copied into dst as it is not compressed.
  308. data = dst
  309. }
  310. data = data[:size]
  311. if _, err := io.ReadFull(src, data); err != nil {
  312. return 0, err
  313. }
  314. if isCompressed {
  315. n, err := lz4block.UncompressBlock(data, dst)
  316. if err != nil {
  317. return 0, err
  318. }
  319. data = dst[:n]
  320. }
  321. if f.Descriptor.Flags.BlockChecksum() {
  322. var err error
  323. if b.Checksum, err = f.readUint32(src); err != nil {
  324. return 0, err
  325. }
  326. if c := xxh32.ChecksumZero(data); c != b.Checksum {
  327. return 0, fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, c, b.Checksum)
  328. }
  329. }
  330. if f.Descriptor.Flags.ContentChecksum() {
  331. _, _ = f.checksum.Write(data)
  332. }
  333. return len(data), nil
  334. }
  335. func (f *Frame) readUint32(r io.Reader) (x uint32, err error) {
  336. if _, err = io.ReadFull(r, f.buf[:4]); err != nil {
  337. return
  338. }
  339. x = binary.LittleEndian.Uint32(f.buf[:4])
  340. return
  341. }