lz4.go

// Package lz4 implements reading and writing lz4 compressed data (a frame),
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html,
// using an io.Reader (decompression) and io.Writer (compression).
// It is designed to minimize memory usage while maximizing throughput by being able to
// [de]compress data concurrently.
//
// The Reader and the Writer support concurrent processing provided the supplied buffers are
// large enough (in multiples of BlockMaxSize) and there is no block dependency.
// Reader.WriteTo and Writer.ReadFrom leverage this concurrency transparently.
// The runtime.GOMAXPROCS() value determines whether concurrency is used.
//
// Although the block level compression and decompression functions are exposed and are fully compatible
// with the lz4 block format definition, they are low level and should not be used directly.
// For a complete description of an lz4 compressed block, see:
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
//
// See https://github.com/Cyan4973/lz4 for the reference C implementation.
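//
// A minimal round trip, as a sketch (it assumes the NewWriter and NewReader
// constructors defined elsewhere in this package, and uses illustrative
// variable names):
//
//	var buf bytes.Buffer
//	zw := NewWriter(&buf) // compress into buf
//	if _, err := zw.Write(data); err != nil {
//		// handle the error
//	}
//	if err := zw.Close(); err != nil { // finish the frame
//		// handle the error
//	}
//	zr := NewReader(&buf) // decompress from buf
//	out, err := ioutil.ReadAll(zr)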
package lz4

import (
	"hash"
	"sync"
	"unsafe"

	"github.com/pierrec/xxHash/xxHash32"
)
const (
	// Extension is the LZ4 frame file name extension.
	Extension = ".lz4"
	// Version is the LZ4 frame format version.
	Version = 1

	frameMagic     = uint32(0x184D2204)
	frameSkipMagic = uint32(0x184D2A50)

	// The following constants are used to set up the compression algorithm.
	minMatch   = 4  // the minimum length of a match (4 bytes)
	winSizeLog = 16 // LZ4 64Kb window size limit
	winSize    = 1 << winSizeLog
	winMask    = winSize - 1 // 64Kb window of previous data for dependent blocks

	// hashLog determines the size of the hash table used to quickly find a previous match position.
	// Its value influences the compression speed and memory usage, the lower the faster,
	// but at the expense of the compression ratio.
	// 16 seems to be the best compromise.
	hashLog       = 16
	hashTableSize = 1 << hashLog
	hashShift     = uint((minMatch * 8) - hashLog)

	mfLimit      = 8 + minMatch // The last match cannot start within the last 12 bytes.
	skipStrength = 6            // variable step for fast scan
	hasher       = uint32(2654435761) // prime number used to hash minMatch bytes
)
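
// blockHash is a sketch (not part of the original file) showing how the
// constants above combine in the usual LZ4 match finder: the 4 bytes at a
// position, read as a uint32, are multiplied by the Knuth prime and shifted
// so that the result indexes a hashTableSize-entry table.
func blockHash(u uint32) uint32 {
	return (u * hasher) >> hashShift
}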
// bsMapID maps a block max size id to its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
var bsMapID = map[byte]int{4: 64 << 10, 5: 256 << 10, 6: 1 << 20, 7: 4 << 20}

// bsMapValue is the reverse of bsMapID (block max size in bytes to its id).
// It is populated by init below.
var bsMapValue = map[int]byte{}

func init() {
	for i, v := range bsMapID {
		bsMapValue[v] = i
	}
}
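
// The two maps are used in opposite directions: a decoder maps the id found
// in a frame descriptor to a size in bytes, while an encoder maps a
// configured BlockMaxSize back to the id it must store. A sketch (not part
// of the original file) of the decoding direction:
func blockSizeFromID(id byte) (int, bool) {
	size, ok := bsMapID[id]
	return size, ok
}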
var isLittleEndian = getIsLittleEndian()

// getIsLittleEndian reports whether the host byte order is little endian
// by inspecting the lowest byte of an integer set to 0x1.
func getIsLittleEndian() bool {
	i := 0x1
	bs := (*[1]byte)(unsafe.Pointer(&i))
	return bs[0] != 0
}
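
// readUint32LE is a sketch (not part of the original file) of how
// isLittleEndian is typically exploited: on little-endian hosts the 4 bytes
// at the start of b can be loaded with a single unsafe cast, with a portable
// byte-by-byte decode as the fallback. b must hold at least 4 bytes.
func readUint32LE(b []byte) uint32 {
	if isLittleEndian {
		return *(*uint32)(unsafe.Pointer(&b[0]))
	}
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}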
// Header describes the various flags that can be set on a Writer or obtained from a Reader.
// The default values match those of the LZ4 frame format definition (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
//
// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
// It is the caller's responsibility to check them if necessary (typically when using the Reader concurrency).
type Header struct {
	BlockDependency bool   // compressed blocks are dependent (one block depends on the last 64Kb of the previous one)
	BlockChecksum   bool   // compressed blocks are checksummed
	NoChecksum      bool   // when set, the frame checksum is disabled
	BlockMaxSize    int    // the size of the decompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
	Size            uint64 // the frame total size. It is _not_ computed by the Writer.
	HighCompression bool   // use high compression (only for the Writer)
	done            bool   // whether the descriptor was processed (Read or Written and checked)
	// Removed as not supported:
	// Dict bool // a dictionary id is to be used
	// DictID uint32 // the dictionary id read from the frame, if any.
}
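
// exampleHeader is a sketch (not part of the original file) of how a Header
// is typically filled in before compressing; the values shown are
// illustrative choices, not the package defaults.
func exampleHeader() Header {
	return Header{
		BlockMaxSize:  256 << 10, // independent 256Kb blocks (BlockDependency left false)
		BlockChecksum: true,      // checksum every compressed block
	}
}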
// xxhPool wraps the standard pool for xxHash items.
// Putting items back in the pool automatically resets them.
type xxhPool struct {
	sync.Pool
}

func (p *xxhPool) Get() hash.Hash32 {
	return p.Pool.Get().(hash.Hash32)
}

func (p *xxhPool) Put(h hash.Hash32) {
	h.Reset()
	p.Pool.Put(h)
}

// hashPool is used by readers and writers and contains xxHash items.
var hashPool = xxhPool{
	Pool: sync.Pool{
		New: func() interface{} { return xxHash32.New(0) },
	},
}
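
// frameChecksum is a sketch (not part of the original file) showing the
// intended use of hashPool: borrow a zero-seeded xxHash32 hasher, feed it
// the uncompressed frame content, and return it to the pool (which resets
// it) once the checksum has been computed.
func frameChecksum(data []byte) uint32 {
	h := hashPool.Get()
	defer hashPool.Put(h)
	h.Write(data)
	return h.Sum32()
}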