fse.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. // Copyright 2018 Klaus Post. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
  5. // Package fse provides Finite State Entropy encoding and decoding.
  6. //
  7. // Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
  8. // for byte blocks as implemented in zstd.
  9. //
  10. // See https://github.com/klauspost/compress/tree/master/fse for more information.
  11. package fse
  12. import (
  13. "errors"
  14. "fmt"
  15. "math/bits"
  16. )
  17. const (
  18. /*!MEMORY_USAGE :
  19. * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
  20. * Increasing memory usage improves compression ratio
  21. * Reduced memory usage can improve speed, due to cache effect
  22. * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
  23. maxMemoryUsage = 14
  24. defaultMemoryUsage = 13
  25. maxTableLog = maxMemoryUsage - 2
  26. maxTablesize = 1 << maxTableLog
  27. defaultTablelog = defaultMemoryUsage - 2
  28. minTablelog = 5
  29. maxSymbolValue = 255
  30. )
  31. var (
  32. // ErrIncompressible is returned when input is judged to be too hard to compress.
  33. ErrIncompressible = errors.New("input is not compressible")
  34. // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
  35. ErrUseRLE = errors.New("input is single value repeated")
  36. )
  37. // Scratch provides temporary storage for compression and decompression.
  38. type Scratch struct {
  39. // Private
  40. count [maxSymbolValue + 1]uint32
  41. norm [maxSymbolValue + 1]int16
  42. br byteReader
  43. bits bitReader
  44. bw bitWriter
  45. ct cTable // Compression tables.
  46. decTable []decSymbol // Decompression table.
  47. maxCount int // count of the most probable symbol
  48. // Per block parameters.
  49. // These can be used to override compression parameters of the block.
  50. // Do not touch, unless you know what you are doing.
  51. // Out is output buffer.
  52. // If the scratch is re-used before the caller is done processing the output,
  53. // set this field to nil.
  54. // Otherwise the output buffer will be re-used for next Compression/Decompression step
  55. // and allocation will be avoided.
  56. Out []byte
  57. // DecompressLimit limits the maximum decoded size acceptable.
  58. // If > 0 decompression will stop when approximately this many bytes
  59. // has been decoded.
  60. // If 0, maximum size will be 2GB.
  61. DecompressLimit int
  62. symbolLen uint16 // Length of active part of the symbol table.
  63. actualTableLog uint8 // Selected tablelog.
  64. zeroBits bool // no bits has prob > 50%.
  65. clearCount bool // clear count
  66. // MaxSymbolValue will override the maximum symbol value of the next block.
  67. MaxSymbolValue uint8
  68. // TableLog will attempt to override the tablelog for the next block.
  69. TableLog uint8
  70. }
  71. // Histogram allows to populate the histogram and skip that step in the compression,
  72. // It otherwise allows to inspect the histogram when compression is done.
  73. // To indicate that you have populated the histogram call HistogramFinished
  74. // with the value of the highest populated symbol, as well as the number of entries
  75. // in the most populated entry. These are accepted at face value.
  76. // The returned slice will always be length 256.
  77. func (s *Scratch) Histogram() []uint32 {
  78. return s.count[:]
  79. }
  80. // HistogramFinished can be called to indicate that the histogram has been populated.
  81. // maxSymbol is the index of the highest set symbol of the next data segment.
  82. // maxCount is the number of entries in the most populated entry.
  83. // These are accepted at face value.
  84. func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
  85. s.maxCount = maxCount
  86. s.symbolLen = uint16(maxSymbol) + 1
  87. s.clearCount = maxCount != 0
  88. }
  89. // prepare will prepare and allocate scratch tables used for both compression and decompression.
  90. func (s *Scratch) prepare(in []byte) (*Scratch, error) {
  91. if s == nil {
  92. s = &Scratch{}
  93. }
  94. if s.MaxSymbolValue == 0 {
  95. s.MaxSymbolValue = 255
  96. }
  97. if s.TableLog == 0 {
  98. s.TableLog = defaultTablelog
  99. }
  100. if s.TableLog > maxTableLog {
  101. return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
  102. }
  103. if cap(s.Out) == 0 {
  104. s.Out = make([]byte, 0, len(in))
  105. }
  106. if s.clearCount && s.maxCount == 0 {
  107. for i := range s.count {
  108. s.count[i] = 0
  109. }
  110. s.clearCount = false
  111. }
  112. s.br.init(in)
  113. if s.DecompressLimit == 0 {
  114. // Max size 2GB.
  115. s.DecompressLimit = (2 << 30) - 1
  116. }
  117. return s, nil
  118. }
  119. // tableStep returns the next table index.
  120. func tableStep(tableSize uint32) uint32 {
  121. return (tableSize >> 1) + (tableSize >> 3) + 3
  122. }
  123. func highBits(val uint32) (n uint32) {
  124. return uint32(bits.Len32(val) - 1)
  125. }