encoder_options.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. package zstd
  2. import (
  3. "errors"
  4. "fmt"
  5. "runtime"
  6. "strings"
  7. )
// EOption is an option for creating an encoder.
// An option mutates the accumulated encoderOptions it is given and returns a
// non-nil error when the supplied value is rejected.
type EOption func(*encoderOptions) error
// encoderOptions retains the accumulated state of multiple options.
type encoderOptions struct {
	// concurrent is the encoder concurrency set by WithEncoderConcurrency;
	// setDefault initialises it to GOMAXPROCS.
	concurrent int
	// level is the selected compression level; it picks the implementation
	// returned by encoder().
	level EncoderLevel
	// single holds the WithSingleSegment choice; nil means it was never set.
	single *bool
	// pad is the output size multiple requested through WithEncoderPadding.
	pad int
	// blockSize defaults to 1<<16 and is lowered by WithWindowSize so that it
	// never exceeds windowSize.
	blockSize int
	// windowSize is the maximum allowed back-reference distance.
	windowSize int
	// crc controls whether a CRC value is added to the output.
	crc bool
	// fullZero makes 0 length input encode as a full frame (WithZeroFrames).
	fullZero bool
	// noEntropy skips entropy compression of literals
	// (WithNoEntropyCompression).
	noEntropy bool
	// customWindow is set once WithWindowSize has been applied; it stops
	// WithEncoderLevel from replacing the window size with a per-level default.
	customWindow bool
}
  23. func (o *encoderOptions) setDefault() {
  24. *o = encoderOptions{
  25. // use less ram: true for now, but may change.
  26. concurrent: runtime.GOMAXPROCS(0),
  27. crc: true,
  28. single: nil,
  29. blockSize: 1 << 16,
  30. windowSize: 8 << 20,
  31. level: SpeedDefault,
  32. }
  33. }
  34. // encoder returns an encoder with the selected options.
  35. func (o encoderOptions) encoder() encoder {
  36. switch o.level {
  37. case SpeedDefault:
  38. return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
  39. case SpeedBetterCompression:
  40. return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
  41. case SpeedFastest:
  42. return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
  43. }
  44. panic("unknown compression level")
  45. }
  46. // WithEncoderCRC will add CRC value to output.
  47. // Output will be 4 bytes larger.
  48. func WithEncoderCRC(b bool) EOption {
  49. return func(o *encoderOptions) error { o.crc = b; return nil }
  50. }
  51. // WithEncoderConcurrency will set the concurrency,
  52. // meaning the maximum number of decoders to run concurrently.
  53. // The value supplied must be at least 1.
  54. // By default this will be set to GOMAXPROCS.
  55. func WithEncoderConcurrency(n int) EOption {
  56. return func(o *encoderOptions) error {
  57. if n <= 0 {
  58. return fmt.Errorf("concurrency must be at least 1")
  59. }
  60. o.concurrent = n
  61. return nil
  62. }
  63. }
  64. // WithWindowSize will set the maximum allowed back-reference distance.
  65. // The value must be a power of two between MinWindowSize and MaxWindowSize.
  66. // A larger value will enable better compression but allocate more memory and,
  67. // for above-default values, take considerably longer.
  68. // The default value is determined by the compression level.
  69. func WithWindowSize(n int) EOption {
  70. return func(o *encoderOptions) error {
  71. switch {
  72. case n < MinWindowSize:
  73. return fmt.Errorf("window size must be at least %d", MinWindowSize)
  74. case n > MaxWindowSize:
  75. return fmt.Errorf("window size must be at most %d", MaxWindowSize)
  76. case (n & (n - 1)) != 0:
  77. return errors.New("window size must be a power of 2")
  78. }
  79. o.windowSize = n
  80. o.customWindow = true
  81. if o.blockSize > o.windowSize {
  82. o.blockSize = o.windowSize
  83. }
  84. return nil
  85. }
  86. }
  87. // WithEncoderPadding will add padding to all output so the size will be a multiple of n.
  88. // This can be used to obfuscate the exact output size or make blocks of a certain size.
  89. // The contents will be a skippable frame, so it will be invisible by the decoder.
  90. // n must be > 0 and <= 1GB, 1<<30 bytes.
  91. // The padded area will be filled with data from crypto/rand.Reader.
  92. // If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
  93. func WithEncoderPadding(n int) EOption {
  94. return func(o *encoderOptions) error {
  95. if n <= 0 {
  96. return fmt.Errorf("padding must be at least 1")
  97. }
  98. // No need to waste our time.
  99. if n == 1 {
  100. o.pad = 0
  101. }
  102. if n > 1<<30 {
  103. return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
  104. }
  105. o.pad = n
  106. return nil
  107. }
  108. }
  109. // EncoderLevel predefines encoder compression levels.
  110. // Only use the constants made available, since the actual mapping
  111. // of these values are very likely to change and your compression could change
  112. // unpredictably when upgrading the library.
  113. type EncoderLevel int
  114. const (
  115. speedNotSet EncoderLevel = iota
  116. // SpeedFastest will choose the fastest reasonable compression.
  117. // This is roughly equivalent to the fastest Zstandard mode.
  118. SpeedFastest
  119. // SpeedDefault is the default "pretty fast" compression option.
  120. // This is roughly equivalent to the default Zstandard mode (level 3).
  121. SpeedDefault
  122. // SpeedBetterCompression will yield better compression than the default.
  123. // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
  124. // By using this, notice that CPU usage may go up in the future.
  125. SpeedBetterCompression
  126. // speedLast should be kept as the last actual compression option.
  127. // The is not for external usage, but is used to keep track of the valid options.
  128. speedLast
  129. // SpeedBestCompression will choose the best available compression option.
  130. // For now this is not implemented.
  131. SpeedBestCompression = SpeedBetterCompression
  132. )
  133. // EncoderLevelFromString will convert a string representation of an encoding level back
  134. // to a compression level. The compare is not case sensitive.
  135. // If the string wasn't recognized, (false, SpeedDefault) will be returned.
  136. func EncoderLevelFromString(s string) (bool, EncoderLevel) {
  137. for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ {
  138. if strings.EqualFold(s, l.String()) {
  139. return true, l
  140. }
  141. }
  142. return false, SpeedDefault
  143. }
  144. // EncoderLevelFromZstd will return an encoder level that closest matches the compression
  145. // ratio of a specific zstd compression level.
  146. // Many input values will provide the same compression level.
  147. func EncoderLevelFromZstd(level int) EncoderLevel {
  148. switch {
  149. case level < 3:
  150. return SpeedFastest
  151. case level >= 3 && level < 6:
  152. return SpeedDefault
  153. case level > 5:
  154. return SpeedBetterCompression
  155. }
  156. return SpeedDefault
  157. }
  158. // String provides a string representation of the compression level.
  159. func (e EncoderLevel) String() string {
  160. switch e {
  161. case SpeedFastest:
  162. return "fastest"
  163. case SpeedDefault:
  164. return "default"
  165. case SpeedBetterCompression:
  166. return "better"
  167. default:
  168. return "invalid"
  169. }
  170. }
  171. // WithEncoderLevel specifies a predefined compression level.
  172. func WithEncoderLevel(l EncoderLevel) EOption {
  173. return func(o *encoderOptions) error {
  174. switch {
  175. case l <= speedNotSet || l >= speedLast:
  176. return fmt.Errorf("unknown encoder level")
  177. }
  178. o.level = l
  179. if !o.customWindow {
  180. switch o.level {
  181. case SpeedFastest:
  182. o.windowSize = 4 << 20
  183. case SpeedDefault:
  184. o.windowSize = 8 << 20
  185. case SpeedBetterCompression:
  186. o.windowSize = 16 << 20
  187. }
  188. }
  189. return nil
  190. }
  191. }
  192. // WithZeroFrames will encode 0 length input as full frames.
  193. // This can be needed for compatibility with zstandard usage,
  194. // but is not needed for this package.
  195. func WithZeroFrames(b bool) EOption {
  196. return func(o *encoderOptions) error {
  197. o.fullZero = b
  198. return nil
  199. }
  200. }
  201. // WithNoEntropyCompression will always skip entropy compression of literals.
  202. // This can be useful if content has matches, but unlikely to benefit from entropy
  203. // compression. Usually the slight speed improvement is not worth enabling this.
  204. func WithNoEntropyCompression(b bool) EOption {
  205. return func(o *encoderOptions) error {
  206. o.noEntropy = b
  207. return nil
  208. }
  209. }
  210. // WithSingleSegment will set the "single segment" flag when EncodeAll is used.
  211. // If this flag is set, data must be regenerated within a single continuous memory segment.
  212. // In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
  213. // As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
  214. // In order to preserve the decoder from unreasonable memory requirements,
  215. // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
  216. // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
  217. // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
  218. // If this is not specified, block encodes will automatically choose this based on the input size.
  219. // This setting has no effect on streamed encodes.
  220. func WithSingleSegment(b bool) EOption {
  221. return func(o *encoderOptions) error {
  222. o.single = &b
  223. return nil
  224. }
  225. }