stateless.go

package flate

import (
	"io"
	"math"
	"sync"
)

const (
	// maxStatelessBlock is the largest block compressed in one pass;
	// block plus dictionary must fit in int16 offsets.
	maxStatelessBlock = math.MaxInt16

	// The dictionary is taken from the tail of the previous (at most
	// maxStatelessBlock sized) block, so limit its size.
	maxStatelessDict = 8 << 10

	// Hash table used by statelessEnc. hashSL keeps the top slTableBits bits
	// of a 32-bit hash, so indices fall in [0, slTableSize).
	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)
// statelessWriter compresses each Write as an independent block and keeps no
// state between calls.
type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}
// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// Because no memory is kept between Write calls, compression ratio and speed
// will be suboptimal, and the size of the individual Write calls will affect
// the output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}
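// Usage sketch (illustrative, not part of the package): since every Write
// becomes its own independent block, wrapping the stateless writer in a
// bufio.Writer is a simple way to batch small writes into larger blocks and
// improve the ratio. The buffer size and the data variable are placeholders.
//
//	w := NewStatelessWriter(dst)
//	buffered := bufio.NewWriterSize(w, 4<<10)
//	_, _ = buffered.Write(data)
//	_ = buffered.Flush() // flush buffered data before closing the compressor
//	_ = w.Close()        // emits the final EOF block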
// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When the function returns, everything will have been flushed.
// Up to 8KB of an optional dictionary can be given; it is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending a nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	for len(in) > 0 {
		todo := in
		if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		statelessEnc(&dst, todo, int16(len(dict)))
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, Huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			dict = todo[len(todo)-maxStatelessDict:]
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
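// Dictionary sketch (illustrative; bytes and flate refer to the standard
// library, data and dict are placeholders): a dictionary passed to
// StatelessDeflate must also be supplied to the decompressor, for example via
// flate.NewReaderDict.
//
//	var buf bytes.Buffer
//	if err := StatelessDeflate(&buf, data, true, dict); err != nil {
//		// handle error
//	}
//	rc := flate.NewReaderDict(&buf, dict)
//	plain, err := io.ReadAll(rc)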
// hashSL returns a table index for the four bytes in u, keeping the top
// slTableBits bits of a 32-bit multiplicative hash.
func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

func load3216(b []byte, i int16) uint32 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:4]
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load6416(b []byte, i int16) uint64 {
	// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
	b = b[i:]
	b = b[:8]
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}

	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				emitLiteral(dst, src[nextEmit:s])
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}
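// Round-trip sketch (illustrative only; flate refers to the standard library
// compress/flate, data is a placeholder): the stateless writer emits a plain
// DEFLATE stream, so any inflater can read it back.
//
//	var buf bytes.Buffer
//	w := NewStatelessWriter(&buf)
//	_, _ = w.Write(data)
//	_ = w.Close()
//	rc := flate.NewReader(&buf)
//	decoded, err := io.ReadAll(rc)
//	_ = rc.Close()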