level3.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. package flate
  2. import "fmt"
  3. // fastEncL3
  4. type fastEncL3 struct {
  5. fastGen
  6. table [tableSize]tableEntryPrev
  7. }
  8. // Encode uses a similar algorithm to level 2, will check up to two candidates.
  9. func (e *fastEncL3) Encode(dst *tokens, src []byte) {
  10. const (
  11. inputMargin = 8 - 1
  12. minNonLiteralBlockSize = 1 + 1 + inputMargin
  13. )
  14. if debugDeflate && e.cur < 0 {
  15. panic(fmt.Sprint("e.cur < 0: ", e.cur))
  16. }
  17. // Protect against e.cur wraparound.
  18. for e.cur >= bufferReset {
  19. if len(e.hist) == 0 {
  20. for i := range e.table[:] {
  21. e.table[i] = tableEntryPrev{}
  22. }
  23. e.cur = maxMatchOffset
  24. break
  25. }
  26. // Shift down everything in the table that isn't already too far away.
  27. minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
  28. for i := range e.table[:] {
  29. v := e.table[i]
  30. if v.Cur.offset <= minOff {
  31. v.Cur.offset = 0
  32. } else {
  33. v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
  34. }
  35. if v.Prev.offset <= minOff {
  36. v.Prev.offset = 0
  37. } else {
  38. v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
  39. }
  40. e.table[i] = v
  41. }
  42. e.cur = maxMatchOffset
  43. }
  44. s := e.addBlock(src)
  45. // Skip if too small.
  46. if len(src) < minNonLiteralBlockSize {
  47. // We do not fill the token table.
  48. // This will be picked up by caller.
  49. dst.n = uint16(len(src))
  50. return
  51. }
  52. // Override src
  53. src = e.hist
  54. nextEmit := s
  55. // sLimit is when to stop looking for offset/length copies. The inputMargin
  56. // lets us use a fast path for emitLiteral in the main loop, while we are
  57. // looking for copies.
  58. sLimit := int32(len(src) - inputMargin)
  59. // nextEmit is where in src the next emitLiteral should start from.
  60. cv := load3232(src, s)
  61. for {
  62. const skipLog = 6
  63. nextS := s
  64. var candidate tableEntry
  65. for {
  66. nextHash := hash(cv)
  67. s = nextS
  68. nextS = s + 1 + (s-nextEmit)>>skipLog
  69. if nextS > sLimit {
  70. goto emitRemainder
  71. }
  72. candidates := e.table[nextHash]
  73. now := load3232(src, nextS)
  74. // Safe offset distance until s + 4...
  75. minOffset := e.cur + s - (maxMatchOffset - 4)
  76. e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
  77. // Check both candidates
  78. candidate = candidates.Cur
  79. if candidate.offset < minOffset {
  80. cv = now
  81. // Previous will also be invalid, we have nothing.
  82. continue
  83. }
  84. if cv == load3232(src, candidate.offset-e.cur) {
  85. if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
  86. break
  87. }
  88. // Both match and are valid, pick longest.
  89. offset := s - (candidate.offset - e.cur)
  90. o2 := s - (candidates.Prev.offset - e.cur)
  91. l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
  92. if l2 > l1 {
  93. candidate = candidates.Prev
  94. }
  95. break
  96. } else {
  97. // We only check if value mismatches.
  98. // Offset will always be invalid in other cases.
  99. candidate = candidates.Prev
  100. if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
  101. break
  102. }
  103. }
  104. cv = now
  105. }
  106. // Call emitCopy, and then see if another emitCopy could be our next
  107. // move. Repeat until we find no match for the input immediately after
  108. // what was consumed by the last emitCopy call.
  109. //
  110. // If we exit this loop normally then we need to call emitLiteral next,
  111. // though we don't yet know how big the literal will be. We handle that
  112. // by proceeding to the next iteration of the main loop. We also can
  113. // exit this loop via goto if we get close to exhausting the input.
  114. for {
  115. // Invariant: we have a 4-byte match at s, and no need to emit any
  116. // literal bytes prior to s.
  117. // Extend the 4-byte match as long as possible.
  118. //
  119. t := candidate.offset - e.cur
  120. l := e.matchlenLong(s+4, t+4, src) + 4
  121. // Extend backwards
  122. for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
  123. s--
  124. t--
  125. l++
  126. }
  127. if nextEmit < s {
  128. emitLiteral(dst, src[nextEmit:s])
  129. }
  130. dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
  131. s += l
  132. nextEmit = s
  133. if nextS >= s {
  134. s = nextS + 1
  135. }
  136. if s >= sLimit {
  137. t += l
  138. // Index first pair after match end.
  139. if int(t+4) < len(src) && t > 0 {
  140. cv := load3232(src, t)
  141. nextHash := hash(cv)
  142. e.table[nextHash] = tableEntryPrev{
  143. Prev: e.table[nextHash].Cur,
  144. Cur: tableEntry{offset: e.cur + t},
  145. }
  146. }
  147. goto emitRemainder
  148. }
  149. // We could immediately start working at s now, but to improve
  150. // compression we first update the hash table at s-3 to s.
  151. x := load6432(src, s-3)
  152. prevHash := hash(uint32(x))
  153. e.table[prevHash] = tableEntryPrev{
  154. Prev: e.table[prevHash].Cur,
  155. Cur: tableEntry{offset: e.cur + s - 3},
  156. }
  157. x >>= 8
  158. prevHash = hash(uint32(x))
  159. e.table[prevHash] = tableEntryPrev{
  160. Prev: e.table[prevHash].Cur,
  161. Cur: tableEntry{offset: e.cur + s - 2},
  162. }
  163. x >>= 8
  164. prevHash = hash(uint32(x))
  165. e.table[prevHash] = tableEntryPrev{
  166. Prev: e.table[prevHash].Cur,
  167. Cur: tableEntry{offset: e.cur + s - 1},
  168. }
  169. x >>= 8
  170. currHash := hash(uint32(x))
  171. candidates := e.table[currHash]
  172. cv = uint32(x)
  173. e.table[currHash] = tableEntryPrev{
  174. Prev: candidates.Cur,
  175. Cur: tableEntry{offset: s + e.cur},
  176. }
  177. // Check both candidates
  178. candidate = candidates.Cur
  179. minOffset := e.cur + s - (maxMatchOffset - 4)
  180. if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) {
  181. // We only check if value mismatches.
  182. // Offset will always be invalid in other cases.
  183. candidate = candidates.Prev
  184. if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
  185. offset := s - (candidate.offset - e.cur)
  186. if offset <= maxMatchOffset {
  187. continue
  188. }
  189. }
  190. }
  191. cv = uint32(x >> 8)
  192. s++
  193. break
  194. }
  195. }
  196. emitRemainder:
  197. if int(nextEmit) < len(src) {
  198. // If nothing was added, don't encode literals.
  199. if dst.n == 0 {
  200. return
  201. }
  202. emitLiteral(dst, src[nextEmit:])
  203. }
  204. }