level5.go

package flate

import "fmt"
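// fastEncL5 is the level 5 encoder. It combines a short hash table keyed on
// 4 bytes (hash4x64) holding one entry per slot with a long table keyed on
// 7 bytes (hash7) that keeps both the current and the previous entry per slot.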
type fastEncL5 struct {
	fastGen
	table  [tableSize]tableEntry
	bTable [tableSize]tableEntryPrev
}
func (e *fastEncL5) Encode(dst *tokens, src []byte) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugDeflate && e.cur < 0 {
		panic(fmt.Sprint("e.cur < 0: ", e.cur))
	}
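	// Table offsets are stored as absolute positions (pos + e.cur); once e.cur
	// grows past bufferReset the entries are shifted down or cleared so the
	// int32 offset arithmetic below cannot wrap.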
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.bTable[:] {
				e.bTable[i] = tableEntryPrev{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0
			} else {
				v = v - e.cur + maxMatchOffset
			}
			e.table[i].offset = v
		}
		for i := range e.bTable[:] {
			v := e.bTable[i]
			if v.Cur.offset <= minOff {
				v.Cur.offset = 0
				v.Prev.offset = 0
			} else {
				v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
				if v.Prev.offset <= minOff {
					v.Prev.offset = 0
				} else {
					v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
				}
			}
			e.bTable[i] = v
		}
		e.cur = maxMatchOffset
	}
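	// addBlock appends src to e.hist and returns the offset at which the new
	// data starts within the history buffer.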
	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}
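	// From here on, s and nextEmit index into e.hist, which holds any retained
	// history followed by the block just added.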
	// Override src
	src = e.hist
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load6432(src, s)
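	// Main loop: the inner loop probes both tables at each position. The short
	// table yields one 4-byte candidate; the long table yields its current and
	// previous 7-byte candidates. Long candidates are tried first, and a short
	// match is still compared against the long candidates at the following
	// position, with the longer match winning.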
	for {
		const skipLog = 6
		const doEvery = 1

		nextS := s
		var l int32
		var t int32
		for {
			nextHashS := hash4x64(cv, tableBits)
			nextHashL := hash7(cv, tableBits)

			s = nextS
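			// Step size grows with the distance since the last literal emit:
			// every 2^skipLog bytes without a match adds one byte of skip.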
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}
			// Fetch a short+long candidate
			sCandidate := e.table[nextHashS]
			lCandidate := e.bTable[nextHashL]
			next := load6432(src, nextS)
			entry := tableEntry{offset: s + e.cur}
			e.table[nextHashS] = entry
			eLong := &e.bTable[nextHashL]
			eLong.Cur, eLong.Prev = entry, eLong.Cur

			nextHashS = hash4x64(next, tableBits)
			nextHashL = hash7(next, tableBits)

			t = lCandidate.Cur.offset - e.cur
			if s-t < maxMatchOffset {
				if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
					// Store the next match
					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
					eLong := &e.bTable[nextHashL]
					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur

					t2 := lCandidate.Prev.offset - e.cur
					if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
						l = e.matchlen(s+4, t+4, src) + 4
						ml1 := e.matchlen(s+4, t2+4, src) + 4
						if ml1 > l {
							t = t2
							l = ml1
							break
						}
					}
					break
				}
				t = lCandidate.Prev.offset - e.cur
				if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
					// Store the next match
					e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
					eLong := &e.bTable[nextHashL]
					eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
					break
				}
			}
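			// No usable long candidate; fall back to the short (4-byte) candidate.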
			t = sCandidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
				// Found a 4-byte match...
				l = e.matchlen(s+4, t+4, src) + 4
				lCandidate = e.bTable[nextHashL]
				// Store the next match
				e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
				eLong := &e.bTable[nextHashL]
				eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur

				// If the next long is a candidate, use that...
				t2 := lCandidate.Cur.offset - e.cur
				if nextS-t2 < maxMatchOffset {
					if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
						ml := e.matchlen(nextS+4, t2+4, src) + 4
						if ml > l {
							t = t2
							s = nextS
							l = ml
							break
						}
					}
					// If the previous long is a candidate, use that...
					t2 = lCandidate.Prev.offset - e.cur
					if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
						ml := e.matchlen(nextS+4, t2+4, src) + 4
						if ml > l {
							t = t2
							s = nextS
							l = ml
							break
						}
					}
				}
				break
			}
			cv = next
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.

		// Extend the 4-byte match as long as possible.
		if l == 0 {
			l = e.matchlenLong(s+4, t+4, src) + 4
		} else if l == maxMatchLength {
			l += e.matchlenLong(s+l, t+l, src)
		}

		// Extend backwards
		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}
		if nextEmit < s {
			emitLiteral(dst, src[nextEmit:s])
		}
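		// Debug-only sanity checks: the match must have a positive offset
		// within maxMatchOffset and a length of at least baseMatchLength.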
		if debugDeflate {
			if t >= s {
				panic(fmt.Sprintln("s-t", s, t))
			}
			if (s - t) > maxMatchOffset {
				panic(fmt.Sprintln("mmo", s-t))
			}
			if l < baseMatchLength {
				panic("bml")
			}
		}
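		// Emit the match token: l bytes at offset s-t, stored biased by
		// baseMatchOffset.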
		dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
		s += l
		nextEmit = s
		if nextS >= s {
			s = nextS + 1
		}

		if s >= sLimit {
			goto emitRemainder
		}
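		// Re-index positions covered by the match just emitted, so later
		// probes can find matches that start inside it.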
		// Store every 3rd hash in-between.
		if true {
			const hashEvery = 3

			i := s - l + 1
			if i < s-1 {
				cv := load6432(src, i)
				t := tableEntry{offset: i + e.cur}
				e.table[hash4x64(cv, tableBits)] = t
				eLong := &e.bTable[hash7(cv, tableBits)]
				eLong.Cur, eLong.Prev = t, eLong.Cur

				// Do a long at i+1
				cv >>= 8
				t = tableEntry{offset: t.offset + 1}
				eLong = &e.bTable[hash7(cv, tableBits)]
				eLong.Cur, eLong.Prev = t, eLong.Cur

				// We only have enough bits for a short entry at i+2
				cv >>= 8
				t = tableEntry{offset: t.offset + 1}
				e.table[hash4x64(cv, tableBits)] = t

				// Skip one - otherwise we risk hitting 's'
				i += 4
				for ; i < s-1; i += hashEvery {
					cv := load6432(src, i)
					t := tableEntry{offset: i + e.cur}
					t2 := tableEntry{offset: t.offset + 1}
					eLong := &e.bTable[hash7(cv, tableBits)]
					eLong.Cur, eLong.Prev = t, eLong.Cur
					e.table[hash4u(uint32(cv>>8), tableBits)] = t2
				}
			}
		}
		// We could immediately start working at s now, but to improve
		// compression we first update the hash table at s-1 and at s.
		x := load6432(src, s-1)
		o := e.cur + s - 1
		prevHashS := hash4x64(x, tableBits)
		prevHashL := hash7(x, tableBits)
		e.table[prevHashS] = tableEntry{offset: o}
		eLong := &e.bTable[prevHashL]
		eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
		cv = x >> 8
	}
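// emitRemainder: flush any bytes after the last match as literals.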
emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}

		emitLiteral(dst, src[nextEmit:])
	}
}