// level6.go (7.2 KB)
  1. package flate
  2. import "fmt"
  3. type fastEncL6 struct {
  4. fastGen
  5. table [tableSize]tableEntry
  6. bTable [tableSize]tableEntryPrev
  7. }
  8. func (e *fastEncL6) Encode(dst *tokens, src []byte) {
  9. const (
  10. inputMargin = 12 - 1
  11. minNonLiteralBlockSize = 1 + 1 + inputMargin
  12. )
  13. if debugDeflate && e.cur < 0 {
  14. panic(fmt.Sprint("e.cur < 0: ", e.cur))
  15. }
  16. // Protect against e.cur wraparound.
  17. for e.cur >= bufferReset {
  18. if len(e.hist) == 0 {
  19. for i := range e.table[:] {
  20. e.table[i] = tableEntry{}
  21. }
  22. for i := range e.bTable[:] {
  23. e.bTable[i] = tableEntryPrev{}
  24. }
  25. e.cur = maxMatchOffset
  26. break
  27. }
  28. // Shift down everything in the table that isn't already too far away.
  29. minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
  30. for i := range e.table[:] {
  31. v := e.table[i].offset
  32. if v <= minOff {
  33. v = 0
  34. } else {
  35. v = v - e.cur + maxMatchOffset
  36. }
  37. e.table[i].offset = v
  38. }
  39. for i := range e.bTable[:] {
  40. v := e.bTable[i]
  41. if v.Cur.offset <= minOff {
  42. v.Cur.offset = 0
  43. v.Prev.offset = 0
  44. } else {
  45. v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
  46. if v.Prev.offset <= minOff {
  47. v.Prev.offset = 0
  48. } else {
  49. v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
  50. }
  51. }
  52. e.bTable[i] = v
  53. }
  54. e.cur = maxMatchOffset
  55. }
  56. s := e.addBlock(src)
  57. // This check isn't in the Snappy implementation, but there, the caller
  58. // instead of the callee handles this case.
  59. if len(src) < minNonLiteralBlockSize {
  60. // We do not fill the token table.
  61. // This will be picked up by caller.
  62. dst.n = uint16(len(src))
  63. return
  64. }
  65. // Override src
  66. src = e.hist
  67. nextEmit := s
  68. // sLimit is when to stop looking for offset/length copies. The inputMargin
  69. // lets us use a fast path for emitLiteral in the main loop, while we are
  70. // looking for copies.
  71. sLimit := int32(len(src) - inputMargin)
  72. // nextEmit is where in src the next emitLiteral should start from.
  73. cv := load6432(src, s)
  74. // Repeat MUST be > 1 and within range
  75. repeat := int32(1)
  76. for {
  77. const skipLog = 7
  78. const doEvery = 1
  79. nextS := s
  80. var l int32
  81. var t int32
  82. for {
  83. nextHashS := hash4x64(cv, tableBits)
  84. nextHashL := hash7(cv, tableBits)
  85. s = nextS
  86. nextS = s + doEvery + (s-nextEmit)>>skipLog
  87. if nextS > sLimit {
  88. goto emitRemainder
  89. }
  90. // Fetch a short+long candidate
  91. sCandidate := e.table[nextHashS]
  92. lCandidate := e.bTable[nextHashL]
  93. next := load6432(src, nextS)
  94. entry := tableEntry{offset: s + e.cur}
  95. e.table[nextHashS] = entry
  96. eLong := &e.bTable[nextHashL]
  97. eLong.Cur, eLong.Prev = entry, eLong.Cur
  98. // Calculate hashes of 'next'
  99. nextHashS = hash4x64(next, tableBits)
  100. nextHashL = hash7(next, tableBits)
  101. t = lCandidate.Cur.offset - e.cur
  102. if s-t < maxMatchOffset {
  103. if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) {
  104. // Long candidate matches at least 4 bytes.
  105. // Store the next match
  106. e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
  107. eLong := &e.bTable[nextHashL]
  108. eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
  109. // Check the previous long candidate as well.
  110. t2 := lCandidate.Prev.offset - e.cur
  111. if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
  112. l = e.matchlen(s+4, t+4, src) + 4
  113. ml1 := e.matchlen(s+4, t2+4, src) + 4
  114. if ml1 > l {
  115. t = t2
  116. l = ml1
  117. break
  118. }
  119. }
  120. break
  121. }
  122. // Current value did not match, but check if previous long value does.
  123. t = lCandidate.Prev.offset - e.cur
  124. if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) {
  125. // Store the next match
  126. e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
  127. eLong := &e.bTable[nextHashL]
  128. eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
  129. break
  130. }
  131. }
  132. t = sCandidate.offset - e.cur
  133. if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) {
  134. // Found a 4 match...
  135. l = e.matchlen(s+4, t+4, src) + 4
  136. // Look up next long candidate (at nextS)
  137. lCandidate = e.bTable[nextHashL]
  138. // Store the next match
  139. e.table[nextHashS] = tableEntry{offset: nextS + e.cur}
  140. eLong := &e.bTable[nextHashL]
  141. eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur
  142. // Check repeat at s + repOff
  143. const repOff = 1
  144. t2 := s - repeat + repOff
  145. if load3232(src, t2) == uint32(cv>>(8*repOff)) {
  146. ml := e.matchlen(s+4+repOff, t2+4, src) + 4
  147. if ml > l {
  148. t = t2
  149. l = ml
  150. s += repOff
  151. // Not worth checking more.
  152. break
  153. }
  154. }
  155. // If the next long is a candidate, use that...
  156. t2 = lCandidate.Cur.offset - e.cur
  157. if nextS-t2 < maxMatchOffset {
  158. if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) {
  159. ml := e.matchlen(nextS+4, t2+4, src) + 4
  160. if ml > l {
  161. t = t2
  162. s = nextS
  163. l = ml
  164. // This is ok, but check previous as well.
  165. }
  166. }
  167. // If the previous long is a candidate, use that...
  168. t2 = lCandidate.Prev.offset - e.cur
  169. if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) {
  170. ml := e.matchlen(nextS+4, t2+4, src) + 4
  171. if ml > l {
  172. t = t2
  173. s = nextS
  174. l = ml
  175. break
  176. }
  177. }
  178. }
  179. break
  180. }
  181. cv = next
  182. }
  183. // A 4-byte match has been found. We'll later see if more than 4 bytes
  184. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  185. // them as literal bytes.
  186. // Extend the 4-byte match as long as possible.
  187. if l == 0 {
  188. l = e.matchlenLong(s+4, t+4, src) + 4
  189. } else if l == maxMatchLength {
  190. l += e.matchlenLong(s+l, t+l, src)
  191. }
  192. // Extend backwards
  193. for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
  194. s--
  195. t--
  196. l++
  197. }
  198. if nextEmit < s {
  199. emitLiteral(dst, src[nextEmit:s])
  200. }
  201. if false {
  202. if t >= s {
  203. panic(fmt.Sprintln("s-t", s, t))
  204. }
  205. if (s - t) > maxMatchOffset {
  206. panic(fmt.Sprintln("mmo", s-t))
  207. }
  208. if l < baseMatchLength {
  209. panic("bml")
  210. }
  211. }
  212. dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
  213. repeat = s - t
  214. s += l
  215. nextEmit = s
  216. if nextS >= s {
  217. s = nextS + 1
  218. }
  219. if s >= sLimit {
  220. // Index after match end.
  221. for i := nextS + 1; i < int32(len(src))-8; i += 2 {
  222. cv := load6432(src, i)
  223. e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur}
  224. eLong := &e.bTable[hash7(cv, tableBits)]
  225. eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
  226. }
  227. goto emitRemainder
  228. }
  229. // Store every long hash in-between and every second short.
  230. if true {
  231. for i := nextS + 1; i < s-1; i += 2 {
  232. cv := load6432(src, i)
  233. t := tableEntry{offset: i + e.cur}
  234. t2 := tableEntry{offset: t.offset + 1}
  235. eLong := &e.bTable[hash7(cv, tableBits)]
  236. eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
  237. e.table[hash4x64(cv, tableBits)] = t
  238. eLong.Cur, eLong.Prev = t, eLong.Cur
  239. eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
  240. }
  241. }
  242. // We could immediately start working at s now, but to improve
  243. // compression we first update the hash table at s-1 and at s.
  244. cv = load6432(src, s)
  245. }
  246. emitRemainder:
  247. if int(nextEmit) < len(src) {
  248. // If nothing was added, don't encode literals.
  249. if dst.n == 0 {
  250. return
  251. }
  252. emitLiteral(dst, src[nextEmit:])
  253. }
  254. }