enc_better.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
)
type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}
// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
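	// When the absolute position counter approaches bufferReset, either clear the
	// tables (no history) or rebase every stored offset so relative offsets stay
	// valid once e.cur is reset to e.maxMatchOff.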
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin

	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1
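	// kSearchStrength controls how quickly the search skips ahead when no match
	// is found: the step grows by one byte for every 2^(kSearchStrength-1) bytes
	// scanned since the last emitted literal/match.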
	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
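			// First look for a match at the previous offset (offset1), one byte
			// ahead of s (repOff). Repeat matches are cheap to encode, so they are
			// taken before the regular long/short table matches below.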
			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
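					// Each iteration hashes index0 into the long table and index0+1
					// into the short table, stepping two bytes at a time, so the region
					// we skipped over stays findable without indexing every byte.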
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)

					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
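				// Lazy matching: before committing to the 4-byte short match, probe
				// the long table at s+1 and its chained previous entry. If either
				// gives a longer match, prefer it and advance s by one.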
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
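		// Offset values 1-3 are reserved for the repeat codes, so a regular
		// offset is stored as the real distance plus 3.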
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
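		// After emitting a match, see if the bytes at the other recent offset also
		// match, so repeat sequences can be emitted directly without a new search.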
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
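			// In the zstd format, a repeat code of 1 with zero literals refers to the
			// second most recent offset, which is exactly the offset (offset2) matched here.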
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.Encode(blk, src)
}