block.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. package lz4
  2. import "errors"
// block represents a frame data block.
// Used when compressing or decompressing frame blocks concurrently.
type block struct {
	compressed bool   // NOTE(review): presumably marks whether zdata holds valid compressed data — confirm against the frame code
	zdata      []byte // compressed data
	data       []byte // decompressed data
	offset     int    // offset within the data as with block dependency the 64Kb window is prepended to it
	checksum   uint32 // compressed data checksum
	err        error  // error while [de]compressing
}
  13. var (
  14. // ErrInvalidSource is returned by UncompressBlock when a compressed block is corrupted.
  15. ErrInvalidSource = errors.New("lz4: invalid source")
  16. // ErrShortBuffer is returned by UncompressBlock, CompressBlock or CompressBlockHC when
  17. // the supplied buffer for [de]compression is too small.
  18. ErrShortBuffer = errors.New("lz4: short buffer")
  19. )
  20. // CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
  21. func CompressBlockBound(n int) int {
  22. return n + n/255 + 16
  23. }
  24. // UncompressBlock decompresses the source buffer into the destination one,
  25. // starting at the di index and returning the decompressed size.
  26. //
  27. // The destination buffer must be sized appropriately.
  28. //
  29. // An error is returned if the source data is invalid or the destination buffer is too small.
  30. func UncompressBlock(src, dst []byte, di int) (int, error) {
  31. si, sn, di0 := 0, len(src), di
  32. if sn == 0 {
  33. return 0, nil
  34. }
  35. for {
  36. // literals and match lengths (token)
  37. lLen := int(src[si] >> 4)
  38. mLen := int(src[si] & 0xF)
  39. if si++; si == sn {
  40. return di, ErrInvalidSource
  41. }
  42. // literals
  43. if lLen > 0 {
  44. if lLen == 0xF {
  45. for src[si] == 0xFF {
  46. lLen += 0xFF
  47. if si++; si == sn {
  48. return di - di0, ErrInvalidSource
  49. }
  50. }
  51. lLen += int(src[si])
  52. if si++; si == sn {
  53. return di - di0, ErrInvalidSource
  54. }
  55. }
  56. if len(dst)-di < lLen || si+lLen > sn {
  57. return di - di0, ErrShortBuffer
  58. }
  59. di += copy(dst[di:], src[si:si+lLen])
  60. if si += lLen; si >= sn {
  61. return di - di0, nil
  62. }
  63. }
  64. if si += 2; si >= sn {
  65. return di, ErrInvalidSource
  66. }
  67. offset := int(src[si-2]) | int(src[si-1])<<8
  68. if di-offset < 0 || offset == 0 {
  69. return di - di0, ErrInvalidSource
  70. }
  71. // match
  72. if mLen == 0xF {
  73. for src[si] == 0xFF {
  74. mLen += 0xFF
  75. if si++; si == sn {
  76. return di - di0, ErrInvalidSource
  77. }
  78. }
  79. mLen += int(src[si])
  80. if si++; si == sn {
  81. return di - di0, ErrInvalidSource
  82. }
  83. }
  84. // minimum match length is 4
  85. mLen += 4
  86. if len(dst)-di <= mLen {
  87. return di - di0, ErrShortBuffer
  88. }
  89. // copy the match (NB. match is at least 4 bytes long)
  90. // NB. past di, copy() would write old bytes instead of
  91. // the ones we just copied, so split the work into the largest chunk.
  92. for ; mLen >= offset; mLen -= offset {
  93. di += copy(dst[di:], dst[di-offset:di])
  94. }
  95. di += copy(dst[di:], dst[di-offset:di-offset+mLen])
  96. }
  97. }
// CompressBlock compresses the source buffer starting at soffset into the destination one.
// This is the fast version of LZ4 compression and also the default one.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlock(src, dst []byte, soffset int) (int, error) {
	// sn is the last position where a match may still start: mfLimit bytes
	// must remain past it (mfLimit, hashLog, minMatch, hasher, skipStrength
	// and winSizeLog are package constants defined elsewhere in this package).
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 || soffset >= sn {
		// too small to hold a match, or an empty destination: incompressible
		return 0, nil
	}
	var si, di int

	// fast scan strategy:
	// we only need a hash table to store the last sequences (4 bytes)
	var hashTable [1 << hashLog]int
	var hashShift = uint((minMatch * 8) - hashLog)

	// Initialise the hash table with the first 64Kb of the input buffer
	// (used when compressing dependent blocks)
	for si < soffset {
		// hash of the 4-byte little-endian sequence at si
		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
		si++
		// positions are stored +1 so that 0 means "empty slot"
		hashTable[h] = si
	}

	anchor := si // start of the pending (not yet emitted) literals
	// fma drives the variable step below: it grows while sequences keep
	// failing to match, skipping faster through non-compressible regions
	fma := 1<<skipStrength + 3
	for si < sn-minMatch {
		// hash the next 4 bytes (sequence)...
		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
		// -1 to separate existing entries from new ones
		ref := hashTable[h] - 1
		// ...and store the position of the hash in the hash table (+1 to compensate the -1 upon saving)
		hashTable[h] = si + 1
		// the sequence is new, out of bound (64kb) or not valid: try next sequence
		if ref < 0 ||
			(si-ref)>>winSizeLog > 0 ||
			src[ref] != src[si] ||
			src[ref+1] != src[si+1] ||
			src[ref+2] != src[si+2] ||
			src[ref+3] != src[si+3] {
			// variable step: improves performance on non-compressible data
			si += fma >> skipStrength
			fma++
			continue
		}
		// match found: reset the skip accelerator
		fma = 1<<skipStrength + 3
		lLen := si - anchor // literals preceding the match
		offset := si - ref  // backwards distance to the match

		// encode match length part 1
		si += minMatch
		mLen := si // match length has minMatch already
		// extend the match while bytes agree; src[sn] may be read here, which
		// stays in bounds because sn == len(src)-mfLimit
		for si <= sn && src[si] == src[si-offset] {
			si++
		}
		mLen = si - mLen
		// low 4 bits of the token: match length (0xF flags extra length bytes)
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}
		// encode literals length in the high 4 bits of the token
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			if di++; di == dn {
				return di, ErrShortBuffer
			}
			// extra literal length bytes: one 0xFF per 255, then the remainder
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(l)
		}
		if di++; di == dn {
			return di, ErrShortBuffer
		}

		// literals
		if di+lLen >= dn {
			return di, ErrShortBuffer
		}
		di += copy(dst[di:], src[anchor:anchor+lLen])
		anchor = si

		// encode offset (2 bytes, little endian)
		if di += 2; di >= dn {
			return di, ErrShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// encode match length part 2 (extra bytes when the token held 0xF)
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(mLen)
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
	}

	if anchor == 0 {
		// incompressible: no match was ever found
		return 0, nil
	}

	// last literals: a block always ends with a literals-only sequence
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		if di++; di == dn {
			return di, ErrShortBuffer
		}
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
		dst[di] = byte(lLen)
	}
	if di++; di == dn {
		return di, ErrShortBuffer
	}

	// write literals
	src = src[anchor:]
	switch n := di + len(src); {
	case n > dn:
		return di, ErrShortBuffer
	case n >= sn:
		// incompressible: the output would not be smaller than the input
		return 0, nil
	}
	di += copy(dst[di:], src)
	return di, nil
}
// CompressBlockHC compresses the source buffer starting at soffset into the destination one.
// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlockHC(src, dst []byte, soffset int) (int, error) {
	// sn is the last position where a match may still start (mfLimit, hashLog,
	// minMatch, hasher, winSize and winMask are package constants defined
	// elsewhere in this package).
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 || soffset >= sn {
		return 0, nil
	}
	var si, di int

	// Hash Chain strategy:
	// we need a hash table and a chain table
	// the chain table cannot contain more entries than the window size (64Kb entries)
	var hashTable [1 << hashLog]int
	var chainTable [winSize]int
	var hashShift = uint((minMatch * 8) - hashLog)

	// Initialise the hash table with the first 64Kb of the input buffer
	// (used when compressing dependent blocks)
	for si < soffset {
		// hash of the 4-byte little-endian sequence at si
		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
		// chain the previous position with the same hash before overwriting it
		chainTable[si&winMask] = hashTable[h]
		si++
		// positions are stored +1 so that 0 means "empty slot"
		hashTable[h] = si
	}

	anchor := si // start of the pending (not yet emitted) literals
	for si < sn-minMatch {
		// hash the next 4 bytes (sequence)...
		h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
		// follow the chain until out of window and give the longest match
		mLen := 0
		offset := 0
		for next := hashTable[h] - 1; next > 0 && next > si-winSize; next = chainTable[next&winMask] - 1 {
			// the first (mLen==0) or next byte (mLen>=minMatch) at current match length must match to improve on the match length
			if src[next+mLen] == src[si+mLen] {
				for ml := 0; ; ml++ {
					// NOTE(review): src[si+ml] is read before the si+ml > sn
					// bound test; the furthest index reached is sn+1, which
					// stays in bounds because sn == len(src)-mfLimit — confirm
					// mfLimit >= 2 in the package constants.
					if src[next+ml] != src[si+ml] || si+ml > sn {
						// found a longer match, keep its position and length
						if mLen < ml && ml >= minMatch {
							mLen = ml
							offset = si - next
						}
						break
					}
				}
			}
		}
		chainTable[si&winMask] = hashTable[h]
		hashTable[h] = si + 1
		// no match found
		if mLen == 0 {
			si++
			continue
		}

		// match found
		// update hash/chain tables with overlapping bytes:
		// si already hashed, add everything from si+1 up to the match length
		for si, ml := si+1, si+mLen; si < ml; {
			h := (uint32(src[si+3])<<24 | uint32(src[si+2])<<16 | uint32(src[si+1])<<8 | uint32(src[si])) * hasher >> hashShift
			chainTable[si&winMask] = hashTable[h]
			si++
			hashTable[h] = si
		}

		lLen := si - anchor // literals preceding the match
		si += mLen
		mLen -= minMatch // match length does not include minMatch

		// low 4 bits of the token: match length (0xF flags extra length bytes)
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}
		// encode literals length in the high 4 bits of the token
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			if di++; di == dn {
				return di, ErrShortBuffer
			}
			// extra literal length bytes: one 0xFF per 255, then the remainder
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(l)
		}
		if di++; di == dn {
			return di, ErrShortBuffer
		}

		// literals
		if di+lLen >= dn {
			return di, ErrShortBuffer
		}
		di += copy(dst[di:], src[anchor:anchor+lLen])
		anchor = si

		// encode offset (2 bytes, little endian)
		if di += 2; di >= dn {
			return di, ErrShortBuffer
		}
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// encode match length part 2 (extra bytes when the token held 0xF)
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				if di++; di == dn {
					return di, ErrShortBuffer
				}
			}
			dst[di] = byte(mLen)
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
	}

	if anchor == 0 {
		// incompressible: no match was ever found
		return 0, nil
	}

	// last literals: a block always ends with a literals-only sequence
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		if di++; di == dn {
			return di, ErrShortBuffer
		}
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			if di++; di == dn {
				return di, ErrShortBuffer
			}
		}
		dst[di] = byte(lLen)
	}
	if di++; di == dn {
		return di, ErrShortBuffer
	}

	// write literals
	src = src[anchor:]
	switch n := di + len(src); {
	case n > dn:
		return di, ErrShortBuffer
	case n >= sn:
		// incompressible: the output would not be smaller than the input
		return 0, nil
	}
	di += copy(dst[di:], src)
	return di, nil
}