// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"math/bits"
)
  9. // hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
  10. // Preferably h should be a constant and should always be <32.
  11. func hash4(u uint64, h uint8) uint32 {
  12. const prime4bytes = 2654435761
  13. return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
  14. }
  15. // hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
  16. // Preferably h should be a constant and should always be <64.
  17. func hash5(u uint64, h uint8) uint32 {
  18. const prime5bytes = 889523592379
  19. return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
  20. }
  21. // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
  22. // Preferably h should be a constant and should always be <64.
  23. func hash7(u uint64, h uint8) uint32 {
  24. const prime7bytes = 58295818150454627
  25. return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
  26. }
  27. // hash8 returns the hash of u to fit in a hash table with h bits.
  28. // Preferably h should be a constant and should always be <64.
  29. func hash8(u uint64, h uint8) uint32 {
  30. const prime8bytes = 0xcf1bbcdcb7a56463
  31. return uint32((u * prime8bytes) >> ((64 - h) & 63))
  32. }
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
//
// It returns d, the number of bytes written to dst, or 0 when the input
// could not be compressed below dstLimit (callers presumably store such
// blocks uncompressed — confirm against the caller).
func encodeBlockBetter(dst, src []byte) (d int) {
	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	// lTable is keyed by 7-byte hashes (rarer, higher-quality matches);
	// sTable by 4-byte hashes (more frequent, shorter matches). Entries
	// hold positions in src; the zero value doubles as "empty", which is
	// safe because matching starts at s == 1 and position 0 is re-checked
	// by the load32 comparison before use.
	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 5

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s) // cv caches the 8 bytes at src[s:]

	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
	repeat := 1

	for {
		candidateL := 0
		// Inner loop: scan forward until some candidate matches the 4
		// bytes at s (or the repeat check hits), leaving candidateL set.
		for {
			// Next src position to check. The step grows with the
			// distance since the last emit, so incompressible data
			// is skipped over progressively faster.
			nextS := s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			// Record the current position before any early exit so
			// future scans can find it.
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			// Check repeat at offset checkRep.
			const checkRep = 1
			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward. The first 4+checkRep bytes are
				// already known to match; compare 8 bytes at a time
				// and use the xor's trailing zeros to find the exact
				// mismatch byte.
				candidate := s - repeat + 4 + checkRep
				s += 4 + checkRep
				for s <= sLimit {
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}

				if nextEmit > 0 {
					// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
					d += emitRepeat(dst[d:], repeat, s-base)
				} else {
					// First match, cannot be repeat.
					d += emitCopy(dst[d:], repeat, s-base)
				}
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				cv = load64(src, s)
				continue
			}

			// Prefer the long (7-byte-hash) candidate.
			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s <= len(src)-8 {
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		if offset > 65535 && s-base <= 5 {
			// Bail if the match is equal or worse to the encoding.
			// Resume 3 bytes in so the next scan starts fresh past
			// the rejected match.
			s = base + 3
			cv = load64(src, s)
			continue
		}

		repeat = offset
		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopy(dst[d:], offset, s-base)

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}
		// Index match start+1 (long) and start+2 (short)
		index0 := base + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		cv = load64(src, s)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}