encode_go.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. // +build !amd64 appengine !gc noasm
  2. package s2
  3. import (
  4. "bytes"
  5. "math/bits"
  6. )
  7. // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
  8. // assumes that the varint-encoded length of the decompressed bytes has already
  9. // been written.
  10. //
  11. // It also assumes that:
  12. // len(dst) >= MaxEncodedLen(len(src))
  13. func encodeBlock(dst, src []byte) (d int) {
  14. if len(src) < minNonLiteralBlockSize {
  15. return 0
  16. }
  17. return encodeBlockGo(dst, src)
  18. }
  19. // emitLiteral writes a literal chunk and returns the number of bytes written.
  20. //
  21. // It assumes that:
  22. // dst is long enough to hold the encoded bytes
  23. // 0 <= len(lit) && len(lit) <= math.MaxUint32
  24. func emitLiteral(dst, lit []byte) int {
  25. if len(lit) == 0 {
  26. return 0
  27. }
  28. const num = 63<<2 | tagLiteral
  29. i, n := 0, uint(len(lit)-1)
  30. switch {
  31. case n < 60:
  32. dst[0] = uint8(n)<<2 | tagLiteral
  33. i = 1
  34. case n < 1<<8:
  35. dst[1] = uint8(n)
  36. dst[0] = 60<<2 | tagLiteral
  37. i = 2
  38. case n < 1<<16:
  39. dst[2] = uint8(n >> 8)
  40. dst[1] = uint8(n)
  41. dst[0] = 61<<2 | tagLiteral
  42. i = 3
  43. case n < 1<<24:
  44. dst[3] = uint8(n >> 16)
  45. dst[2] = uint8(n >> 8)
  46. dst[1] = uint8(n)
  47. dst[0] = 62<<2 | tagLiteral
  48. i = 4
  49. default:
  50. dst[4] = uint8(n >> 24)
  51. dst[3] = uint8(n >> 16)
  52. dst[2] = uint8(n >> 8)
  53. dst[1] = uint8(n)
  54. dst[0] = 63<<2 | tagLiteral
  55. i = 5
  56. }
  57. return i + copy(dst[i:], lit)
  58. }
  59. // emitRepeat writes a repeat chunk and returns the number of bytes written.
  60. // Length must be at least 4 and < 1<<24
  61. func emitRepeat(dst []byte, offset, length int) int {
  62. // Repeat offset, make length cheaper
  63. length -= 4
  64. if length <= 4 {
  65. dst[0] = uint8(length)<<2 | tagCopy1
  66. dst[1] = 0
  67. return 2
  68. }
  69. if length < 8 && offset < 2048 {
  70. // Encode WITH offset
  71. dst[1] = uint8(offset)
  72. dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
  73. return 2
  74. }
  75. if length < (1<<8)+4 {
  76. length -= 4
  77. dst[2] = uint8(length)
  78. dst[1] = 0
  79. dst[0] = 5<<2 | tagCopy1
  80. return 3
  81. }
  82. if length < (1<<16)+(1<<8) {
  83. length -= 1 << 8
  84. dst[3] = uint8(length >> 8)
  85. dst[2] = uint8(length >> 0)
  86. dst[1] = 0
  87. dst[0] = 6<<2 | tagCopy1
  88. return 4
  89. }
  90. const maxRepeat = (1 << 24) - 1
  91. length -= 1 << 16
  92. left := 0
  93. if length > maxRepeat {
  94. left = length - maxRepeat + 4
  95. length = maxRepeat - 4
  96. }
  97. dst[4] = uint8(length >> 16)
  98. dst[3] = uint8(length >> 8)
  99. dst[2] = uint8(length >> 0)
  100. dst[1] = 0
  101. dst[0] = 7<<2 | tagCopy1
  102. if left > 0 {
  103. return 5 + emitRepeat(dst[5:], offset, left)
  104. }
  105. return 5
  106. }
  107. // emitCopy writes a copy chunk and returns the number of bytes written.
  108. //
  109. // It assumes that:
  110. // dst is long enough to hold the encoded bytes
  111. // 1 <= offset && offset <= math.MaxUint32
  112. // 4 <= length && length <= 1 << 24
  113. func emitCopy(dst []byte, offset, length int) int {
  114. if offset >= 65536 {
  115. i := 0
  116. if length > 64 {
  117. // Emit a length 64 copy, encoded as 5 bytes.
  118. dst[4] = uint8(offset >> 24)
  119. dst[3] = uint8(offset >> 16)
  120. dst[2] = uint8(offset >> 8)
  121. dst[1] = uint8(offset)
  122. dst[0] = 63<<2 | tagCopy4
  123. length -= 64
  124. if length >= 4 {
  125. // Emit remaining as repeats
  126. return 5 + emitRepeat(dst[5:], offset, length)
  127. }
  128. i = 5
  129. }
  130. if length == 0 {
  131. return i
  132. }
  133. // Emit a copy, offset encoded as 4 bytes.
  134. dst[i+0] = uint8(length-1)<<2 | tagCopy4
  135. dst[i+1] = uint8(offset)
  136. dst[i+2] = uint8(offset >> 8)
  137. dst[i+3] = uint8(offset >> 16)
  138. dst[i+4] = uint8(offset >> 24)
  139. return i + 5
  140. }
  141. // Offset no more than 2 bytes.
  142. if length > 64 {
  143. // Emit a length 60 copy, encoded as 3 bytes.
  144. // Emit remaining as repeat value (minimum 4 bytes).
  145. dst[2] = uint8(offset >> 8)
  146. dst[1] = uint8(offset)
  147. dst[0] = 59<<2 | tagCopy2
  148. length -= 60
  149. // Emit remaining as repeats, at least 4 bytes remain.
  150. return 3 + emitRepeat(dst[3:], offset, length)
  151. }
  152. if length >= 12 || offset >= 2048 {
  153. // Emit the remaining copy, encoded as 3 bytes.
  154. dst[2] = uint8(offset >> 8)
  155. dst[1] = uint8(offset)
  156. dst[0] = uint8(length-1)<<2 | tagCopy2
  157. return 3
  158. }
  159. // Emit the remaining copy, encoded as 2 bytes.
  160. dst[1] = uint8(offset)
  161. dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
  162. return 2
  163. }
  164. // emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
  165. //
  166. // It assumes that:
  167. // dst is long enough to hold the encoded bytes
  168. // 1 <= offset && offset <= math.MaxUint32
  169. // 4 <= length && length <= 1 << 24
  170. func emitCopyNoRepeat(dst []byte, offset, length int) int {
  171. if offset >= 65536 {
  172. i := 0
  173. if length > 64 {
  174. // Emit a length 64 copy, encoded as 5 bytes.
  175. dst[4] = uint8(offset >> 24)
  176. dst[3] = uint8(offset >> 16)
  177. dst[2] = uint8(offset >> 8)
  178. dst[1] = uint8(offset)
  179. dst[0] = 63<<2 | tagCopy4
  180. length -= 64
  181. if length >= 4 {
  182. // Emit remaining as repeats
  183. return 5 + emitCopyNoRepeat(dst[5:], offset, length)
  184. }
  185. i = 5
  186. }
  187. if length == 0 {
  188. return i
  189. }
  190. // Emit a copy, offset encoded as 4 bytes.
  191. dst[i+0] = uint8(length-1)<<2 | tagCopy4
  192. dst[i+1] = uint8(offset)
  193. dst[i+2] = uint8(offset >> 8)
  194. dst[i+3] = uint8(offset >> 16)
  195. dst[i+4] = uint8(offset >> 24)
  196. return i + 5
  197. }
  198. // Offset no more than 2 bytes.
  199. if length > 64 {
  200. // Emit a length 60 copy, encoded as 3 bytes.
  201. // Emit remaining as repeat value (minimum 4 bytes).
  202. dst[2] = uint8(offset >> 8)
  203. dst[1] = uint8(offset)
  204. dst[0] = 59<<2 | tagCopy2
  205. length -= 60
  206. // Emit remaining as repeats, at least 4 bytes remain.
  207. return 3 + emitCopyNoRepeat(dst[3:], offset, length)
  208. }
  209. if length >= 12 || offset >= 2048 {
  210. // Emit the remaining copy, encoded as 3 bytes.
  211. dst[2] = uint8(offset >> 8)
  212. dst[1] = uint8(offset)
  213. dst[0] = uint8(length-1)<<2 | tagCopy2
  214. return 3
  215. }
  216. // Emit the remaining copy, encoded as 2 bytes.
  217. dst[1] = uint8(offset)
  218. dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
  219. return 2
  220. }
  221. // matchLen returns how many bytes match in a and b
  222. //
  223. // It assumes that:
  224. // len(a) <= len(b)
  225. //
  226. func matchLen(a []byte, b []byte) int {
  227. b = b[:len(a)]
  228. var checked int
  229. if len(a) > 4 {
  230. // Try 4 bytes first
  231. if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
  232. return bits.TrailingZeros32(diff) >> 3
  233. }
  234. // Switch to 8 byte matching.
  235. checked = 4
  236. a = a[4:]
  237. b = b[4:]
  238. for len(a) >= 8 {
  239. b = b[:len(a)]
  240. if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
  241. return checked + (bits.TrailingZeros64(diff) >> 3)
  242. }
  243. checked += 8
  244. a = a[8:]
  245. b = b[8:]
  246. }
  247. }
  248. b = b[:len(a)]
  249. for i := range a {
  250. if a[i] != b[i] {
  251. return int(i) + checked
  252. }
  253. }
  254. return len(a) + checked
  255. }
  256. func encodeBlockSnappy(dst, src []byte) (d int) {
  257. // Initialize the hash table.
  258. const (
  259. tableBits = 14
  260. maxTableSize = 1 << tableBits
  261. )
  262. var table [maxTableSize]uint32
  263. // sLimit is when to stop looking for offset/length copies. The inputMargin
  264. // lets us use a fast path for emitLiteral in the main loop, while we are
  265. // looking for copies.
  266. sLimit := len(src) - inputMargin
  267. // Bail if we can't compress to at least this.
  268. dstLimit := len(src) - len(src)>>5 - 5
  269. // nextEmit is where in src the next emitLiteral should start from.
  270. nextEmit := 0
  271. // The encoded form must start with a literal, as there are no previous
  272. // bytes to copy, so we start looking for hash matches at s == 1.
  273. s := 1
  274. cv := load64(src, s)
  275. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  276. repeat := 1
  277. for {
  278. candidate := 0
  279. for {
  280. // Next src position to check
  281. nextS := s + (s-nextEmit)>>6 + 4
  282. if nextS > sLimit {
  283. goto emitRemainder
  284. }
  285. hash0 := hash6(cv, tableBits)
  286. hash1 := hash6(cv>>8, tableBits)
  287. candidate = int(table[hash0])
  288. candidate2 := int(table[hash1])
  289. table[hash0] = uint32(s)
  290. table[hash1] = uint32(s + 1)
  291. hash2 := hash6(cv>>16, tableBits)
  292. // Check repeat at offset checkRep.
  293. const checkRep = 1
  294. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  295. base := s + checkRep
  296. // Extend back
  297. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  298. i--
  299. base--
  300. }
  301. d += emitLiteral(dst[d:], src[nextEmit:base])
  302. // Extend forward
  303. candidate := s - repeat + 4 + checkRep
  304. s += 4 + checkRep
  305. for s <= sLimit {
  306. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  307. s += bits.TrailingZeros64(diff) >> 3
  308. break
  309. }
  310. s += 8
  311. candidate += 8
  312. }
  313. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  314. nextEmit = s
  315. if s >= sLimit {
  316. goto emitRemainder
  317. }
  318. cv = load64(src, s)
  319. continue
  320. }
  321. if uint32(cv) == load32(src, candidate) {
  322. break
  323. }
  324. candidate = int(table[hash2])
  325. if uint32(cv>>8) == load32(src, candidate2) {
  326. table[hash2] = uint32(s + 2)
  327. candidate = candidate2
  328. s++
  329. break
  330. }
  331. table[hash2] = uint32(s + 2)
  332. if uint32(cv>>16) == load32(src, candidate) {
  333. s += 2
  334. break
  335. }
  336. cv = load64(src, nextS)
  337. s = nextS
  338. }
  339. // Extend backwards
  340. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  341. candidate--
  342. s--
  343. }
  344. // Bail if we exceed the maximum size.
  345. if d+(s-nextEmit) > dstLimit {
  346. return 0
  347. }
  348. // A 4-byte match has been found. We'll later see if more than 4 bytes
  349. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  350. // them as literal bytes.
  351. d += emitLiteral(dst[d:], src[nextEmit:s])
  352. // Call emitCopy, and then see if another emitCopy could be our next
  353. // move. Repeat until we find no match for the input immediately after
  354. // what was consumed by the last emitCopy call.
  355. //
  356. // If we exit this loop normally then we need to call emitLiteral next,
  357. // though we don't yet know how big the literal will be. We handle that
  358. // by proceeding to the next iteration of the main loop. We also can
  359. // exit this loop via goto if we get close to exhausting the input.
  360. for {
  361. // Invariant: we have a 4-byte match at s, and no need to emit any
  362. // literal bytes prior to s.
  363. base := s
  364. repeat = base - candidate
  365. // Extend the 4-byte match as long as possible.
  366. s += 4
  367. candidate += 4
  368. for s <= len(src)-8 {
  369. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  370. s += bits.TrailingZeros64(diff) >> 3
  371. break
  372. }
  373. s += 8
  374. candidate += 8
  375. }
  376. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  377. if false {
  378. // Validate match.
  379. a := src[base:s]
  380. b := src[base-repeat : base-repeat+(s-base)]
  381. if !bytes.Equal(a, b) {
  382. panic("mismatch")
  383. }
  384. }
  385. nextEmit = s
  386. if s >= sLimit {
  387. goto emitRemainder
  388. }
  389. if d > dstLimit {
  390. // Do we have space for more, if not bail.
  391. return 0
  392. }
  393. // Check for an immediate match, otherwise start search at s+1
  394. x := load64(src, s-2)
  395. m2Hash := hash6(x, tableBits)
  396. currHash := hash6(x>>16, tableBits)
  397. candidate = int(table[currHash])
  398. table[m2Hash] = uint32(s - 2)
  399. table[currHash] = uint32(s)
  400. if uint32(x>>16) != load32(src, candidate) {
  401. cv = load64(src, s+1)
  402. s++
  403. break
  404. }
  405. }
  406. }
  407. emitRemainder:
  408. if nextEmit < len(src) {
  409. // Bail if we exceed the maximum size.
  410. if d+len(src)-nextEmit > dstLimit {
  411. return 0
  412. }
  413. d += emitLiteral(dst[d:], src[nextEmit:])
  414. }
  415. return d
  416. }