xxhash.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. // Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
  2. // at http://cyan4973.github.io/xxHash/.
  3. // THIS IS VENDORED: Go to github.com/cespare/xxhash for original package.
  4. package xxhash
  5. import (
  6. "encoding/binary"
  7. "errors"
  8. "math/bits"
  9. )
  // The five 64-bit multipliers used by the mixing steps in this file (round,
  // mergeRound and Sum64). Each one is a typed uint64 constant.
  const (
  	prime1 = uint64(11400714785074694791)
  	prime2 = uint64(14029467366897019727)
  	prime3 = uint64(1609587929392839161)
  	prime4 = uint64(9650029242287828579)
  	prime5 = uint64(2870177450012600261)
  )
  17. // NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
  18. // possible in the Go code is worth a small (but measurable) performance boost
  19. // by avoiding some MOVQs. Vars are needed for the asm and also are useful for
  20. // convenience in the Go code in a few places where we need to intentionally
  21. // avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
  22. // result overflows a uint64).
  23. var (
  24. prime1v = prime1
  25. prime2v = prime2
  26. prime3v = prime3
  27. prime4v = prime4
  28. prime5v = prime5
  29. )
  // Digest implements hash.Hash64. It holds the running state of one XXH64
  // computation; obtain a ready-to-use value from New or call Reset first.
  type Digest struct {
  	// v1..v4 are the four accumulators updated once per full 32-byte block.
  	v1, v2, v3, v4 uint64
  	// total counts every byte passed to Write since the last Reset.
  	total uint64
  	// mem buffers input that has not yet completed a 32-byte block.
  	mem [32]byte
  	// n is how much of mem is used.
  	n int
  }
  40. // New creates a new Digest that computes the 64-bit xxHash algorithm.
  41. func New() *Digest {
  42. var d Digest
  43. d.Reset()
  44. return &d
  45. }
  46. // Reset clears the Digest's state so that it can be reused.
  47. func (d *Digest) Reset() {
  48. d.v1 = prime1v + prime2
  49. d.v2 = prime2
  50. d.v3 = 0
  51. d.v4 = -prime1v
  52. d.total = 0
  53. d.n = 0
  54. }
  55. // Size always returns 8 bytes.
  56. func (d *Digest) Size() int { return 8 }
  57. // BlockSize always returns 32 bytes.
  58. func (d *Digest) BlockSize() int { return 32 }
  59. // Write adds more data to d. It always returns len(b), nil.
  60. func (d *Digest) Write(b []byte) (n int, err error) {
  61. n = len(b)
  62. d.total += uint64(n)
  63. if d.n+n < 32 {
  64. // This new data doesn't even fill the current block.
  65. copy(d.mem[d.n:], b)
  66. d.n += n
  67. return
  68. }
  69. if d.n > 0 {
  70. // Finish off the partial block.
  71. copy(d.mem[d.n:], b)
  72. d.v1 = round(d.v1, u64(d.mem[0:8]))
  73. d.v2 = round(d.v2, u64(d.mem[8:16]))
  74. d.v3 = round(d.v3, u64(d.mem[16:24]))
  75. d.v4 = round(d.v4, u64(d.mem[24:32]))
  76. b = b[32-d.n:]
  77. d.n = 0
  78. }
  79. if len(b) >= 32 {
  80. // One or more full blocks left.
  81. nw := writeBlocks(d, b)
  82. b = b[nw:]
  83. }
  84. // Store any remaining partial block.
  85. copy(d.mem[:], b)
  86. d.n = len(b)
  87. return
  88. }
  89. // Sum appends the current hash to b and returns the resulting slice.
  90. func (d *Digest) Sum(b []byte) []byte {
  91. s := d.Sum64()
  92. return append(
  93. b,
  94. byte(s>>56),
  95. byte(s>>48),
  96. byte(s>>40),
  97. byte(s>>32),
  98. byte(s>>24),
  99. byte(s>>16),
  100. byte(s>>8),
  101. byte(s),
  102. )
  103. }
  104. // Sum64 returns the current hash.
  105. func (d *Digest) Sum64() uint64 {
  106. var h uint64
  107. if d.total >= 32 {
  108. v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
  109. h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
  110. h = mergeRound(h, v1)
  111. h = mergeRound(h, v2)
  112. h = mergeRound(h, v3)
  113. h = mergeRound(h, v4)
  114. } else {
  115. h = d.v3 + prime5
  116. }
  117. h += d.total
  118. i, end := 0, d.n
  119. for ; i+8 <= end; i += 8 {
  120. k1 := round(0, u64(d.mem[i:i+8]))
  121. h ^= k1
  122. h = rol27(h)*prime1 + prime4
  123. }
  124. if i+4 <= end {
  125. h ^= uint64(u32(d.mem[i:i+4])) * prime1
  126. h = rol23(h)*prime2 + prime3
  127. i += 4
  128. }
  129. for i < end {
  130. h ^= uint64(d.mem[i]) * prime5
  131. h = rol11(h) * prime1
  132. i++
  133. }
  134. h ^= h >> 33
  135. h *= prime2
  136. h ^= h >> 29
  137. h *= prime3
  138. h ^= h >> 32
  139. return h
  140. }
  // magic is the identifier that prefixes a marshaled Digest.
  const magic = "xxh\x06"

  // marshaledSize is the exact length of a marshaled Digest: the identifier,
  // five 8-byte words (v1..v4 and total), and the 32-byte buffer.
  const marshaledSize = len(magic) + 5*8 + 32
  145. // MarshalBinary implements the encoding.BinaryMarshaler interface.
  146. func (d *Digest) MarshalBinary() ([]byte, error) {
  147. b := make([]byte, 0, marshaledSize)
  148. b = append(b, magic...)
  149. b = appendUint64(b, d.v1)
  150. b = appendUint64(b, d.v2)
  151. b = appendUint64(b, d.v3)
  152. b = appendUint64(b, d.v4)
  153. b = appendUint64(b, d.total)
  154. b = append(b, d.mem[:d.n]...)
  155. b = b[:len(b)+len(d.mem)-d.n]
  156. return b, nil
  157. }
  158. // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
  159. func (d *Digest) UnmarshalBinary(b []byte) error {
  160. if len(b) < len(magic) || string(b[:len(magic)]) != magic {
  161. return errors.New("xxhash: invalid hash state identifier")
  162. }
  163. if len(b) != marshaledSize {
  164. return errors.New("xxhash: invalid hash state size")
  165. }
  166. b = b[len(magic):]
  167. b, d.v1 = consumeUint64(b)
  168. b, d.v2 = consumeUint64(b)
  169. b, d.v3 = consumeUint64(b)
  170. b, d.v4 = consumeUint64(b)
  171. b, d.total = consumeUint64(b)
  172. copy(d.mem[:], b)
  173. b = b[len(d.mem):]
  174. d.n = int(d.total % uint64(len(d.mem)))
  175. return nil
  176. }
  // appendUint64 appends the 8-byte little-endian encoding of x to b and returns
  // the extended slice.
  func appendUint64(b []byte, x uint64) []byte {
  	start := len(b)
  	b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
  	binary.LittleEndian.PutUint64(b[start:], x)
  	return b
  }
  182. func consumeUint64(b []byte) ([]byte, uint64) {
  183. x := u64(b)
  184. return b[8:], x
  185. }
  // u64 decodes the first 8 bytes of b as a little-endian uint64.
  func u64(b []byte) uint64 {
  	return binary.LittleEndian.Uint64(b)
  }

  // u32 decodes the first 4 bytes of b as a little-endian uint32.
  func u32(b []byte) uint32 {
  	return binary.LittleEndian.Uint32(b)
  }
  188. func round(acc, input uint64) uint64 {
  189. acc += input * prime2
  190. acc = rol31(acc)
  191. acc *= prime1
  192. return acc
  193. }
  194. func mergeRound(acc, val uint64) uint64 {
  195. val = round(0, val)
  196. acc ^= val
  197. acc = acc*prime1 + prime4
  198. return acc
  199. }
  // The rolN helpers rotate x left by N bits.

  func rol1(x uint64) uint64 {
  	return bits.RotateLeft64(x, 1)
  }

  func rol7(x uint64) uint64 {
  	return bits.RotateLeft64(x, 7)
  }

  func rol11(x uint64) uint64 {
  	return bits.RotateLeft64(x, 11)
  }

  func rol12(x uint64) uint64 {
  	return bits.RotateLeft64(x, 12)
  }

  func rol18(x uint64) uint64 {
  	return bits.RotateLeft64(x, 18)
  }

  func rol23(x uint64) uint64 {
  	return bits.RotateLeft64(x, 23)
  }

  func rol27(x uint64) uint64 {
  	return bits.RotateLeft64(x, 27)
  }

  func rol31(x uint64) uint64 {
  	return bits.RotateLeft64(x, 31)
  }