bloom.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. package bloom
  2. import (
  3. "errors"
  4. "strconv"
  5. "github.com/tal-tech/go-zero/core/hash"
  6. "github.com/tal-tech/go-zero/core/stores/redis"
  7. )
  8. const (
  9. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  10. // maps as k in the error rate table
  11. maps = 14
  12. setScript = `
  13. for _, offset in ipairs(ARGV) do
  14. redis.call("setbit", KEYS[1], offset, 1)
  15. end
  16. `
  17. testScript = `
  18. for _, offset in ipairs(ARGV) do
  19. if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
  20. return false
  21. end
  22. end
  23. return true
  24. `
  25. )
  26. // ErrTooLargeOffset indicates the offset is too large in bitset.
  27. var ErrTooLargeOffset = errors.New("too large offset")
  28. type (
  29. // A Filter is a bloom filter.
  30. Filter struct {
  31. bits uint
  32. bitSet bitSetProvider
  33. }
  34. bitSetProvider interface {
  35. check([]uint) (bool, error)
  36. set([]uint) error
  37. }
  38. )
  39. // New create a Filter, store is the backed redis, key is the key for the bloom filter,
  40. // bits is how many bits will be used, maps is how many hashes for each addition.
  41. // best practices:
  42. // elements - means how many actual elements
  43. // when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
  44. // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
  45. func New(store *redis.Redis, key string, bits uint) *Filter {
  46. return &Filter{
  47. bits: bits,
  48. bitSet: newRedisBitSet(store, key, bits),
  49. }
  50. }
  51. // Add adds data into f.
  52. func (f *Filter) Add(data []byte) error {
  53. locations := f.getLocations(data)
  54. return f.bitSet.set(locations)
  55. }
  56. // Exists checks if data is in f.
  57. func (f *Filter) Exists(data []byte) (bool, error) {
  58. locations := f.getLocations(data)
  59. isSet, err := f.bitSet.check(locations)
  60. if err != nil {
  61. return false, err
  62. }
  63. if !isSet {
  64. return false, nil
  65. }
  66. return true, nil
  67. }
  68. func (f *Filter) getLocations(data []byte) []uint {
  69. locations := make([]uint, maps)
  70. for i := uint(0); i < maps; i++ {
  71. hashValue := hash.Hash(append(data, byte(i)))
  72. locations[i] = uint(hashValue % uint64(f.bits))
  73. }
  74. return locations
  75. }
  76. type redisBitSet struct {
  77. store *redis.Redis
  78. key string
  79. bits uint
  80. }
  81. func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
  82. return &redisBitSet{
  83. store: store,
  84. key: key,
  85. bits: bits,
  86. }
  87. }
  88. func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
  89. var args []string
  90. for _, offset := range offsets {
  91. if offset >= r.bits {
  92. return nil, ErrTooLargeOffset
  93. }
  94. args = append(args, strconv.FormatUint(uint64(offset), 10))
  95. }
  96. return args, nil
  97. }
  98. func (r *redisBitSet) check(offsets []uint) (bool, error) {
  99. args, err := r.buildOffsetArgs(offsets)
  100. if err != nil {
  101. return false, err
  102. }
  103. resp, err := r.store.Eval(testScript, []string{r.key}, args)
  104. if err == redis.Nil {
  105. return false, nil
  106. } else if err != nil {
  107. return false, err
  108. }
  109. exists, ok := resp.(int64)
  110. if !ok {
  111. return false, nil
  112. }
  113. return exists == 1, nil
  114. }
  115. func (r *redisBitSet) del() error {
  116. _, err := r.store.Del(r.key)
  117. return err
  118. }
  119. func (r *redisBitSet) expire(seconds int) error {
  120. return r.store.Expire(r.key, seconds)
  121. }
  122. func (r *redisBitSet) set(offsets []uint) error {
  123. args, err := r.buildOffsetArgs(offsets)
  124. if err != nil {
  125. return err
  126. }
  127. _, err = r.store.Eval(setScript, []string{r.key}, args)
  128. if err == redis.Nil {
  129. return nil
  130. }
  131. return err
  132. }