123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- package bloom
- import (
- "errors"
- "strconv"
- "github.com/tal-tech/go-zero/core/hash"
- "github.com/tal-tech/go-zero/core/stores/redis"
- )
- const (
- // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
- // maps as k in the error rate table
- maps = 14
- setScript = `
- for _, offset in ipairs(ARGV) do
- redis.call("setbit", KEYS[1], offset, 1)
- end
- `
- testScript = `
- for _, offset in ipairs(ARGV) do
- if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
- return false
- end
- end
- return true
- `
- )
- var ErrTooLargeOffset = errors.New("too large offset")
- type (
- BitSetProvider interface {
- check([]uint) (bool, error)
- set([]uint) error
- }
- BloomFilter struct {
- bits uint
- bitSet BitSetProvider
- }
- )
- // New create a BloomFilter, store is the backed redis, key is the key for the bloom filter,
- // bits is how many bits will be used, maps is how many hashes for each addition.
- // best practices:
- // elements - means how many actual elements
- // when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
- // for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
- func New(store *redis.Redis, key string, bits uint) *BloomFilter {
- return &BloomFilter{
- bits: bits,
- bitSet: newRedisBitSet(store, key, bits),
- }
- }
- func (f *BloomFilter) Add(data []byte) error {
- locations := f.getLocations(data)
- err := f.bitSet.set(locations)
- if err != nil {
- return err
- }
- return nil
- }
- func (f *BloomFilter) Exists(data []byte) (bool, error) {
- locations := f.getLocations(data)
- isSet, err := f.bitSet.check(locations)
- if err != nil {
- return false, err
- }
- if !isSet {
- return false, nil
- }
- return true, nil
- }
- func (f *BloomFilter) getLocations(data []byte) []uint {
- locations := make([]uint, maps)
- for i := uint(0); i < maps; i++ {
- hashValue := hash.Hash(append(data, byte(i)))
- locations[i] = uint(hashValue % uint64(f.bits))
- }
- return locations
- }
- type redisBitSet struct {
- store *redis.Redis
- key string
- bits uint
- }
- func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
- return &redisBitSet{
- store: store,
- key: key,
- bits: bits,
- }
- }
- func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
- var args []string
- for _, offset := range offsets {
- if offset >= r.bits {
- return nil, ErrTooLargeOffset
- }
- args = append(args, strconv.FormatUint(uint64(offset), 10))
- }
- return args, nil
- }
- func (r *redisBitSet) check(offsets []uint) (bool, error) {
- args, err := r.buildOffsetArgs(offsets)
- if err != nil {
- return false, err
- }
- resp, err := r.store.Eval(testScript, []string{r.key}, args)
- if err == redis.Nil {
- return false, nil
- } else if err != nil {
- return false, err
- }
- if exists, ok := resp.(int64); !ok {
- return false, nil
- } else {
- return exists == 1, nil
- }
- }
- func (r *redisBitSet) del() error {
- _, err := r.store.Del(r.key)
- return err
- }
- func (r *redisBitSet) expire(seconds int) error {
- return r.store.Expire(r.key, seconds)
- }
- func (r *redisBitSet) set(offsets []uint) error {
- args, err := r.buildOffsetArgs(offsets)
- if err != nil {
- return err
- }
- _, err = r.store.Eval(setScript, []string{r.key}, args)
- if err == redis.Nil {
- return nil
- } else {
- return err
- }
- }
|