-
Notifications
You must be signed in to change notification settings - Fork 1
/
bloomFilter.py
78 lines (60 loc) · 2.09 KB
/
bloomFilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import mmh3 # murmurhash: is faster for blooms
import math
class BloomFilter(object):
def __init__(self, m, k):
self.m = m # size of bloom filter
self.k = k # number of hash functions
self.n = 0 # total count of the elemnts inserted in the set
self.bloomFilter = [0 for i in range(self.m)]
def _setAllBitsToZero(self):
self.n = 0
for i in self.bloomFilter:
self.bloomFilter[i] = 0
def getBitArrayIndices(self, item):
"""
hashes the key for k defined,
returns a list of the indexes which have to be set
"""
indexList = []
for i in range(1, self.k + 1):
indexList.append((hash(item) + i * mmh3.hash(item)) % self.m)
return indexList
def add(self, item):
"""
Insert an item in the filter
"""
for i in self.getBitArrayIndices(item):
self.bloomFilter[i] = 1
self.n += 1
def contains(self, key):
"""
returns whether item exists in the set or not
"""
for i in self.getBitArrayIndices(key):
if self.bloomFilter[i] != 1:
return False
return True
def length(self):
"""
returns the current size of the filter
"""
return self.n
def generateStats(self):
"""
Calculates the statistics of a filter
Probability of False Positives, predicted false positive rate, n, m, k.
"""
n = float(self.n)
m = float(self.m)
k = float(self.k)
probability_fp = math.pow((1.0 - math.exp(-(k*n)/m)), k)
print("Number of elements entered in filter: ", n)
print("Number of bits in filter: ", m)
print("Number of hash functions used: ", k)
print("Predicted Probability of false positives: ", probability_fp)
print("Predicted false positive rate: ", probability_fp * 100.0, "%")
def reset(self):
"""
Resets the filter and clears old values and statistics
"""
self._setAllBitsToZero()