-
Notifications
You must be signed in to change notification settings - Fork 0
/
MyBloomFilter.py
82 lines (58 loc) · 2.07 KB
/
MyBloomFilter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf8 -*-
class Bitarray:
def __init__(self, size):
""" Create a bit array of a specific size """
self.size = size
self.bitarray = bytearray(size/8) #创造了这么大的位数租空间 注意要除以8 因为1byte = 8bit
def set(self, n):
""" Sets the nth element of the bitarray """
index = n / 8
position = n % 8
self.bitarray[index] = self.bitarray[index] | 1 << (7 - position)
def get(self, n):
""" Gets the nth element of the bitarray """
index = n / 8
position = n % 8
return (self.bitarray[index] & (1 << (7 - position))) > 0
def BKDRHash(key,seed,m):
hashValue = 0
for i in range(len(key)):
hashValue = ( hashValue * seed ) + ord(key[i])
return hashValue % m
class MyBloomFilter(object):
"""docstring for MyBloomFilter"""
seeds = [31,7,15,127,131,1313,131311,1313113,1311113113,1731,71,177]
def __init__(self, size,k = 8):
self.bitmap = Bitarray(size)
self.m = size
self.k = k
def add(self,key): #增加key到我们的位图中
#进行k次hash
for i in range(self.k):
hv = BKDRHash(key,MyBloomFilter.seeds[i],self.m)
self.bitmap.set(hv)
def lookup(self,key):
for i in range(self.k):
hv = BKDRHash(key,MyBloomFilter.seeds[i],self.m)
if(not self.bitmap.get(hv)): return False
return True
# mbf = MyBloomFilter(32000,8)
# mbf.add('asdddd')
# mbf.add('asdsdff')
# mbf.add('avvvvsad12334242')
# mbf.add('232321vvvv3fsdasdf')
# mbf.add('klklekr3r31ee1asd')
# while True:
# strr = raw_input("Please input your str:__")
# # for i in range(mbf.k):
# # hv = BKDRHash(strr,MyBloomFilter.seeds[i],mbf.m)
# # print hv,
# print mbf.lookup(strr)
# if __name__ == "__main__":
# bitarray_obj = Bitarray(32000)
# for i in range(5):
# print "Setting index %d of bitarray .." % i
# bitarray_obj.set(i)
# print "bitarray[%d] = %d" % (i, bitarray_obj.get(i))
#
#