-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathlsh_variants.py
25 lines (22 loc) · 953 Bytes
/
lsh_variants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import numpy
def generate_random_vector(shape):
    """Return a random array of the given shape, scaled to unit norm.

    Entries are drawn independently from a zero-mean normal distribution and
    the whole array is then divided by its norm as computed by
    ``numpy.linalg.norm``.

    Args:
        shape: Size argument forwarded to ``numpy.random.normal``.

    Returns:
        The sampled array divided by its norm.
    """
    # The 0.2 standard deviation only sets the pre-normalisation magnitude;
    # it cancels out once the sample is divided by its own norm.
    sample = numpy.random.normal(0.0, 0.2, shape)
    return sample / numpy.linalg.norm(sample)
class LSHHasher(object):
    """Hash vectors by the signs of their dot products with random vectors.

    On construction the hasher draws ``num_vectors`` random vectors with
    ``generate_random_vector``. Each of them contributes one bit to a hash:
    the sign of its dot product with the vector being hashed.
    """

    def __init__(self, num_vectors, vector_shape):
        """Draw and store the random vectors used for hashing.

        Args:
            num_vectors: Number of random vectors to draw; also the maximum
                number of bits ``hash_vector`` can produce.
            vector_shape: Shape passed to ``generate_random_vector`` for
                every draw.
        """
        self.num_vectors = num_vectors
        self.vector_shape = vector_shape
        self.random_vectors = [
            generate_random_vector(self.vector_shape)
            for _ in range(self.num_vectors)
        ]

    def hash_vector(self, vector, num_bits=None, as_str=False):
        """Compute the sign bits of ``vector`` against the random vectors.

        Args:
            vector: Array-like to hash; it is combined with each stored
                random vector through ``numpy.dot``.
            num_bits: How many of the stored random vectors to use, taken
                from the front of the list. ``None`` means all of them.
            as_str: When true, bits are the strings ``"+1"`` / ``"-1"``
                instead of the integers ``1`` / ``-1``.

        Returns:
            A list with one entry per random vector used: the positive
            symbol when the dot product is >= 0, otherwise the negative one.

        Raises:
            ValueError: If ``num_bits`` is negative.
            AssertionError: If ``num_bits`` exceeds ``num_vectors``.
        """
        if num_bits is None:
            num_bits = self.num_vectors
        # A negative count would otherwise slice from the end of the list
        # and silently yield the wrong number of bits.
        if num_bits < 0:
            raise ValueError("num_bits must be non-negative")
        # Raised explicitly (rather than via ``assert``) so the check is
        # still enforced under ``python -O``; the exception type and message
        # are kept for existing callers.
        if num_bits > self.num_vectors:
            raise AssertionError("Can't have more bits than vectors")

        # Pick the output symbols once instead of converting every bit.
        positive, negative = ("+1", "-1") if as_str else (1, -1)
        return [
            positive if numpy.dot(vector, random_vec) >= 0 else negative
            for random_vec in self.random_vectors[:num_bits]
        ]