# Import packages

In [None]:
from __future__ import division
import numpy as np
import math

from sklearn.metrics.pairwise import cosine_similarity

# Support functions

In [None]:
def signature_bit(data, planes):
    """
    Compute the random-projection LSH signature of a single data point.

    Every plane in `planes` contributes one bit: 1 when the dot product of
    `data` with that plane is non-negative, 0 otherwise. Bits are appended
    in iteration order, so the first plane becomes the most significant bit
    of the returned integer.

    Two points hashed against the same planes differ only in the bits of
    the planes for which their dot products fall on opposite sides of zero.
    """
    signature = 0
    for plane in planes:
        bit = 1 if np.dot(data, plane) >= 0 else 0
        # Make room for the new bit, then append it at the low end.
        signature = (signature << 1) | bit
    return signature

In [None]:
 def bitcount(n):
	"""
	gets the number of bits set to 1
	"""
	count = 0
	while n:
		count += 1
		n = n & (n-1)
	return count

In [None]:
def length(v):
    """
    Return the Euclidean length of vector `v`, computed as the square
    root of the dot product of `v` with itself.
    """
    squared_norm = np.dot(v, v)
    return pow(squared_norm, 0.5)

In [None]:
def get_cosine_similarity(x, y):
    """
    Return the cosine similarity between vectors `x` and `y` as a scalar.

    Each vector is wrapped in a list so that `cosine_similarity` receives
    two single-row inputs; the lone entry of the resulting 1x1 matrix is
    returned.
    """
    similarity_matrix = cosine_similarity([x], [y])
    first_row = similarity_matrix[0]
    return first_row[0]

In [None]:
if __name__ == '__main__':
    # Experiment: compare the exact normalized angle between two random
    # points with the estimate obtained from random-projection LSH
    # signatures, and report the average relative error over all runs.
    dim = 20     # dimension of data points (# of features)
    bits = 128   # number of bits (planes) per signature
    run = 100    # number of runs
    avg = 0      # running sum of relative errors; divided by `run` at the end

    for r in range(run):
        # Generate two data points pt1, pt2
        pt1 = np.random.randn(dim)
        pt2 = np.random.randn(dim)
        # One random reference plane per signature bit
        ref_planes = np.random.randn(bits, dim)

        # Signature bits for the two data points
        sig1 = signature_bit(pt1, ref_planes)
        sig2 = signature_bit(pt2, ref_planes)

        # Cosine similarity, i.e. dot(pt1, pt2) / (length(pt1) * length(pt2))
        cosine = get_cosine_similarity(pt1, pt2)
        # Floating-point rounding can leave the value marginally outside
        # [-1, 1], where math.acos raises ValueError; clamp it first.
        cosine = max(-1.0, min(1.0, cosine))
        # Exact angle between the points, normalized to [0, 1]
        exact = math.acos(cosine) / math.pi

        # LSH estimate of the same quantity: the fraction of signature
        # bits on which the two points disagree
        cosine_hash = bitcount(sig1 ^ sig2) / bits

        # Relative difference between exact and LSH
        diff = abs(cosine_hash - exact) / exact
        avg += diff
        print('exact {:.2f}, hash {:.2f}, diff {:.2f}'.format(exact, cosine_hash, diff))

    print('avg diff = {:.2f}'.format(avg / run))

exact 0.55, hash 0.55, diff 0.01
exact 0.46, hash 0.55, diff 0.20
exact 0.51, hash 0.51, diff 0.00
exact 0.47, hash 0.39, diff 0.17
exact 0.45, hash 0.39, diff 0.13
exact 0.66, hash 0.60, diff 0.09
exact 0.64, hash 0.67, diff 0.06
exact 0.54, hash 0.41, diff 0.23
exact 0.50, hash 0.57, diff 0.14
exact 0.50, hash 0.51, diff 0.02
exact 0.52, hash 0.48, diff 0.08
exact 0.51, hash 0.59, diff 0.15
exact 0.41, hash 0.41, diff 0.00
exact 0.45, hash 0.43, diff 0.04
exact 0.49, hash 0.46, diff 0.06
exact 0.51, hash 0.57, diff 0.12
exact 0.59, hash 0.62, diff 0.05
exact 0.50, hash 0.48, diff 0.05
exact 0.49, hash 0.40, diff 0.19
exact 0.60, hash 0.64, diff 0.06
exact 0.40, hash 0.46, diff 0.14
exact 0.57, hash 0.62, diff 0.08
exact 0.58, hash 0.55, diff 0.05
exact 0.46, hash 0.45, diff 0.01
exact 0.49, hash 0.46, diff 0.05
exact 0.57, hash 0.57, diff 0.00
exact 0.46, hash 0.52, diff 0.14
exact 0.36, hash 0.38, diff 0.07
exact 0.58, hash 0.59, diff 0.03
exact 0.50, hash 0.45, diff 0.09
exact 0.50

In [None]:
# Reference 

