# <center>Demo on Hashing</center>

In [None]:
# Extract the YALE.zip archive (notebook shell command)
! unzip YALE.zip

## Import libraries and Load data

In [2]:
import os
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

In [3]:
# Read every image file in the folder and store the pixel arrays in a list.
folder = "/content/YALE"
images = []
# os.listdir returns entries in arbitrary order; sorting keeps each image's
# index in the list the same on every run.
for file in sorted(os.listdir(folder)):
    path = os.path.join(folder, file)
    # Skip sub-directories and other non-file entries that cannot be read as images
    if not os.path.isfile(path):
        continue
    # NOTE(review): imread is expected to return an array or raise, so no
    # None check is needed here - confirm against the matplotlib docs.
    img = mpimg.imread(path)
    images.append(img)

In [None]:
# Number of images loaded from the folder
len(images)

In [14]:
# Flatten every image into a 1-D vector, scale it to unit length and store it in a list
image_vector = []
for image in images:
    # Height and width of the current image; slicing keeps this working for
    # colour images whose shape has a third (channel) entry.
    row, col = image.shape[:2]
    # reshape(-1) flattens an array of any rank into one dimension
    img_vec = image.reshape(-1)
    norm = np.linalg.norm(img_vec)
    # An all-zero image has norm 0: keep it as a zero vector rather than
    # dividing by zero and filling the vector with NaN.
    img_vec_norm = img_vec / norm if norm > 0 else img_vec.astype(float)
    image_vector.append(img_vec_norm)

# Locality Sensitive Hashing – Random Projections

Locality-Sensitive Hashing (LSH) is a technique used in computer science and data mining to approximate and speed up the nearest neighbor search in high-dimensional spaces. It is particularly useful when dealing with large datasets and high-dimensional data, where traditional exact nearest neighbor search algorithms become computationally expensive.

The main idea behind LSH is to hash data points in such a way that similar data points are more likely to be hashed to the same or nearby buckets, allowing for efficient approximate similarity search. LSH works on the principle that if two points are similar, they should collide or hash to the same bucket with high probability, but if they are dissimilar, they should collide with low probability.

## Random Projections
The core idea behind random projection is that if points in a vector space are of sufficiently high dimension, then they may be projected into a suitable lower-dimensional space in a way which approximately preserves the distances between the points.

## Steps:
1. Generate K random unit vectors.

2. Compute the dot product of each unit vector with the data vector. If the result is greater than or equal to 0, generate a 1; otherwise generate a 0. Do this for all K unit vectors.

3. Concatenate the bit values computed from the K dot products. The resulting sequence of 0's and 1's is the hash.


In [10]:
def genRandomHashVectors(m, length, seed=None):
    """Generate random unit vectors to use as hashing directions.

    Each vector is sampled from a standard normal distribution and then
    scaled to unit length. Normalised Gaussian samples are spread evenly
    over the unit sphere, whereas normalising samples drawn from the cube
    [-1, 1]^length favours directions along the cube's diagonals.

    Args:
        m: Number of vectors to generate (one per hash bit).
        length: Number of components in each vector.
        seed: Optional integer seed. When given, the same vectors are
            returned on every call; when None, NumPy's global random
            state is used.

    Returns:
        A list of ``m`` one-dimensional NumPy arrays of size ``length``,
        each with Euclidean norm 1.
    """
    # A private RandomState keeps seeded calls reproducible without touching
    # the global generator that unseeded calls rely on.
    rng = np.random if seed is None else np.random.RandomState(seed)
    hash_vector = []
    for _ in range(m):
        v = rng.normal(0.0, 1.0, length)
        hash_vector.append(v / np.linalg.norm(v))
    return hash_vector

In [11]:
def localSensitiveHashing(hash_vector, data):
    """Compute the random-projection hash of ``data`` as a bit string.

    One bit is produced per projection vector: '1' when the dot product of
    ``data`` with that vector is greater than or equal to 0, otherwise '0'.
    A dot product of exactly 0 maps to '1', as stated in the notebook's
    description of the hashing steps.

    Args:
        hash_vector: Iterable of projection vectors, each with the same
            number of components as ``data``.
        data: The vector to hash.

    Returns:
        A string of '0'/'1' characters, one per projection vector, in the
        order the vectors are given. Empty when there are no vectors.
    """
    return ''.join(
        '1' if np.dot(data, direction) >= 0 else '0'
        for direction in hash_vector
    )

In [12]:
# Quick illustration of str.join: the list items are concatenated with no separator
x=['I', 'am', 'a', 'boy']
''.join(x)

'Iamaboy'

In [15]:
# Generate 20 random unit vectors (one per hash bit), each as long as an image vector
hash_vector = genRandomHashVectors(20,len(image_vector[0]))

In [None]:
# Inspect the generated hash vectors
hash_vector

In [None]:
# Print the hash code of the first image
print(localSensitiveHashing(hash_vector,image_vector[0]))

In [21]:
# Build a dictionary that maps each hash code to the indices of the images sharing it
image_dict = {}
for i, vector in enumerate(image_vector):
    hash_code = localSensitiveHashing(hash_vector, vector)
    # setdefault starts a new index list the first time a hash code is seen
    image_dict.setdefault(hash_code, []).append(i)

In [22]:
# Split the dictionary into two parallel lists: the hash codes and their index lists
keys = list(image_dict)
values = [image_dict[key] for key in keys]

In [25]:
# Number of distinct hash codes (buckets)
len(keys)

94

In [None]:
# Image indices grouped by hash code
values

In [None]:
# Full mapping from hash code to image indices
print(image_dict)

In [36]:
# Plotting images with same hash code
def plotImages(images, img_indices):
    """Show the images at the given indices together in one figure.

    The selected images are drawn on a grid with two rows, in the order
    they appear in ``images``. The figure's default size is multiplied by
    the number of images drawn. Nothing is drawn when no index matches.

    Args:
        images: Sequence of image arrays that ``plt.imshow`` can display.
        img_indices: Collection of integer positions into ``images``.
    """
    wanted = set(img_indices)  # constant-time membership test per image
    imgs = [image for i, image in enumerate(images) if i in wanted]
    if not imgs:
        # Scaling the figure by zero images would leave it with zero size
        return
    n_images = len(imgs)
    rows = 2
    columns = int(np.ceil(n_images / float(rows)))
    fig = plt.figure()
    plt.gray()  # grayscale colormap; set once, it is the same for every image
    for n, image in enumerate(imgs):
        fig.add_subplot(rows, columns, n + 1)
        plt.imshow(image)  # draws on the subplot just added (the current axes)
    # Enlarge the figure in proportion to the number of images shown
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_images)
    plt.show()

In [None]:
# Image indices stored in the bucket at position 15
values[15]

In [None]:
# Plot the images whose indices are in the bucket at position 15
plotImages(images, values[15])