# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
from random import sample
import shutil
from zipfile import ZipFile ## Download this library if not available
import warnings

warnings.filterwarnings("ignore") ##Ignore all sort of warnings

# Extracting data from zip

In [None]:
my_dir = os.getcwd() ## my_dir is my current working folder
zip_folder = os.path.join(my_dir,"yalefaces.zip")
print("Path to the zipped folder is {}".format(zip_folder))
with ZipFile(zip_folder, 'r') as zip: 
    zip.extractall()

# Working with data directories

In [52]:
data_folder = os.path.join(my_dir,"yaleface")
file_list = os.listdir(data_folder)

In [53]:
train_folder_path = os.path.join(my_dir,"Faces_Train")
test_folder_path = os.path.join(my_dir,"Faces_Test")

## Delete the folders if they exist already
if os.path.exists(train_folder_path):
    shutil.rmtree(train_folder_path)

if os.path.exists(test_folder_path):
    shutil.rmtree(test_folder_path)

os.mkdir(train_folder_path)  ## Creates a new directory
os.mkdir(test_folder_path) ## Creates a new directory

idx_list = [str(i).zfill(2) for i in range(1,16,1)] ##Creates numbers with leading zeros(i.e. 01 instead of 1, 02 instaed of 2 etc.)
print(idx_list)

file_name_list = [[] for i in range(15)]

for i in range(len(idx_list)):
    for fname in file_list:
        if fname.startswith("subject"+idx_list[i]):
            file_name_list[i].append(os.path.join(data_folder,fname))

print(file_name_list)

for i in range(len(idx_list)):
    ls = file_name_list[i]  ## Finds a list within the 'file_name_list',which contains the paths to images of a particular subject
    
    ## Since a single list contains paths to the images of a particular subject and each subject has 11 images, we will randomly choose an index between 0 and 11
    test_idx = np.random.choice(11)  
    test_file = ls[test_idx]
    shutil.copy(test_file,test_folder_path)
    
    ls.remove(ls[test_idx])
    
    for train_file in ls:
        shutil.copy(train_file,train_folder_path)

['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15']
[['D:\\Eckovation\\yaleface\\subject01.centerlight', 'D:\\Eckovation\\yaleface\\subject01.glasses', 'D:\\Eckovation\\yaleface\\subject01.happy', 'D:\\Eckovation\\yaleface\\subject01.leftlight', 'D:\\Eckovation\\yaleface\\subject01.noglasses', 'D:\\Eckovation\\yaleface\\subject01.normal', 'D:\\Eckovation\\yaleface\\subject01.rightlight', 'D:\\Eckovation\\yaleface\\subject01.sad', 'D:\\Eckovation\\yaleface\\subject01.sleepy', 'D:\\Eckovation\\yaleface\\subject01.surprised', 'D:\\Eckovation\\yaleface\\subject01.wink'], ['D:\\Eckovation\\yaleface\\subject02.centerlight', 'D:\\Eckovation\\yaleface\\subject02.glasses', 'D:\\Eckovation\\yaleface\\subject02.happy', 'D:\\Eckovation\\yaleface\\subject02.leftlight', 'D:\\Eckovation\\yaleface\\subject02.noglasses', 'D:\\Eckovation\\yaleface\\subject02.normal', 'D:\\Eckovation\\yaleface\\subject02.rightlight', 'D:\\Eckovation\\yaleface\\subject02.sad', 'D:\\E

In [54]:
train_files_list = os.listdir(train_folder_path)
#print(train_files_list)
train_ls = []
for file in train_files_list:
    img_file = os.path.join(train_folder_path,file)
    arr = mpimg.imread(img_file)
    #print(arr.shape)
    arr = arr.reshape(1,arr.shape[0]*arr.shape[1]) ## Convert to a 1D matrix
    train_ls.append(np.ravel(arr)) ## Before appending, convert the 1D martix to a 1d array using np.ravel 
train_mat = np.matrix(train_ls)
print(train_mat.shape)
mean_img = np.mean(train_mat,axis=0)
print(mean_img.shape)
cov = np.cov(train_mat)
print(cov.shape)
eig_val,eig_vec = np.linalg.eig(cov)

(150, 77760)
(1, 77760)
(150, 150)


In [55]:
eigen_vec_ls = []
#eig1 = train_mat.T@eig_vec[:,0]
#print(eig1.shape)
for i in range(eig_vec.shape[1]):
    eig1 = train_mat.T@eig_vec[:,i]
    eig1 = eig1/eig_val[i]
    eigen_vec_ls.append(np.ravel(eig1))

In [56]:
sort_idx = np.argsort(eig_val) ## indices for eigenvalues which are in ascending order
sort_idx = sort_idx[::-1]

eig_val_sum = np.sum(eig_val)
temp_sum = 0
principal_eig_vec = []
principal_eig_val = []
i=0
while(temp_sum<0.95*eig_val_sum):
    principal_eig_vec.append(eigen_vec_ls[sort_idx[i]])
    principal_eig_val.append(eig_val[sort_idx[i]])
    temp_sum += eig_val[sort_idx[i]]
    i += 1
print("Number of components is {}".format(i))

Number of components is 28


## SOL 2

In [57]:
mu = mean_img

In [58]:
Q_hat = np.matrix(principal_eig_vec)

In [59]:
print(mu)
print(Q_hat)

[[123.02666667 123.12       123.48       ...  68.          68.
   68.        ]]
[[ 0.00213529+0.j  0.00214677+0.j  0.00215752+0.j ...  0.00116381+0.j
   0.00116381+0.j  0.00116381+0.j]
 [ 0.00074259+0.j  0.00060178+0.j  0.00053262+0.j ...  0.00052511+0.j
   0.00052511+0.j  0.00052511+0.j]
 [-0.00103015+0.j -0.00088853+0.j -0.00101814+0.j ... -0.00147415+0.j
  -0.00147415+0.j -0.00147415+0.j]
 ...
 [-0.01071212+0.j -0.0137249 +0.j -0.01541506+0.j ... -0.00600069+0.j
  -0.00600069+0.j -0.00600069+0.j]
 [-0.01102957+0.j -0.01038227+0.j -0.00783687+0.j ... -0.00315606+0.j
  -0.00315606+0.j -0.00315606+0.j]
 [ 0.01342805+0.j  0.01189997+0.j  0.01051877+0.j ...  0.00497032+0.j
   0.00497032+0.j  0.00497032+0.j]]


## SOL 3

In [60]:
test_files = os.listdir(test_folder_path)
feat_vec_ls = []
for file in test_files:
    img_file = os.path.join(test_folder_path,file)
    test_img = mpimg.imread(img_file)
    test_img = test_img.reshape(arr.shape[0]*arr.shape[1],1)
    test_img = test_img - mean_img.T
    #print(np.linalg.pinv(Q_hat).shape,test_img.shape)
    feat_vec = np.linalg.pinv(Q_hat).T@test_img
    feat_vec_ls.append(np.ravel(feat_vec))

In [61]:
print(feat_vec_ls)

[array([-2856.54807971+0.j, -7423.81718459+0.j, -1855.97720439+0.j,
       -1870.63843002+0.j, -2382.36374606+0.j,   428.72635078+0.j,
         717.25693231+0.j,   242.43817761+0.j,  1508.06039985+0.j,
        -487.93921429+0.j,   201.39918138+0.j,  1256.75232757+0.j,
       -1180.09925241+0.j,  -487.11949577+0.j,   137.97426772+0.j,
         493.31185267+0.j,   -70.56040171+0.j,  -374.0133857 +0.j,
         144.11429312+0.j,   197.64945051+0.j,  -342.55415672+0.j,
         131.86979126+0.j,  -238.2392443 +0.j,    78.67343603+0.j,
         300.6315067 +0.j,    33.9981009 +0.j,  -241.02018073+0.j,
         -27.94309214+0.j]), array([ 3.23490999e+02+0.j, -2.04240523e+04+0.j, -4.73584602e+03+0.j,
       -2.91183851e+02+0.j, -3.54676140e+02+0.j,  1.74924116e+03+0.j,
        2.59783511e+03+0.j, -1.10118556e+03+0.j, -1.65595479e+03+0.j,
        2.78264186e+02+0.j, -1.11337016e+03+0.j,  5.06278803e+02+0.j,
        3.97939073e+02+0.j,  3.06434052e+02+0.j, -6.86201294e+02+0.j,
       -1.7336334

## SOL 4 50 random vectors

In [62]:
vec_len = len(feat_vec_ls[0])

In [63]:
def genRandomHashVector(m, length):
    hash_vector = []
    for i in range(m):
        v = np.random.uniform(-1, 1, length)
        vcap = v / np.linalg.norm(v)
        hash_vector.append(vcap)
    return hash_vector

In [64]:
hash_vec = genRandomHashVector(50, vec_len)
print(hash_vec)

[array([ 0.1934939 ,  0.27243028, -0.2458886 , -0.15914453, -0.25981944,
       -0.18679621, -0.04237228, -0.06666435,  0.24039586, -0.24264393,
        0.1125829 ,  0.19782962,  0.05238855, -0.18984542, -0.03704131,
        0.06335903,  0.17880735,  0.10562989, -0.30396836,  0.266182  ,
        0.18735538, -0.24682503,  0.13759899, -0.24913813,  0.12686382,
        0.16538534,  0.12484094,  0.19704288]), array([-0.02603302,  0.25622859, -0.1715533 ,  0.14766962,  0.06585354,
       -0.25287981,  0.18156512, -0.29329017, -0.05330643,  0.22021571,
        0.29809373,  0.24060203,  0.2361327 ,  0.12951416, -0.22253012,
       -0.25301533,  0.29099691,  0.01275329,  0.13497443, -0.18631384,
        0.20555486, -0.28107322,  0.05285801,  0.00205663,  0.18118925,
       -0.07953464,  0.08366354,  0.05501709]), array([ 3.00631710e-01, -2.52454379e-02,  3.05957990e-01,  2.98366357e-01,
        9.29889776e-02,  6.84795301e-02,  1.89776105e-01,  2.84088004e-01,
       -2.39468256e-01, -2.094373

## SOL 5 50bit Hashes

In [65]:
def localSensitiveHashing(hash_vector, data):
    hash_code = []
    for i in range(len(hash_vector)):
        if np.dot(data, hash_vector[i]) > 0:
            hash_code.append('1')
        else:
            hash_code.append('0')
    return hash_code

In [66]:
hashes = []
for i in range(len(feat_vec_ls)):
    hash_cd = localSensitiveHashing(hash_vec, feat_vec_ls[i])
    hashes.append(hash_cd)
print(hashes)

[['0', '0', '0', '0', '1', '0', '1', '1', '1', '0', '1', '0', '0', '0', '0', '1', '1', '0', '0', '0', '1', '1', '0', '1', '1', '1', '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '1', '1', '0', '1', '1', '1', '0', '0', '1', '0', '0', '0', '1', '0'], ['0', '0', '1', '0', '1', '0', '1', '1', '1', '1', '1', '0', '0', '1', '0', '1', '1', '0', '1', '0', '1', '1', '0', '1', '1', '1', '0', '0', '0', '1', '0', '1', '0', '1', '0', '0', '1', '1', '0', '1', '0', '1', '0', '0', '1', '0', '0', '1', '1', '1'], ['0', '0', '1', '0', '0', '0', '1', '1', '1', '1', '1', '0', '0', '1', '0', '1', '1', '0', '0', '0', '1', '1', '0', '1', '1', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '1', '1', '0', '1', '0', '1', '0', '0', '1', '1', '0', '1', '1', '1'], ['1', '0', '1', '0', '0', '1', '0', '1', '0', '1', '1', '0', '0', '1', '1', '0', '0', '0', '1', '0', '1', '0', '0', '0', '1', '1', '1', '1', '0', '1', '0', '1', '0', '1', '0', '0', '1', '1', '0', '1', '0', '0', '1', '0', '1', '0', '0', '1', '1

## SOL 6 L1 NORM betweed hash reps 

In [67]:
n = np.random.randint(len(feat_vec_ls))
list_1 = []

In [68]:
for i in range(len(feat_vec_ls)):
    list_1.append(np.linalg.norm((feat_vec_ls[i] - feat_vec_ls[n]), ord=1))

In [69]:
print(sorted(list_1)) #sorted list

[0.0, 50657.77895900335, 84929.16195537405, 86340.29870858208, 88511.4844639466, 95193.8289269935, 96657.98507255239, 98140.50784706324, 101542.347149671, 103299.01215625754, 104437.29864308747, 104713.59463145027, 105963.69830332791, 108531.84880036137, 117869.28102802842]
