In [None]:
import numpy as np
import os
import cv2
from tqdm import tqdm


%matplotlib inline


In [None]:
# Dataset locations: point these at the directories that hold the men's and
# women's face images in your environment (the defaults are Kaggle input paths).

MEN_DATA_PATH = '/kaggle/input/indian-face-dataset/men'
WOMEN_DATA_PATH = '/kaggle/input/indian-face-dataset/women'

In [None]:
# Size every face image is resized to before PCA; change it as needed.
# It is handed to cv2.resize as the output size, which OpenCV reads as
# (width, height), so it also determines the resolution of the eigenfaces.
target_dim = (120,120)



# Upper bound on the number of images kept for PCA; lower it if memory is tight.
# Only the first SPLIT_SIZE loaded images are kept. Use None to keep every image.
SPLIT_SIZE = 10000


In [None]:
# Module-level placeholders for the image dimensions, initialised to 0.
# NOTE(review): no visible cell reads these globals -- confirm whether a later
# cell still needs them before relying on (or removing) them.
image_width= 0
image_height= 0

In [None]:
def is_image_file(filename):
    """Tell whether ``filename`` ends with a known image extension.

    The comparison is case-insensitive, so ``photo.JPG`` and ``photo.jpg``
    are both accepted. Only the name is inspected; the file is never opened.
    """
    lowered = filename.lower()
    return any(
        lowered.endswith(suffix)
        for suffix in ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
    )

In [None]:
def list_images_by_class(base_dir):
    """Map each class sub-directory of ``base_dir`` to the image paths inside it.

    Parameters
    ----------
    base_dir : str
        Directory whose immediate sub-directories each represent one class.

    Returns
    -------
    dict[str, list[str]]
        ``{class_name: [image_path, ...]}``. Entries of ``base_dir`` that are
        not directories are ignored, and within a class directory only names
        accepted by ``is_image_file`` are kept. A class directory without any
        matching file maps to an empty list.

    Notes
    -----
    ``os.listdir`` returns entries in arbitrary order, so both listings are
    sorted. This keeps the image order (and therefore any later truncation of
    the dataset) reproducible across runs and machines.
    """
    data = {}

    for class_name in sorted(os.listdir(base_dir)):
        class_path = os.path.join(base_dir, class_name)

        # Loose files directly under base_dir do not belong to any class.
        if not os.path.isdir(class_path):
            continue

        data[class_name] = [
            os.path.join(class_path, file_name)
            for file_name in sorted(os.listdir(class_path))
            if is_image_file(file_name)
        ]

    return data

In [None]:
# Scan both dataset roots; each result maps a class folder name to its image paths.
men, women = (
    list_images_by_class(MEN_DATA_PATH),
    list_images_by_class(WOMEN_DATA_PATH),
)

In [None]:
def load_grayscale_images(paths_by_class, size):
    """Read every listed image as grayscale and resize it to ``size``.

    Parameters
    ----------
    paths_by_class : dict[str, list[str]]
        Mapping of class name to image file paths.
    size : tuple[int, int]
        Output size handed to ``cv2.resize`` (OpenCV reads it as width, height).

    Returns
    -------
    list
        The resized images, in the iteration order of ``paths_by_class``.

    Notes
    -----
    ``cv2.imread`` does not raise for a missing, corrupt or unsupported file;
    it returns ``None``, and ``cv2.resize`` would then fail. Such files are
    skipped and reported instead of aborting the whole load.
    """
    images = []
    skipped = []

    for _class_name, image_paths in tqdm(paths_by_class.items(), total=len(paths_by_class)):
        for image_path in image_paths:
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                skipped.append(image_path)
                continue
            images.append(cv2.resize(img, size))

    if skipped:
        print(f"Skipped {len(skipped)} unreadable image(s), e.g. {skipped[0]}")

    return images


# Men are loaded first, then women, so that is the order of the combined list.
all_image = load_grayscale_images(men, target_dim) + load_grayscale_images(women, target_dim)

In [None]:
# Convert the Python list of images into a single NumPy array so it can be
# sliced and reshaped as a whole. The shape shown below is expected to be
# (n_images, height, width), assuming every image was resized to the same size.
all_image = np.array(all_image)
all_image.shape

In [None]:
# Optionally cap the number of images to limit memory use.
# A bare `all_image[:SPLIT_SIZE]` would silently drop the last image when
# SPLIT_SIZE is -1, so None and negative values both mean "keep everything".
if SPLIT_SIZE is not None and SPLIT_SIZE >= 0:
    all_image = all_image[:SPLIT_SIZE]
all_image.shape

In [None]:
# Returns the eigenfaces sorted by descending eigenvalue.
def principalComponentAnalysis(X, num_components=None):
    """Compute eigenfaces of an image stack with the small-matrix PCA trick.

    Instead of decomposing the (pixels x pixels) covariance matrix, the much
    smaller (images x images) matrix ``A @ A.T / n`` is decomposed, where ``A``
    holds the mean-centred, flattened images. Each of its eigenvectors ``u`` is
    mapped back to pixel space as ``A.T @ u`` and normalised to unit length.

    Parameters
    ----------
    X : array-like
        Image stack whose first axis indexes the images, e.g.
        ``(n_images, height, width)``. Remaining axes are flattened.
    num_components : int or None, optional
        Number of leading components to return. ``None`` (default) returns all
        ``n_images`` components, matching the original behaviour.

    Returns
    -------
    eigen_values : ndarray
        Returned eigenvalues in descending order (real, clipped at zero).
    eigen_vectors : ndarray, shape (n_pixels, n_returned)
        One unit-length eigenface per column, in the same order.
    preserved_variance : float
        Share of the total variance carried by the returned components
        (1.0 when every component is returned).
    mean_face : ndarray, shape (n_pixels,)
        Mean of the flattened images; subtract it before projecting new data.

    Raises
    ------
    ValueError
        If ``X`` contains no images or ``num_components`` is smaller than 1.

    Notes
    -----
    Mean-centring leaves at most ``n_images - 1`` informative directions, so
    components whose eigenvalue is (numerically) zero carry no information.
    """
    X = np.asarray(X)
    total_images = X.shape[0]
    if total_images == 0:
        raise ValueError("principalComponentAnalysis requires at least one image")
    if num_components is not None and num_components < 1:
        raise ValueError("num_components must be a positive integer or None")

    flattened_images = X.reshape(total_images, -1)

    mean_face = np.mean(flattened_images, axis=0)
    centered_images = flattened_images - mean_face

    # (images x images) surrogate of the covariance matrix.
    gram_matrix = np.dot(centered_images, centered_images.T) / total_images

    # The matrix is symmetric, so eigh is the right solver: it guarantees real
    # eigenvalues/eigenvectors, whereas eig may return complex round-off.
    eigen_values, eigen_vectors_temp = np.linalg.eigh(gram_matrix)

    # eigh returns ascending eigenvalues; reorder to descending.
    sorted_indices = np.argsort(eigen_values)[::-1]
    # The matrix is positive semi-definite, so negatives are rounding noise.
    eigen_values = np.clip(eigen_values[sorted_indices], 0, None)
    eigen_vectors_temp = eigen_vectors_temp[:, sorted_indices]

    total_variance = np.sum(eigen_values)

    # Truncate before projecting so unused eigenfaces are never materialised.
    if num_components is not None:
        eigen_values = eigen_values[:num_components]
        eigen_vectors_temp = eigen_vectors_temp[:, :num_components]

    # Map the small-space eigenvectors back to pixel space.
    eigen_vectors = np.dot(centered_images.T, eigen_vectors_temp)
    norms = np.linalg.norm(eigen_vectors, axis=0)
    # A zero-length column (e.g. all images identical) stays zero instead of
    # turning into NaN through 0/0.
    eigen_vectors = eigen_vectors / np.where(norms > 0, norms, 1.0)

    if total_variance > 0:
        preserved_variance = float(np.sum(eigen_values) / total_variance)
    else:
        # No variance at all: nothing is lost by any selection.
        preserved_variance = 1.0

    return eigen_values, eigen_vectors, preserved_variance, mean_face


In [None]:
# Run PCA on the image stack; the third return value (preserved-variance
# ratio) is not needed here and is discarded.
(
    eigen_values,
    eigen_faces,
    _,
    mean_face,
) = principalComponentAnalysis(all_image)

# One eigenface per column.
eigen_faces.shape

In [None]:
# Persist the PCA results in the working directory so they can be reloaded
# later with np.load.
np.save(file='eigen_faces.npy', arr=eigen_faces)
np.save(file='mean_faces.npy', arr=mean_face)