<p style="font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 10px 100px;">Solution to Facial Recognition System using an SVM Model</p>

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import shutil
import errno
import cv2
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import tensorflow as tf
import matplotlib.pyplot as plt

# Copying Dataset

In [None]:
def copy(src, dest):
    """Copy 'src' to 'dest', whether 'src' is a directory tree or a single file.

    A directory copy is attempted first. If that fails because 'src' is not a
    directory, 'src' is copied as a plain file instead. Any other OSError is
    reported on stdout and not raised.
    """
    try:
        # Recursive copy of the whole tree rooted at 'src'.
        shutil.copytree(src, dest)
    except OSError as err:
        if err.errno != errno.ENOTDIR:
            # Anything other than "source is not a directory" is only reported.
            print('Directory not copied. Error: %s' % err)
            return
        # 'src' is a regular file: fall back to a single-file copy.
        shutil.copy(src, dest)

# Where the dataset currently lives, and where it is mirrored to.
src = '../input/lfwpeople'
dest = '../LFW/lfw4/lfw_home'
copy(src, dest)
# List both folders (source first, then destination) so the copy can be checked by eye.
for folder in (src, dest):
    print(os.listdir(folder))

# Loading Dataset

In [None]:
# Root folder that holds the local copy of the dataset.
path = '../LFW/lfw4/'
print("Fetching LFW people dataset from:", path)

# Load the dataset from the local folder only (download_if_missing=False means nothing is downloaded),
# keeping just the people who have at least 80 images.
lfw_dataset = fetch_lfw_people(
    data_home=path,
    min_faces_per_person=80,
    download_if_missing=False,
)

# 'images' is indexed as (sample, row, column); unpack the sample count and the image height/width.
n_samples, h, w = lfw_dataset.images.shape
print("Fetched dataset with", n_samples, "samples")
# Seed NumPy's global random number generator so that later steps drawing from it are repeatable.
np.random.seed(42)

# Feature matrix 'X' (one row per image) and the integer class label 'y' of each row.
X, y = lfw_dataset.data, lfw_dataset.target
# Number of columns in 'X', i.e. the number of values stored per image.
n_features = X.shape[1]

# 'target_names' is indexed by the class labels in 'y'; its length is the number of classes.
target_names = lfw_dataset.target_names
n_classes = len(target_names)

# Data Exploration

In [None]:
# Summarise the dataset: sample count, class count and image dimensions (one per line).
print(
    f"Number of images: {n_samples}",
    f"Number of classes: {n_classes}",
    f"Image shape: {h} x {w}",
    sep="\n",
)

In [None]:
# Preview the first five faces side by side in a single 10x5 figure.
fig, axes = plt.subplots(1, 5, figsize=(10, 5))
for i, ax in enumerate(axes):
    # Each row of X is a flattened image; restore its (h, w) shape for display.
    ax.imshow(X[i].reshape((h, w)), cmap='gray')
    # Title each face with the name of the person it is labelled as.
    ax.set_title(target_names[y[i]])
    ax.axis('off')
plt.show()

In [None]:
# Count how many images each class has and draw the counts as a bar chart.
label, counts = np.unique(y, return_counts=True)
fig, ax = plt.subplots(figsize=(15, 5))
# One bar per class, labelled with the person's name.
ax.bar(target_names[label], counts)
ax.set_xlabel('Class')
ax.set_ylabel('Number of images')
ax.set_title('Distribution of classes')
plt.show()

# Data Splitting

In [None]:
# Hold out 25% of the samples for testing; random_state makes the split repeatable.
# stratify=y keeps the proportion of each person the same in the training and test sets,
# so the held-out evaluation is not skewed by an unlucky draw of the less frequent classes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# Applying PCA

In [None]:
# Number of principal components ("eigenfaces") to keep.
n_components = 200
print(f"Extracting the top {n_components} eigenfaces from {X_train.shape[0]} faces")

# Fit PCA on the training data only, so nothing about the test set leaks into the projection.
# - svd_solver='randomized' uses an approximate solver that is efficient for large inputs.
# - whiten=True rescales the projected components to unit variance.
# - random_state pins the randomized solver, so the result does not depend on the state of
#   NumPy's global random number generator (and therefore not on cell execution order).
pca = PCA(
    n_components=n_components,
    svd_solver='randomized',
    whiten=True,
    random_state=42,
).fit(X_train)

# View each principal component as an image with the same shape as the input faces.
eigenfaces = pca.components_.reshape((n_components, h, w))

# Project both the training and the test data onto the fitted components.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# Fitting Model

In [None]:
print("Fitting the classifier to the training set...")
# Candidate hyper-parameter values; the grid search tries every (C, gamma) combination.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}

# RBF-kernel SVM with class weights balanced, wrapped in a cross-validated grid search.
base_svc = SVC(kernel='rbf', class_weight='balanced')
clf = GridSearchCV(base_svc, param_grid)
# Run the search on the PCA-projected training data.
clf.fit(X_train_pca, y_train)

# Keep the full cross-validation results for inspection.
cv_results = clf.cv_results_

# Report the parameter combination the search selected.
print("Best parameters found by grid search:", clf.best_params_, sep="\n")

# Evaluation

In [None]:
# Predict the class label of every PCA-projected test sample.
y_pred = clf.predict(X_test_pca)

# Per-class precision / recall / F1 on the test set.
# 'labels' is passed explicitly so that 'target_names' always lines up with the class indices,
# even if some class happens to be absent from both y_test and y_pred (without it the report
# raises because the number of names no longer matches the number of labels it finds).
print("Predicting people's names on the test set")
print(
    classification_report(
        y_test,
        y_pred,
        labels=np.arange(n_classes),
        target_names=target_names,
    )
)

In [None]:
# Confusion matrix for the test-set predictions, with rows/columns ordered by class index 0..n_classes-1.
conf_matrix = confusion_matrix(y_test, y_pred, labels=range(n_classes))
print(conf_matrix)