In [28]:
import pylab as pl
import numpy as np
from matplotlib import pyplot as plt
from sklearn.datasets import fetch_lfw_people

# Fetching and Analysing the Data

In [29]:
# Load the Labeled Faces in the Wild dataset (downloaded on first use),
# restricted to well-represented people and down-scaled to keep the
# feature count manageable.
lfw_people = fetch_lfw_people(
    min_faces_per_person=70,  # keep only people with at least 70 pictures
    resize=0.4,               # ratio used to resize each face picture
)

In [30]:
# Inspect which fields the returned dataset container exposes.
lfw_people.keys()

dict_keys(['data', 'images', 'target', 'target_names', 'DESCR'])

In [31]:
# Seed NumPy's legacy global RNG so later steps that fall back on it are
# repeatable when the notebook is run top to bottom.
np.random.seed(42)

# `images` is a 3-D array: one (height x width) picture per sample.
# h and w are needed later to reshape flat vectors back into images.
n_samples, h, w = lfw_people.images.shape

In [32]:
# Show the image stack dimensions: (n_samples, height, width).
lfw_people.images.shape

(904, 50, 37)

In [33]:
# Feature matrix used by the estimators below: one row per sample,
# one column per feature.
X = lfw_people.data

# Number of columns (features) in the 2-D feature matrix.
n_features = X.shape[-1]

In [34]:
# Labels to predict, plus the human-readable name for each class.
target_names = lfw_people.target_names
y = lfw_people.target

# One entry in `target_names` per class.
n_classes = len(target_names)

In [35]:
# Display the names of the people (classes) present in the dataset.
target_names

array(['Donald Rumsfeld', 'George W Bush', 'Gerhard Schroeder',
       'Tony Blair'], dtype='<U17')

# Splitting the data into Training and Testing

In [36]:
from sklearn.model_selection import train_test_split

In [37]:
# Hold out a quarter of the samples for the final evaluation; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Using PCA (the successor to the removed RandomizedPCA class) for Dimensionality Reduction

In [38]:
from sklearn.decomposition import PCA as RandomizedPCA

In [39]:
# Number of principal components ("eigenfaces") to keep.
n_components = 70

# Fit the projection on the training split only, so no information from the
# test split leaks into the learned components.
#
# `RandomizedPCA` is just an alias for `PCA` here, so the randomized SVD solver
# is requested explicitly rather than left to the default 'auto' heuristic.
# The solver is stochastic: pinning `random_state` makes this cell repeatable
# on its own instead of depending on the global NumPy seed having been set
# (and left untouched) by an earlier cell.
# `whiten=True` rescales the projected components to unit variance.
pca = RandomizedPCA(
    n_components=n_components,
    svd_solver='randomized',
    whiten=True,
    random_state=42,
).fit(X_train)

In [40]:
# Fold each flat principal component back into an (h, w) picture so the
# components can be viewed as "eigenfaces".
eigenfaces = pca.components_.reshape(n_components, h, w)

In [41]:
# Project both splits with the PCA that was fitted on the training data.
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

# Tuning the hyperparameters and applying SVC

In [42]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [43]:
# Candidate hyperparameter values for the grid search; every combination of
# C and gamma listed here is tried.
param_grid = dict(
    C=[1e3, 5e3, 1e4, 5e4, 1e5],
    gamma=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
)

In [44]:
# Cross-validated search over `param_grid` for an RBF-kernel SVM.
# class_weight='balanced' weights classes inversely to their frequency.
clf = GridSearchCV(
    estimator=SVC(kernel='rbf', class_weight='balanced'),
    param_grid=param_grid,
)

In [45]:
# Run the grid search on the PCA-projected training data. `fit` trains the
# search object in place and returns that same object, so no re-assignment is
# needed; the trailing semicolon keeps the notebook from echoing its repr.
clf.fit(X_train_pca, y_train);

In [46]:
# Report the hyperparameter combination the grid search selected.
print("Best estimator found by grid search:\n", clf.best_estimator_)

Best estimator found by grid search:
 SVC(C=1000.0, class_weight='balanced', gamma=0.01)


# Model Evaluation

In [47]:
# Predict labels for the held-out test split (already projected with the
# training-fitted PCA) using the tuned grid-search object.
y_pred = clf.predict(X_test_pca)

In [48]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [49]:
# Per-class precision / recall / F1 on the held-out split, labelled with the
# people's names instead of integer class ids.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=target_names,
    )
)

                   precision    recall  f1-score   support

  Donald Rumsfeld       0.73      0.75      0.74        32
    George W Bush       0.89      0.95      0.92       129
Gerhard Schroeder       0.86      0.62      0.72        29
       Tony Blair       0.86      0.83      0.85        36

         accuracy                           0.86       226
        macro avg       0.83      0.79      0.81       226
     weighted avg       0.86      0.86      0.86       226



In [50]:
# Confusion matrix on the held-out split: rows are true classes, columns are
# predicted classes, both ordered 0 .. n_classes - 1.
print(
    confusion_matrix(
        y_true=y_test,
        y_pred=y_pred,
        labels=range(n_classes),
    )
)

[[ 24   8   0   0]
 [  6 122   1   0]
 [  2   4  18   5]
 [  1   3   2  30]]


In [51]:
from sklearn.metrics import accuracy_score

# Overall fraction of test samples whose predicted label matches the truth.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

0.8584070796460177
