# Multiclass SVM Classifier

## Data Preprocessing

In [60]:
import pandas as pd
import numpy as np
import os

from time import time
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, roc_curve, auc
from sklearn.svm import SVC, LinearSVC
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from skimage.feature import hog
from PIL import Image

### Load data

In [61]:
name_path = './archive/lfw_allnames.csv'
images_path = './archive/lfw-deepfunneled/lfw-deepfunneled'

Only use classes with at least 100 images to simplify the problem.

In [62]:
df_names = pd.read_csv(name_path)
min_faces_per_person = 100
df_names = df_names.loc[df_names["images"] >= min_faces_per_person, :]
names = list(df_names["name"])

In [63]:
X = []
Y = []
for name in names:
    dir_path = os.path.join(images_path, name)
    list_images_name = os.listdir(dir_path)
    for image_name in list_images_name:
        image_path = os.path.join(dir_path, image_name)
        img_rgb = plt.imread(image_path)
        X.append(img_rgb)
        Y.append(name)
    print(f"Class: {name}, number of samples: {len(list_images_name)}.")
X = np.asarray(X)
Y = np.asarray(Y)

Class: Colin_Powell, number of samples: 236.
Class: Donald_Rumsfeld, number of samples: 121.
Class: George_W_Bush, number of samples: 530.
Class: Gerhard_Schroeder, number of samples: 109.
Class: Tony_Blair, number of samples: 144.


In [64]:
def create_features(img, show_hog=False):
    # flatten image
    color_features = img.flatten()
    # convert image to greyscale
    grey_image = np.array(0.299 * img[:, :, 0] + 0.587 * img[:, :, 1] + 0.114 * img[:, :, 2])
    # get HOG features from greyscale image
    hog_features, hog_image = hog(grey_image, visualize=True, block_norm='L2-Hys', pixels_per_cell=(16, 16))
    if show_hog:
        plt.imshow(hog_image, cmap="gray")
    # combine color and hog features into a single array
    flat_features = np.hstack((color_features, hog_features))
    return flat_features

In [65]:
feature_matrix = []
for x in X:
    feature_matrix.append(create_features(x))

In [66]:
# get shape of feature matrix
feature_matrix = np.asarray(feature_matrix)
print('Feature matrix shape is: ', feature_matrix.shape)

# define standard scaler
ss = StandardScaler()
# run this on our feature matrix
bees_stand = ss.fit_transform(feature_matrix)

pca = PCA(n_components=500)
# use fit_transform to run PCA on our standardized matrix
pca.fit(bees_stand)
X_pca = pca.transform(bees_stand)
print('PCA matrix shape is: ', X_pca.shape)

Feature matrix shape is:  (1140, 201189)
PCA matrix shape is:  (1140, 500)


In [67]:
# label_encoder object knows  
# how to understand word labels. 
label_encoder = LabelEncoder() 
  
# Encode labels in column 'species'. 
Y = label_encoder.fit_transform(Y) 

In [68]:
X_train, X_test, Y_train, Y_test = train_test_split(X_pca, Y, random_state=42)

In [70]:
class SVM:
    def __init__(self, learning_rate=0.001, _lambda=2, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self._lambda = _lambda
        self.weights = None
        self.bias = None

    def train(self, X, y):
        num_samples, num_features = X.shape
        unique_classes = np.unique(y)
        num_classes = len(unique_classes)

        # Initialize weights and bias
        self.weights = np.zeros((num_classes, num_features))
        self.bias = np.zeros(num_classes)

        # One-vs-All training
        for c in range(num_classes):
            binary_labels = np.where(y == unique_classes[c], 1, -1)

            for epoch in range(self.epochs):
                for i in range(num_samples):
                    xi = X[i]
                    yi = binary_labels[i]

                    # Update weights and bias using the SVM update rule
                    if yi * (np.dot(self.weights[c], xi) + self.bias[c]) <= 1:
                        self.weights[c] += self.learning_rate * (yi * xi - self._lambda * self.weights[c])
                        self.bias[c] += self.learning_rate * yi

    def predict(self, X):
        # Make predictions using the trained SVM
        predictions = []
        for xi in X:
            scores = np.dot(self.weights, xi) + self.bias
            predicted_class = np.argmax(scores)
            predictions.append(predicted_class)
        return np.array(predictions)

# Example usage:
# Assuming train_mat is your training data and train_label is the corresponding labels
# Ensure that your labels are encoded as integers (e.g., 0, 1, 2, ...) for multiclass classification

# Assuming train_mat is your training data and train_label is the corresponding labels
# Ensure that your labels are encoded as integers (e.g., 0, 1, 2, ...) for multiclass classification

# Create and train the SVM model
svm_model = SVM()
svm_model.train(X_train, Y_train)

# Make predictions on new data
# Assuming test_mat is your test data
test_predictions = svm_model.predict(X_test)
print("Test Predictions:", test_predictions)
accuracy = np.mean(test_predictions == Y_test)
print("Accuracy:", accuracy)

Test Predictions: [2 2 1 3 1 0 2 0 2 0 4 0 2 2 4 3 2 2 2 0 0 2 3 0 0 0 2 1 3 0 0 2 2 2 2 2 2
 2 2 4 1 2 2 0 4 1 0 4 3 1 2 2 2 2 4 0 4 1 0 3 2 2 0 2 4 2 2 4 2 2 2 4 2 2
 2 1 2 2 2 0 1 0 3 2 2 2 2 2 0 2 3 4 2 0 2 2 1 1 1 2 2 0 4 2 2 0 1 1 2 2 2
 3 2 3 2 2 2 2 0 0 0 0 1 2 3 4 3 4 2 2 2 4 0 2 2 0 2 2 0 3 4 2 2 3 0 2 0 1
 2 2 4 0 2 4 2 0 4 2 2 1 2 2 1 1 0 2 2 2 3 4 0 4 3 0 0 2 2 0 2 0 0 0 2 0 0
 2 2 1 4 0 4 3 0 1 2 4 2 4 2 1 3 2 0 2 2 0 1 4 2 0 0 2 2 2 2 2 2 2 1 0 2 2
 2 0 2 0 2 0 0 2 4 0 3 1 2 3 4 0 2 2 2 2 2 2 1 4 2 2 2 2 2 2 1 1 2 3 0 0 3
 0 2 0 2 2 2 2 2 2 2 3 1 3 1 1 0 2 4 0 0 2 0 0 2 2 4]
Accuracy: 0.8947368421052632
