# Classifying CMU Face Images

# Load libraries

In [1]:
import random

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset, random_split

In [3]:
import optuna

# Check for MPS (Apple Silicon), CUDA (for Nvidia GPUs), or fallback to CPU

We're checking for different device options (MPS, CUDA, or CPU) to ensure that our PyTorch model runs on the most powerful hardware available. Running on a GPU (like CUDA or MPS) significantly speeds up computations, especially for tasks involving large datasets or deep learning models, by performing parallel processing. If no GPU is available, the code falls back to using the CPU, which is slower but still functional.

On Mac, newer Apple Silicon chips (M1/M2) support GPU acceleration through Metal Performance Shaders (MPS), which we check for to optimize performance.

In [4]:
if torch.backends.mps.is_available():
    device = torch.device('mps')  # Metal Performance Shaders for Mac
elif torch.cuda.is_available():
    device = torch.device('cuda')  # CUDA for Nvidia GPUs (if available on Mac)
else:
    device = torch.device('cpu')   # Fallback to CPU

print(f'Using device: {device}')

Using device: mps


# Loading/Exploring our data

We should resize the images to be 32x32 because the original LeNet-5 architecture expects input images of size 32x32, as it was originally designed for the MNIST dataset, where images have that specific resolution.

In [5]:
# Dataset transformations
transform = transforms.Compose([
    transforms.Grayscale(),  # Convert image to grayscale
    transforms.Resize((32, 32)),  # Resize images to 32x32
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize
])

In [6]:
# Load dataset
data = ImageFolder(root='./face_0/', transform=transform)

In [7]:
train_size = int(0.6 * len(data))
val_size = int(0.2 * len(data))
test_size = len(data) - train_size - val_size

train_data, val_data, test_data = random_split(data, [train_size, val_size, test_size])

print(f"Training set: {len(train_data)}, Validation set: {len(val_data)}, Testing set: {len(test_data)}")

Training set: 374, Validation set: 124, Testing set: 126


In [8]:
# Data loaders
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# Define the LeNet-5 architecture

We switched the original 10 output features to 20 in the final fully connected layer because the number of output neurons must match the number of classes in your dataset. The original model had 10 output neurons, suitable for datasets like MNIST with 10 classes. Since your dataset has 20 classes, we need 20 output neurons, with each neuron representing one class, allowing the model to output a score for each of the 20 possible classes.

Additionally, I'm also including dropout rate for hyperparameter tuning.

In [9]:
# Modify LeNet5 to include dropout, L1/L2 regularization
class LeNet5(nn.Module):
    def __init__(self, dropout_rate):
        super(LeNet5, self).__init__()

        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected layers with Dropout
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 20)

        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))

        x = x.view(x.size(0), -1)

        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x

In [10]:
# Objective function for Optuna
def objective(trial):
    # Hyperparameter tuning
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    l1_lambda = trial.suggest_float("l1_lambda", 1e-6, 1e-2, log=True)
    l2_lambda = trial.suggest_float("l2_lambda", 1e-6, 1e-2, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 0.0, 0.5)

    # Instantiate the model with tuned parameters
    model = LeNet5(dropout_rate).to(device)

    # Define the optimizer with L2 regularization
    optimizer = optim.SGD(
        model.parameters(), 
        lr=learning_rate, 
        momentum=0.9, 
        weight_decay=l2_lambda  # L2 regularization (also known as weight decay)
    )

    # Loss function with L1 regularization
    criterion = nn.CrossEntropyLoss()

    # Data Loaders (use the previously defined loaders)
    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

    # Training loop
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader, 0):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # L1 regularization
            l1_loss = 0
            for param in model.parameters():
                l1_loss += torch.sum(torch.abs(param))
            loss += l1_lambda * l1_loss

            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        trial.report(val_loss, epoch)

        # Handle pruning based on the intermediate results.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return val_loss

In [11]:
%%time

# Optuna study
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

# Best hyperparameters
print(f"Best hyperparameters: {study.best_trial.params}")

[I 2024-09-18 13:36:15,974] A new study created in memory with name: no-name-fed62642-20a0-4f32-ba64-b26b901ff1e5
[I 2024-09-18 13:36:34,628] Trial 0 finished with value: 3.000635862350464 and parameters: {'lr': 0.004514434262947315, 'l1_lambda': 2.5708069040824766e-05, 'l2_lambda': 8.816498122509794e-06, 'dropout_rate': 0.4516834438704467}. Best is trial 0 with value: 3.000635862350464.
[I 2024-09-18 13:36:53,018] Trial 1 finished with value: 3.0007574558258057 and parameters: {'lr': 0.0011525811387040228, 'l1_lambda': 0.0011605095398469771, 'l2_lambda': 0.0010263065347655988, 'dropout_rate': 0.489364635236485}. Best is trial 0 with value: 3.000635862350464.
[I 2024-09-18 13:37:11,334] Trial 2 finished with value: 3.003084182739258 and parameters: {'lr': 0.0020671788141021956, 'l1_lambda': 0.0001411568831359299, 'l2_lambda': 0.0005026857802810134, 'dropout_rate': 0.03792366944921827}. Best is trial 0 with value: 3.000635862350464.
[I 2024-09-18 13:37:29,559] Trial 3 finished with valu

Best hyperparameters: {'lr': 0.00019878060666910093, 'l1_lambda': 0.000173830493954671, 'l2_lambda': 0.0004381773422481323, 'dropout_rate': 0.25496347559264027}
CPU times: user 4min 23s, sys: 3.36 s, total: 4min 27s
Wall time: 4min 28s


In [12]:
# Best hyperparameters
print(f"Best val loss: {study.best_value}")


Best val loss: 2.9859237670898438
