In [1]:
import os
import shutil
from facenet_pytorch import MTCNN
import cv2
import torch
import numpy as np
from deepface import DeepFace
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


### **Get path, create, and crop the image with the corresponding person**
Get the path to the images, get the names of the directories (which are the political figures), scanned through all of the images, crop them, and then create a directory to store them

In [3]:
path_to_data = './dataset/'
path_to_images = './dataset/cropped/'

In [11]:
img_dir = [entry.path for entry in os.scandir(path_to_data) if entry.is_dir()]
img_dir

['./dataset/Vladimir Putin',
 './dataset/Xi Jinping',
 './dataset/Justin Trudeau',
 './dataset/Donald Trump',
 './dataset/Joe Biden']

In [4]:
os.path.exists(path_to_images)

True

In [5]:
# Load MTCNN face detector
mtcnn = MTCNN(keep_all=True)

In [15]:
### This code is to delete the cropped images folder and create a new one
# if os.path.exists(path_to_images):
#     shutil.rmtree(path_to_images)
# os.makedirs(path_to_images)

loop = 0
for person in os.listdir(path_to_data):
    
    # skip the first iteration cause it something wrong with the first person
    if loop == 0:
        loop += 1
        continue

    # ./dataset/person1
    person_path = os.path.join(path_to_data, person)
    # ./dataset/cropped/person1
    cropped_person_path = os.path.join(path_to_images, person)

    # create a directory for the person
    # os.makedirs(cropped_person_path, exist_ok=True)

    if not os.path.isdir(person_path):
        continue
    
    print(f"Processing images for {person}")

    for image_name in os.listdir(person_path):
        image_path = os.path.join(person_path, image_name)
        
        try:
            # Load image
            img = Image.open(image_path)
            # Detect faces
            boxes, _ = mtcnn.detect(img)

            # If no faces are detected, move on to the next image
            if boxes is not None:

                # loop through all the faces detected
                for i, box in enumerate(boxes):

                    # Get the coordinates of the box
                    x, y, x2, y2 = map(int, box)
                    face = img.crop((x, y, x2, y2))

                    # Save the face
                    face_save_path = face.save(os.path.join(cropped_person_path, f"{person}_{i}.jpg"))
                    face.save(face_save_path)
                    print(f"Saved {face_save_path}")
        except Exception as e:
            print(f"Error processing {image_path}: {e}")
            
print("Face detection done")
    

Processing images for Vladimir Putin
Error processing ./dataset/Vladimir Putin/f9019acd83844a216489dd46c0caa81b47c86edf.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/45d545b8af8b03517e84618086f7353aa3b3c958.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/ad31b3eeefa4cd1bb9bbf6484f30e326a1334de2.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/4abe5f488defbc533b24a8c910234c51e64b773d.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/1de3be676ab68094ff52cac4cb8fba4ff5300d56.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/935cf3b6a3a2a48c4e42510fb7dad8fd09e81b51.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/b919b6e96a0fc29703f4f1c6523ed54473ec7c86.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/4e39eab840d24eb94d15dc8b2be8fea974c988fd.jpg: unknown file extension: 
Error processing ./dataset/Vladimir Putin/a6267ada27b21e1d0

### **Creating a Custom Face Dataset for PyTorch Trainings**
This section defines a custom PyTorch dataset class (FaceDataset) to load images from a structured dataset directory. Each subdirectory in root_dir represents a different person, and images inside are labeled accordingly.

Loops through all subdirectories (each representing a person).
Stores image paths and corresponding labels in a list (self.data).
Implements PyTorch’s Dataset methods:

__len__() → Returns total number of images. 

__getitem__() → Loads an image and applies transformations.

In [83]:
# Remove the .DS_Store file
if os.path.exists('./dataset/cropped/.DS_Store'):
    os.remove('./dataset/cropped/.DS_Store')

In [84]:
people = sorted(os.listdir(path_to_images))
people

['Donald Trump', 'Joe Biden', 'Justin Trudeau', 'Vladimir Putin', 'Xi Jinping']

In [85]:
label_dict = {name : i for i, name in enumerate(people)}
print("Label dictionary:", label_dict)

Label dictionary: {'Donald Trump': 0, 'Joe Biden': 1, 'Justin Trudeau': 2, 'Vladimir Putin': 3, 'Xi Jinping': 4}


In [86]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)), # Resize to fit ResNet input
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]) # Normalize pixel values to between -1 and 1
])

In [None]:
class FaceDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data= []

        # Loop through all the people in the dataset
        for person in os.listdir(root_dir):
            # Get the path to the person's directory
            person_path = os.path.join(root_dir, person)

            # Loop through all the images of the person
            if os.path.isdir(person_path):
                for image_name in os.listdir(person_path):
                    # Get the path to the image and append it to the data list
                    image_path = os.path.join(person_path, image_name)
                    self.data.append((image_path, label_dict[person]))
    # Required by PyTorch’s Dataset class. 
    # Helps DataLoader determine the number of batches.
    def __len__(self):
        return len(self.data)
    
    # DataLoader calls this method to get a sample
    def __getitem__(self, idx: int):
        img_path, label = self.data[idx]
        img = Image.open(img_path).convert('RGB')

        if self.transform:
            img = self.transform(img)
        return img, label
    
face_dataset = FaceDataset(root_dir=path_to_images, transform=transform)

# getitem test
img, label = face_dataset[0]
print("Image shape:", img.shape)
print("Label:", label)

Image shape: torch.Size([3, 224, 224])
Label: 3


### **Select the model for the training**

In [88]:
import torch.nn as nn
import torchvision.models as models

# Load the pretrained ResNet model
model = models.resnet18(pretrained=True)

# modify the final layer to output the number of classes in our dataset
num_classes = len(people)

# Resnet has 18 layers that are already trained
# we freeze the earlier layers and only train the last fully connected (FC) layer. 
# This is called Transfer Learning.

model.fc = nn.Linear(model.fc.in_features, num_classes)

print(model)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
import torch.optim as optim

# Split the dataset into training and validation sets
train_dataset, val_dataset = torch.utils.data.random_split(face_dataset, [int(0.8 * len(face_dataset)), len(face_dataset) - int(0.8 * len(face_dataset))])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# training loop
num_epochs = 20
best_loss = float('inf')

for epoch in range(num_epochs):
    model.train()
    running_lost = 0.0

    for images, labels in train_loader:
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_lost += loss.item()

    model.eval()
    val_loss = 0.0
    with torch.inference_mode():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    print(f"Epoch {epoch+1}, Train Loss: {running_lost/len(train_loader)}, Val Loss: {val_loss/len(val_loader)}")

    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')

Epoch 1, Train Loss: 0.8946371898055077, Val Loss: 3.741490602493286
Epoch 2, Train Loss: 0.1574297919869423, Val Loss: 4.898905515670776
Epoch 3, Train Loss: 0.12580684696634611, Val Loss: 3.7763350009918213
Epoch 4, Train Loss: 0.10956310294568539, Val Loss: 0.8412430584430695
Epoch 5, Train Loss: 0.05938295150796572, Val Loss: 1.9780666828155518
Epoch 6, Train Loss: 0.09878160936447482, Val Loss: 1.2219275832176208
Epoch 7, Train Loss: 0.051368876437967025, Val Loss: 0.38434572517871857
Epoch 8, Train Loss: 0.006500377901829779, Val Loss: 0.3312400132417679
Epoch 9, Train Loss: 0.016528285341337323, Val Loss: 0.2907616049051285
Epoch 10, Train Loss: 0.005388321432595451, Val Loss: 0.32868072390556335
Epoch 11, Train Loss: 0.002405960171017796, Val Loss: 0.243282288312912
Epoch 12, Train Loss: 0.001541079138405621, Val Loss: 0.2577904760837555
Epoch 13, Train Loss: 0.0050527098355814815, Val Loss: 0.3040517494082451
Epoch 14, Train Loss: 0.0010680185271970306, Val Loss: 0.21131274849

### **Testing the model**

In [90]:
model.load_state_dict(torch.load('best_model.pth'))

<All keys matched successfully>

In [91]:
def classify_new_image(img_path, model, transform, label_dict):
    model.eval()

    image = Image.open(img_path).convert('RGB')
    image = transform(image).unsqueeze(0)

    with torch.no_grad():
        output = model(image)
        probabilities = torch.nn.functional.softmax(output, dim=1)
        confidence, predicted_class = torch.max(probabilities, dim=1)

    predicted_label = list(label_dict.keys())[list(label_dict.values()).index(predicted_class.item())]
    print(f"Predicted: {predicted_label} (Confidence: {confidence.item()*100:.2f}%)")
    return predicted_label

In [92]:
test_image_path = './test_images/Trump.png'
classify_new_image(test_image_path, model, transform, label_dict)

Predicted: Donald Trump (Confidence: 99.99%)


'Donald Trump'

In [95]:
test_image_path = './test_images/Putin.png'
classify_new_image(test_image_path, model, transform, label_dict)

Predicted: Vladimir Putin (Confidence: 66.13%)


'Vladimir Putin'

In [98]:
test_image_path = './test_images/Trudeau.png'
classify_new_image(test_image_path, model, transform, label_dict)

Predicted: Justin Trudeau (Confidence: 71.14%)


'Justin Trudeau'