In [1]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Downloading nvidia_cuda_

In [2]:
import kagglehub

path = kagglehub.dataset_download("alfredhhw/adiencegender")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/adiencegender


In [3]:
IMAGE_DIR = path + "/AdienceGender/aligned/"

In [4]:
import os
import re
import numpy as np
import pandas as pd
import torch
from PIL import Image
import cv2
from facenet_pytorch import MTCNN
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
import torch.nn as nn
import torchvision.models as models
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from sklearn.manifold import TSNE

In [5]:
adience_df = pd.read_csv("https://drive.google.com/uc?id=11a8GW8PXR5jYWQCb73-7IzOTYUR6abQc&export=download")

In [6]:
adience_df

Unnamed: 0,image_path,age
0,30601258@N03/landmark_aligned_face.1.103996468...,"(25, 32)"
1,30601258@N03/landmark_aligned_face.2.104248158...,"(25, 32)"
2,30601258@N03/landmark_aligned_face.1.104379798...,"(25, 32)"
3,30601258@N03/landmark_aligned_face.3.104379798...,"(25, 32)"
4,30601258@N03/landmark_aligned_face.2.118166449...,"(25, 32)"
...,...,...
13963,101515718@N03/landmark_aligned_face.2280.10587...,"(25, 32)"
13964,101515718@N03/landmark_aligned_face.2278.10587...,"(25, 32)"
13965,101515718@N03/landmark_aligned_face.2279.10587...,"(25, 32)"
13966,50458575@N08/landmark_aligned_face.2281.942669...,"(25, 32)"


In [7]:
adience_df['image_path'] = adience_df['image_path'].astype(str)
adience_df['age'] = adience_df['age'].apply(eval)

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
mtcnn = MTCNN(image_size=224, select_largest=True, keep_all=True, device=device)

In [10]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomGrayscale(p=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.3, interpolation=3, fill=0),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [11]:
adience_df = adience_df[adience_df['age'] != (8, 23)]

In [12]:
sorted(adience_df["age"].unique())

[(0, 2), (4, 6), (8, 12), (15, 20), (25, 32), (38, 48), (48, 53), (60, 100)]

In [13]:
class AdienceDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        self.age_range_to_index = {
            age_range : idx
            for idx, age_range in enumerate(sorted(self.dataframe["age"].unique()))
        }

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        image_path = row["image_path"]
        label = self.age_range_to_index[row["age"]]
        label = torch.tensor(label, dtype=torch.long)

        image = Image.open(IMAGE_DIR + image_path)
        image = image.resize((224, 224))

        faces, _ = mtcnn.detect(image)

        if faces is None:
            image = Image.open(IMAGE_DIR + image_path)
            image = image.resize((224, 224))
        else:
            largest_face_box = faces[0]
            image = image.crop((largest_face_box[0], largest_face_box[1], largest_face_box[2], largest_face_box[3]))

        if self.transform:
            image = self.transform(image)
            
        return image, label

In [14]:
dataset = AdienceDataset(adience_df, transform=transform)

train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [15]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [16]:
num_classes = 8

In [17]:
model = models.vgg16(pretrained=True)

num_ftrs = model.classifier[-1].in_features

model.classifier[-1] = nn.Linear(num_ftrs, num_classes)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 186MB/s]  


In [18]:
model = model.to(device)

In [19]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [28]:
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        
        outputs = model(images)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)  # Total loss across all samples
        _, predicted = torch.max(outputs,1)

        # print(outputs, labels, predicted)

        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    avg_train_loss = running_loss / len(train_loader.dataset)  # Average loss per sample
    train_accuracy = 100 * correct_train / total_train
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%")

print("Training complete.")

Epoch [1/5], Training Loss: 1.8933, Training Accuracy: 32.18%
Epoch [2/5], Training Loss: 1.8946, Training Accuracy: 32.18%
Epoch [3/5], Training Loss: 1.8946, Training Accuracy: 32.18%
Epoch [4/5], Training Loss: 1.8927, Training Accuracy: 32.18%
Epoch [5/5], Training Loss: 1.8938, Training Accuracy: 32.18%
Training complete.


In [30]:
model.eval()
correct_test = 0
total_test = 0

all_predictions = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)

        _, predicted = torch.max(outputs, 1)
        correct_test += (predicted == labels).sum().item()
        total_test += labels.size(0)

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_accuracy = 100 * correct_test / total_test
print(f'Test Accuracy: {test_accuracy:.2f}%')
conf_matrix = confusion_matrix(all_labels, all_predictions)
precision, recall, f1_score, support = precision_recall_fscore_support(all_labels, all_predictions)
print("Confusion Matrix:\n", conf_matrix)
print("\nPrecision per class:", precision)
print("Recall per class:", recall)
print("F1-score per class:", f1_score)
print("Samples per class:", support)


Test Accuracy: 30.57%
Confusion Matrix:
 [[   0    0    0    0  661    0    0    0]
 [   0    0    0    0  450    0    0    0]
 [   0    0    0    0  462    0    0    0]
 [   0    0    0    0  347    0    0    0]
 [   0    0    0    0 1262    0    0    0]
 [   0    0    0    0  528    0    0    0]
 [   0    0    0    0  220    0    0    0]
 [   0    0    0    0  198    0    0    0]]

Precision per class: [0.         0.         0.         0.         0.30571705 0.
 0.         0.        ]
Recall per class: [0. 0. 0. 0. 1. 0. 0. 0.]
F1-score per class: [0.         0.         0.         0.         0.46827458 0.
 0.         0.        ]
Samples per class: [ 661  450  462  347 1262  528  220  198]


  _warn_prf(average, modifier, msg_start, len(result))
