In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader
from dataset import FaceDataset
from sklearn.metrics import classification_report
import time
import os
# os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [2]:
# Reproducibility: seed PyTorch's global RNG before anything stochastic in this
# cell runs (initialisation of the new classifier head, shuffling in the
# training DataLoader, random augmentations). Without this, every
# Restart & Run All yields a different run.
# NOTE(review): GPU kernels may still be non-deterministic; see the PyTorch
# reproducibility notes if bit-exact reruns are required.
SEED = 42
torch.manual_seed(SEED)

# Paths (relative to the notebook's working directory)
train_path = "../data/train"
test_path = "../data/test"

# Transformations
# Training pipeline: random augmentations, then tensor conversion and
# per-channel normalisation computed as (x - 0.5) / 0.5.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip images horizontally
    transforms.RandomRotation(degrees=15),   # Rotate images by up to 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Color adjustments
    transforms.RandomResizedCrop(size=(128, 128), scale=(0.8, 1.0)),  # Random cropping and resizing
    transforms.ToTensor(),  # Convert image to tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize the image
])

# Evaluation pipeline: deterministic resize to the same 128x128 input size and
# the same normalisation as training, with no augmentation.
# NOTE(review): the torchvision preset for the ImageNet ResNet-50 weights
# normalises with the ImageNet mean/std rather than 0.5/0.5 - confirm this
# choice is intentional. Any change must be mirrored wherever the saved
# checkpoint is used for inference.
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Datasets and Dataloaders
# NOTE(review): presumably `include_class` keeps only original classes 1 and 2
# and `transform_class` remaps them to labels 0 and 1 - verify in dataset.py.
train_dataset = FaceDataset(train_path, transform=train_transform, include_class=[1,2], transform_class={1:0, 2:1})
test_dataset = FaceDataset(test_path, transform=test_transform, include_class=[1,2], transform_class={1:0, 2:1})

batch_size = 32 * 3  # 96 samples per batch
# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Pretrained model: ResNet-50 with ImageNet weights; the final fully connected
# layer is replaced by a freshly initialised `num_classes`-way linear head.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
num_classes = 2
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss function
cross_criterion = nn.CrossEntropyLoss()

# Optimizer: all model parameters are passed in, so the whole network is
# fine-tuned (nothing is frozen in this cell).
optimizer = optim.Adam(model.parameters(), lr=0.00001, weight_decay=1e-4)




In [3]:
def _train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Returns:
        tuple: ``(mean_batch_loss, accuracy_percent)``. Both are ``0.0`` when
        ``loader`` yields no batches.
    """
    model.train()
    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_seen = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        n_batches += 1
        _, predicted = torch.max(outputs, 1)
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    # Mean of the per-batch losses (same quantity as dividing by the number of
    # batches), guarded so an empty loader does not divide by zero.
    mean_loss = loss_sum / n_batches if n_batches else 0.0
    accuracy = 100 * n_correct / n_seen if n_seen else 0.0
    return mean_loss, accuracy


def _evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Returns:
        tuple: ``(mean_batch_loss, accuracy_percent, y_true, y_pred, n_classes)``
        where ``y_true``/``y_pred`` are flat Python lists of class indices and
        ``n_classes`` is the size of dimension 1 of the model output (``0`` if
        ``loader`` yields no batches).
    """
    model.eval()
    loss_sum = 0.0
    n_batches = 0
    n_correct = 0
    n_seen = 0
    n_classes = 0
    y_true = []
    y_pred = []

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = torch.max(outputs, 1)
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

            loss_sum += loss.item()
            n_batches += 1
            # One logit per class along dim 1, the same dim torch.max reduces.
            n_classes = outputs.size(1)
            y_true.extend(labels.cpu().tolist())
            y_pred.extend(predicted.cpu().tolist())

    mean_loss = loss_sum / n_batches if n_batches else 0.0
    accuracy = 100 * n_correct / n_seen if n_seen else 0.0
    return mean_loss, accuracy, y_true, y_pred, n_classes


def train_and_evaluate_model(model, train_loader, test_loader, criterion, optimizer, device, num_epochs=100,
                             save_path="models/best_two_class.pt"):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    After each epoch the function prints the training/validation loss and
    accuracy plus a per-class classification report, and writes the model's
    ``state_dict`` to ``save_path`` whenever the evaluation accuracy exceeds
    the best value seen so far. Note that the checkpoint is therefore selected
    on ``test_loader`` itself.

    Args:
        model: Network whose output has one logit per class along dim 1.
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` evaluation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.
        num_epochs: Number of passes over ``train_loader``.
        save_path: Destination of the best checkpoint. Missing parent
            directories are created on first save.

    Returns:
        None. Results are reported via ``print`` and the checkpoint file.
    """
    best_accuracy = 0
    for epoch in range(num_epochs):
        start_time = time.time()

        train_loss, train_accuracy = _train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, accuracy, y_true, y_pred, n_classes = _evaluate(model, test_loader, criterion, device)

        if best_accuracy < accuracy:
            # torch.save does not create directories; make sure the target exists.
            save_dir = os.path.dirname(save_path)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            torch.save(model.state_dict(), save_path)
            best_accuracy = accuracy

        if y_true:
            # Pass `labels` explicitly so the report always has one row per
            # class, even if a class is absent from both labels and predictions,
            # and take the class count from the logits, not a notebook global.
            class_ids = list(range(n_classes))
            report = classification_report(y_true, y_pred, labels=class_ids,
                                           target_names=[f"Class {i}" for i in class_ids], zero_division=0)
        else:
            report = "(no evaluation samples)"

        epoch_time = time.time() - start_time
        print(f"Epoch [{epoch + 1}/{num_epochs}], Training Loss: {train_loss:.4f}, "
              f"Training Accuracy: {train_accuracy:.2f}%, Val Loss: {val_loss:.4f}, "
              f"Test Accuracy: {accuracy:.2f}%, Time: {epoch_time:.2f} seconds")
        print(f"Test Classification Report:\n{report}")



In [5]:
# Launch the training run with the objects built in the setup cell; keyword
# arguments spell out which object fills which role.
train_and_evaluate_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=cross_criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=100,
)

Epoch [1/100], Training Loss: 0.6226, Training Accuracy: 65.38%, Val Loss: 0.6073, Test Accuracy: 68.03%, Time: 45.88 seconds
Test Classification Report:
              precision    recall  f1-score   support

     Class 0       0.68      0.68      0.68       123
     Class 1       0.68      0.68      0.68       121

    accuracy                           0.68       244
   macro avg       0.68      0.68      0.68       244
weighted avg       0.68      0.68      0.68       244



KeyboardInterrupt: 