In [3]:
import os
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, models, datasets
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import KFold
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_fscore_support)
from transformers import ViTForImageClassification, ViTFeatureExtractor
import optuna

In [5]:
dataset_dir = '/kaggle/input/deepfake/DeepFake'

In [6]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),  
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(15),
    transforms.RandomCrop(224, padding=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=20, scale=(0.8, 1.2), shear=10),
    transforms.RandomErasing(p=0.3),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  
])

transform_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


In [7]:
# Load the dataset
from torchvision.datasets import ImageFolder
dataset = ImageFolder(root=dataset_dir, transform=transform_train)
print("Classes:", dataset.classes)
print("Class-to-Index Mapping:", dataset.class_to_idx)
print("Number of Samples:", len(dataset))

Classes: ['Fake', 'Real']
Class-to-Index Mapping: {'Fake': 0, 'Real': 1}
Number of Samples: 10826


In [8]:
def get_model(model_name):
    if model_name == "mobilenet-v2":
        # Load pre-trained MobileNet-v2 model
        model = models.mobilenet_v2(pretrained=True)
        
        # Freeze all layers initially
        for param in model.parameters():
            param.requires_grad = False
        
        # Update the classifier for binary classification
        model.classifier[1] = nn.Linear(model.last_channel, 2)
        
        return model

In [9]:
# Calculate metrics function
def calculate_metrics(model, loader, device):
    
    # Set the model to evaluation mode (disables dropout)
    model.eval()

    # Lists to store true labels and predicted labels
    all_labels = []
    all_predictions = []

    # Disabling gradient computation
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
             # Get predicted labels by taking the argmax (most likely class)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

     # Calculate the confusion matrix,which give TN, FP, FN, and TP
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    # Unpack the confusion matrix into four components: TN, FP, FN, TP
    TN, FP, FN, TP = conf_matrix.ravel() 

    total = conf_matrix.sum()
    accuracy = (TP + TN) / total if total > 0 else 0.0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    
    return accuracy, precision, recall, f1, conf_matrix


In [10]:
# Train the model function with validation accuracy printed after each epoch
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5):
    # Variable to track the best validation accuracy
    best_val_accuracy = 0
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
         # Iterate over batches in the training data
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader)}")
        # Validation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        print(f"Epoch {epoch+1}/{epochs}, Validation Accuracy: {val_accuracy:.2f}%")
        
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
    
    return best_val_accuracy

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cross-validation setup
num_folds = 3
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)


In [11]:
def objective(trial, model_name):
    # Get a suggested learning rate from Optuna
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    
    # Initialize the model with dropout
    model = get_model(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    val_accuracies = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model and get validation accuracy
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on validation set
        val_accuracy, _, _, _, _ = calculate_metrics(model, val_loader, device)
        val_accuracies.append(val_accuracy)
    
    # Return the average validation accuracy across all folds as the objective value
    return np.mean(val_accuracies)


In [12]:
def evaluate_test_set(model_name, best_lr):
    # Initialize model with the best learning rate
    model = get_model(model_name).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
    criterion = nn.CrossEntropyLoss()

    fold_metrics = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"\nEvaluating on Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on the test set
        test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
        fold_metrics.append(calculate_metrics(model, test_loader, device))
    
    # Print metrics for each fold
    for fold_idx, metrics in enumerate(fold_metrics):
        accuracy, precision, recall, f1, conf_matrix = metrics
        print(f"Fold {fold_idx + 1} Metrics:")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

# Calculate average metrics across folds
    avg_accuracy = np.mean([metrics[0] for metrics in fold_metrics])
    avg_precision = np.mean([metrics[1] for metrics in fold_metrics])
    avg_recall = np.mean([metrics[2] for metrics in fold_metrics])
    avg_f1 = np.mean([metrics[3] for metrics in fold_metrics])
    total_conf_matrix = np.sum([metrics[4] for metrics in fold_metrics], axis=0)

    print("\nAverage Metrics Across Folds:")
    print(f"Accuracy: {avg_accuracy:.2f}, Precision: {avg_precision:.2f}, Recall: {avg_recall:.2f}, F1-Score: {avg_f1:.2f}")
    print(f"Confusion Matrix (sum of all folds):\n{total_conf_matrix}")


# Optuna Optimization and Final Testing
for model_name in ["mobilenet-v2"]:
    print(f"\nOptimizing for {model_name.upper()}...")
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, model_name), n_trials=5)  # You can increase the number of trials if needed

    # Best learning rate found for the model
    best_lr = study.best_params['lr']
    print(f"Best Learning Rate for {model_name.upper()}: {best_lr}")

    # Evaluate on test sets for each fold
    evaluate_test_set(model_name, best_lr)
    


[I 2024-12-25 20:44:59,565] A new study created in memory with name: no-name-ca8333eb-a895-4121-bf86-3b81196e7293



Optimizing for MOBILENET-V2...


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 111MB/s] 


Fold 1/3
Epoch 1/5, Loss: 0.7147341576399724
Epoch 1/5, Validation Accuracy: 64.96%
Epoch 2/5, Loss: 0.7273419736498627
Epoch 2/5, Validation Accuracy: 66.48%
Epoch 3/5, Loss: 0.6947253484752297
Epoch 3/5, Validation Accuracy: 67.59%
Epoch 4/5, Loss: 0.7314096708982689
Epoch 4/5, Validation Accuracy: 64.68%
Epoch 5/5, Loss: 0.7058119304601659
Epoch 5/5, Validation Accuracy: 70.78%
Fold 2/3
Epoch 1/5, Loss: 0.7092787017479786
Epoch 1/5, Validation Accuracy: 65.44%
Epoch 2/5, Loss: 0.6859393575902801
Epoch 2/5, Validation Accuracy: 63.71%
Epoch 3/5, Loss: 0.7197405669570628
Epoch 3/5, Validation Accuracy: 68.14%
Epoch 4/5, Loss: 0.6931390834776736
Epoch 4/5, Validation Accuracy: 68.14%
Epoch 5/5, Loss: 0.6936759021730055
Epoch 5/5, Validation Accuracy: 69.11%
Fold 3/3
Epoch 1/5, Loss: 0.7226736082885805
Epoch 1/5, Validation Accuracy: 69.74%
Epoch 2/5, Loss: 0.7062867117520853
Epoch 2/5, Validation Accuracy: 67.24%
Epoch 3/5, Loss: 0.7055264796999937
Epoch 3/5, Validation Accuracy: 69.25

[I 2024-12-25 21:10:11,397] Trial 0 finished with value: 0.6696675900277009 and parameters: {'lr': 0.004443927960100261}. Best is trial 0 with value: 0.6696675900277009.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.818146201787074
Epoch 1/5, Validation Accuracy: 65.58%
Epoch 2/5, Loss: 0.7048153915128655
Epoch 2/5, Validation Accuracy: 63.43%
Epoch 3/5, Loss: 0.7258577221664935
Epoch 3/5, Validation Accuracy: 69.25%
Epoch 4/5, Loss: 0.7490042462862657
Epoch 4/5, Validation Accuracy: 65.44%
Epoch 5/5, Loss: 0.8220236660367217
Epoch 5/5, Validation Accuracy: 65.17%
Fold 2/3
Epoch 1/5, Loss: 0.7808067306299895
Epoch 1/5, Validation Accuracy: 67.04%
Epoch 2/5, Loss: 0.7537847120129602
Epoch 2/5, Validation Accuracy: 58.86%
Epoch 3/5, Loss: 0.7896632203081037
Epoch 3/5, Validation Accuracy: 70.50%
Epoch 4/5, Loss: 0.7315936771874928
Epoch 4/5, Validation Accuracy: 63.92%
Epoch 5/5, Loss: 0.7528217691742913
Epoch 5/5, Validation Accuracy: 68.98%
Fold 3/3
Epoch 1/5, Loss: 0.7746059664046567
Epoch 1/5, Validation Accuracy: 69.39%
Epoch 2/5, Loss: 0.7469685533758026
Epoch 2/5, Validation Accuracy: 69.53%
Epoch 3/5, Loss: 0.7855175985486468
Epoch 3/5, Validation Accuracy: 69.32%

[I 2024-12-25 21:35:14,182] Trial 1 finished with value: 0.6491228070175439 and parameters: {'lr': 0.006235132000782023}. Best is trial 0 with value: 0.6696675900277009.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.7216680925854003
Epoch 1/5, Validation Accuracy: 53.19%
Epoch 2/5, Loss: 0.7022787181053373
Epoch 2/5, Validation Accuracy: 53.05%
Epoch 3/5, Loss: 0.6875540077357002
Epoch 3/5, Validation Accuracy: 58.17%
Epoch 4/5, Loss: 0.6789502254507159
Epoch 4/5, Validation Accuracy: 62.12%
Epoch 5/5, Loss: 0.6663403718511044
Epoch 5/5, Validation Accuracy: 61.84%
Fold 2/3
Epoch 1/5, Loss: 0.6644435594095051
Epoch 1/5, Validation Accuracy: 63.64%
Epoch 2/5, Loss: 0.6595460792272789
Epoch 2/5, Validation Accuracy: 64.06%
Epoch 3/5, Loss: 0.6540260163459989
Epoch 3/5, Validation Accuracy: 64.47%
Epoch 4/5, Loss: 0.645662412129713
Epoch 4/5, Validation Accuracy: 64.34%
Epoch 5/5, Loss: 0.6436064426082274
Epoch 5/5, Validation Accuracy: 65.65%
Fold 3/3
Epoch 1/5, Loss: 0.634175832758951
Epoch 1/5, Validation Accuracy: 65.58%
Epoch 2/5, Loss: 0.6336883276865627
Epoch 2/5, Validation Accuracy: 67.94%
Epoch 3/5, Loss: 0.6305777529326592
Epoch 3/5, Validation Accuracy: 67.87%


[I 2024-12-25 22:00:00,367] Trial 2 finished with value: 0.6470452446906739 and parameters: {'lr': 1.9492216631053976e-05}. Best is trial 0 with value: 0.6696675900277009.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 1.0244979789243878
Epoch 1/5, Validation Accuracy: 60.73%
Epoch 2/5, Loss: 0.8246411175688327
Epoch 2/5, Validation Accuracy: 68.98%
Epoch 3/5, Loss: 1.0155025101498345
Epoch 3/5, Validation Accuracy: 63.50%
Epoch 4/5, Loss: 0.9024274360738407
Epoch 4/5, Validation Accuracy: 65.51%
Epoch 5/5, Loss: 0.9915180321556428
Epoch 5/5, Validation Accuracy: 65.51%
Fold 2/3
Epoch 1/5, Loss: 0.9863850334401947
Epoch 1/5, Validation Accuracy: 63.99%
Epoch 2/5, Loss: 1.0436607656887222
Epoch 2/5, Validation Accuracy: 53.32%
Epoch 3/5, Loss: 1.0590817129743693
Epoch 3/5, Validation Accuracy: 65.93%
Epoch 4/5, Loss: 0.9518634426659642
Epoch 4/5, Validation Accuracy: 65.79%
Epoch 5/5, Loss: 1.0224360311558234
Epoch 5/5, Validation Accuracy: 66.83%
Fold 3/3
Epoch 1/5, Loss: 1.0510689399683673
Epoch 1/5, Validation Accuracy: 62.33%
Epoch 2/5, Loss: 1.0053163170814514
Epoch 2/5, Validation Accuracy: 63.85%
Epoch 3/5, Loss: 1.125035048683704
Epoch 3/5, Validation Accuracy: 69.88%

[I 2024-12-25 22:24:53,306] Trial 3 finished with value: 0.6708217913204063 and parameters: {'lr': 0.012079301406861595}. Best is trial 3 with value: 0.6708217913204063.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.668746625191599
Epoch 1/5, Validation Accuracy: 64.82%
Epoch 2/5, Loss: 0.6167525973438558
Epoch 2/5, Validation Accuracy: 66.14%
Epoch 3/5, Loss: 0.6007698398927299
Epoch 3/5, Validation Accuracy: 68.07%
Epoch 4/5, Loss: 0.605215409842644
Epoch 4/5, Validation Accuracy: 68.35%
Epoch 5/5, Loss: 0.5938662056764845
Epoch 5/5, Validation Accuracy: 69.18%
Fold 2/3
Epoch 1/5, Loss: 0.5939260708034367
Epoch 1/5, Validation Accuracy: 70.29%
Epoch 2/5, Loss: 0.5823503829826966
Epoch 2/5, Validation Accuracy: 68.84%
Epoch 3/5, Loss: 0.5856700616976174
Epoch 3/5, Validation Accuracy: 69.53%
Epoch 4/5, Loss: 0.5839731368900004
Epoch 4/5, Validation Accuracy: 70.71%
Epoch 5/5, Loss: 0.5769858898708174
Epoch 5/5, Validation Accuracy: 71.47%
Fold 3/3
Epoch 1/5, Loss: 0.5805896779120956
Epoch 1/5, Validation Accuracy: 70.36%
Epoch 2/5, Loss: 0.5773774658777437
Epoch 2/5, Validation Accuracy: 70.01%
Epoch 3/5, Loss: 0.5832174372607173
Epoch 3/5, Validation Accuracy: 70.43%


[I 2024-12-25 22:49:44,709] Trial 4 finished with value: 0.6975992613111727 and parameters: {'lr': 0.00029867476169288365}. Best is trial 4 with value: 0.6975992613111727.


Best Learning Rate for MOBILENET-V2: 0.00029867476169288365

Evaluating on Fold 1/3
Epoch 1/5, Loss: 0.6639582060318625
Epoch 1/5, Validation Accuracy: 68.63%
Epoch 2/5, Loss: 0.6105755337035459
Epoch 2/5, Validation Accuracy: 69.32%
Epoch 3/5, Loss: 0.5993891122920737
Epoch 3/5, Validation Accuracy: 67.59%
Epoch 4/5, Loss: 0.5986514473488318
Epoch 4/5, Validation Accuracy: 69.11%
Epoch 5/5, Loss: 0.5897427514113115
Epoch 5/5, Validation Accuracy: 68.91%

Evaluating on Fold 2/3
Epoch 1/5, Loss: 0.595393615039014
Epoch 1/5, Validation Accuracy: 69.60%
Epoch 2/5, Loss: 0.6014858747714132
Epoch 2/5, Validation Accuracy: 71.12%
Epoch 3/5, Loss: 0.59179246573817
Epoch 3/5, Validation Accuracy: 69.94%
Epoch 4/5, Loss: 0.5812959333480392
Epoch 4/5, Validation Accuracy: 73.27%
Epoch 5/5, Loss: 0.5862871841501794
Epoch 5/5, Validation Accuracy: 69.53%

Evaluating on Fold 3/3
Epoch 1/5, Loss: 0.5821776477342152
Epoch 1/5, Validation Accuracy: 72.30%
Epoch 2/5, Loss: 0.5822707329007143
Epoch 2/5,

In [13]:
import torch.nn as nn
import torchvision.models as models

def initialize_model(name):
    if name == "mobilenet-v2":
        model = models.mobilenet_v2(pretrained=True)
        
        # Freeze all layers initially
        for param in model.parameters():
            param.requires_grad = False
        
        # Unfreeze the last convolutional block (features[16]) in MobileNet
        for param in model.features[16].parameters():
            param.requires_grad = True
        
        # Unfreeze and modify the classifier for binary classification
        model.classifier[1] = nn.Linear(model.last_channel, 2)
    
    else:
        raise ValueError("Model name must be 'mobilenet-v2'")
    
    # Print trainable parameters
    def count_trainable_params(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    print(f"Total trainable parameters: {count_trainable_params(model):,}")
    
    return model


In [14]:
# for fine tunning
def objective(trial, model_name):
    # Get a suggested learning rate from Optuna
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    
    # Initialize the model with dropout
    model = initialize_model(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    val_accuracies = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model and get validation accuracy
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on validation set
        val_accuracy, _, _, _, _ = calculate_metrics(model, val_loader, device)
        val_accuracies.append(val_accuracy)
    
    # Return the average validation accuracy across all folds as the objective value
    return np.mean(val_accuracies)


In [16]:
def evaluate_test_set(model_name, best_lr):
    # Initialize model with the best learning rate
    model = initialize_model(model_name).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
    criterion = nn.CrossEntropyLoss()

    fold_metrics = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"\nEvaluating on Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on the test set
        test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
        fold_metrics.append(calculate_metrics(model, test_loader, device))
    
    # Print metrics for each fold
    for fold_idx, metrics in enumerate(fold_metrics):
        accuracy, precision, recall, f1, conf_matrix = metrics
        print(f"Fold {fold_idx + 1} Metrics:")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

# Calculate average metrics across folds
    avg_accuracy = np.mean([metrics[0] for metrics in fold_metrics])
    avg_precision = np.mean([metrics[1] for metrics in fold_metrics])
    avg_recall = np.mean([metrics[2] for metrics in fold_metrics])
    avg_f1 = np.mean([metrics[3] for metrics in fold_metrics])
    total_conf_matrix = np.sum([metrics[4] for metrics in fold_metrics], axis=0)

    print("\nAverage Metrics Across Folds:")
    print(f"Accuracy: {avg_accuracy:.2f}, Precision: {avg_precision:.2f}, Recall: {avg_recall:.2f}, F1-Score: {avg_f1:.2f}")
    print(f"Confusion Matrix (sum of all folds):\n{total_conf_matrix}")


# Optuna Optimization and Final Testing
for model_name in ["mobilenet-v2"]:
    print(f"\nOptimizing for {model_name.upper()}...")
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, model_name), n_trials=5)  # You can increase the number of trials if needed

    # Best learning rate found for the model
    best_lr = study.best_params['lr']
    print(f"Best Learning Rate for {model_name.upper()}: {best_lr}")

    # Evaluate on test sets for each fold
    evaluate_test_set(model_name, best_lr)
    


[I 2024-12-25 23:24:30,806] A new study created in memory with name: no-name-76ae84c6-f21c-4d78-b20d-b966d49dc86b



Optimizing for MOBILENET-V2...
Total trainable parameters: 322,562
Fold 1/3


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Epoch 1/5, Loss: 0.6995144532530347
Epoch 1/5, Validation Accuracy: 59.83%
Epoch 2/5, Loss: 0.6582492543847521
Epoch 2/5, Validation Accuracy: 63.92%
Epoch 3/5, Loss: 0.6402103229122267
Epoch 3/5, Validation Accuracy: 67.66%
Epoch 4/5, Loss: 0.6177715537297791
Epoch 4/5, Validation Accuracy: 68.21%
Epoch 5/5, Loss: 0.5995829452796536
Epoch 5/5, Validation Accuracy: 69.88%
Fold 2/3
Epoch 1/5, Loss: 0.5932482262013367
Epoch 1/5, Validation Accuracy: 70.57%
Epoch 2/5, Loss: 0.5760339319047348
Epoch 2/5, Validation Accuracy: 71.33%
Epoch 3/5, Loss: 0.5687125142437318
Epoch 3/5, Validation Accuracy: 73.13%
Epoch 4/5, Loss: 0.5528306162489053
Epoch 4/5, Validation Accuracy: 71.95%
Epoch 5/5, Loss: 0.5497544327164223
Epoch 5/5, Validation Accuracy: 72.37%
Fold 3/3
Epoch 1/5, Loss: 0.5400752843414223
Epoch 1/5, Validation Accuracy: 73.06%
Epoch 2/5, Loss: 0.5350997797033404
Epoch 2/5, Validation Accuracy: 73.20%
Epoch 3/5, Loss: 0.5283351934087869
Epoch 3/5, Validation Accuracy: 74.10%
Epoch 4

[I 2024-12-25 23:49:49,493] Trial 0 finished with value: 0.7301477377654663 and parameters: {'lr': 1.3036571335702671e-05}. Best is trial 0 with value: 0.7301477377654663.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 322,562
Fold 1/3
Epoch 1/5, Loss: 0.5688418500660533
Epoch 1/5, Validation Accuracy: 73.13%
Epoch 2/5, Loss: 0.5004152785677937
Epoch 2/5, Validation Accuracy: 74.31%
Epoch 3/5, Loss: 0.4640498283159667
Epoch 3/5, Validation Accuracy: 78.74%
Epoch 4/5, Loss: 0.4516609766371342
Epoch 4/5, Validation Accuracy: 77.42%
Epoch 5/5, Loss: 0.43846709943937334
Epoch 5/5, Validation Accuracy: 78.05%
Fold 2/3
Epoch 1/5, Loss: 0.4579309697638559
Epoch 1/5, Validation Accuracy: 77.56%
Epoch 2/5, Loss: 0.4321066669666965
Epoch 2/5, Validation Accuracy: 80.61%
Epoch 3/5, Loss: 0.4139901256363695
Epoch 3/5, Validation Accuracy: 81.79%
Epoch 4/5, Loss: 0.4119853753575963
Epoch 4/5, Validation Accuracy: 80.47%
Epoch 5/5, Loss: 0.3951178384913924
Epoch 5/5, Validation Accuracy: 80.40%
Fold 3/3
Epoch 1/5, Loss: 0.40735956063257395
Epoch 1/5, Validation Accuracy: 82.69%
Epoch 2/5, Loss: 0.39802654071078114
Epoch 2/5, Validation Accuracy: 82.76%
Epoch 3/5, Loss: 0.393764010450458

[I 2024-12-26 00:14:49,996] Trial 1 finished with value: 0.8000923361034165 and parameters: {'lr': 0.0005358852134223212}. Best is trial 1 with value: 0.8000923361034165.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 322,562
Fold 1/3
Epoch 1/5, Loss: 1.295462268986096
Epoch 1/5, Validation Accuracy: 70.29%
Epoch 2/5, Loss: 0.8565546547510348
Epoch 2/5, Validation Accuracy: 70.78%
Epoch 3/5, Loss: 0.7718939978773423
Epoch 3/5, Validation Accuracy: 69.53%
Epoch 4/5, Loss: 0.7612238066960435
Epoch 4/5, Validation Accuracy: 64.20%
Epoch 5/5, Loss: 0.7500767628790924
Epoch 5/5, Validation Accuracy: 67.66%
Fold 2/3
Epoch 1/5, Loss: 0.7680169090381643
Epoch 1/5, Validation Accuracy: 54.16%
Epoch 2/5, Loss: 0.8457812943511246
Epoch 2/5, Validation Accuracy: 73.06%
Epoch 3/5, Loss: 0.8119724602824416
Epoch 3/5, Validation Accuracy: 65.24%
Epoch 4/5, Loss: 0.8513665247029363
Epoch 4/5, Validation Accuracy: 69.88%
Epoch 5/5, Loss: 0.8791207521330586
Epoch 5/5, Validation Accuracy: 69.04%
Fold 3/3
Epoch 1/5, Loss: 0.939784914419796
Epoch 1/5, Validation Accuracy: 65.44%
Epoch 2/5, Loss: 1.0011298228363965
Epoch 2/5, Validation Accuracy: 69.81%
Epoch 3/5, Loss: 0.9520354975652958
Epo

[I 2024-12-26 00:40:13,980] Trial 2 finished with value: 0.6710526315789475 and parameters: {'lr': 0.03533549148188352}. Best is trial 1 with value: 0.8000923361034165.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 322,562
Fold 1/3
Epoch 1/5, Loss: 1.4852922669925743
Epoch 1/5, Validation Accuracy: 68.21%
Epoch 2/5, Loss: 1.1162220396060312
Epoch 2/5, Validation Accuracy: 72.58%
Epoch 3/5, Loss: 0.765747710650797
Epoch 3/5, Validation Accuracy: 65.65%
Epoch 4/5, Loss: 0.7320874786179369
Epoch 4/5, Validation Accuracy: 72.09%
Epoch 5/5, Loss: 0.769423152860357
Epoch 5/5, Validation Accuracy: 53.81%
Fold 2/3
Epoch 1/5, Loss: 0.8847812250338866
Epoch 1/5, Validation Accuracy: 66.97%
Epoch 2/5, Loss: 0.9314551176451846
Epoch 2/5, Validation Accuracy: 72.37%
Epoch 3/5, Loss: 0.9598060039525532
Epoch 3/5, Validation Accuracy: 51.25%
Epoch 4/5, Loss: 0.9930416554706532
Epoch 4/5, Validation Accuracy: 55.12%
Epoch 5/5, Loss: 0.9983612378822506
Epoch 5/5, Validation Accuracy: 59.28%
Fold 3/3
Epoch 1/5, Loss: 1.149370577042274
Epoch 1/5, Validation Accuracy: 54.43%
Epoch 2/5, Loss: 1.0799398407422376
Epoch 2/5, Validation Accuracy: 48.55%
Epoch 3/5, Loss: 1.0893226037038624
Epoc

[I 2024-12-26 01:05:03,020] Trial 3 finished with value: 0.5967220683287165 and parameters: {'lr': 0.03804465934988528}. Best is trial 1 with value: 0.8000923361034165.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 322,562
Fold 1/3
Epoch 1/5, Loss: 1.8221591552976746
Epoch 1/5, Validation Accuracy: 70.91%
Epoch 2/5, Loss: 1.057603949341326
Epoch 2/5, Validation Accuracy: 61.08%
Epoch 3/5, Loss: 0.9654474584437207
Epoch 3/5, Validation Accuracy: 67.87%
Epoch 4/5, Loss: 0.8432630025550146
Epoch 4/5, Validation Accuracy: 71.61%
Epoch 5/5, Loss: 0.8959910195341426
Epoch 5/5, Validation Accuracy: 64.89%
Fold 2/3
Epoch 1/5, Loss: 0.9700871434330282
Epoch 1/5, Validation Accuracy: 53.88%
Epoch 2/5, Loss: 0.9702557213398633
Epoch 2/5, Validation Accuracy: 70.08%
Epoch 3/5, Loss: 0.8536625838740755
Epoch 3/5, Validation Accuracy: 68.49%
Epoch 4/5, Loss: 0.9688798864243439
Epoch 4/5, Validation Accuracy: 68.28%
Epoch 5/5, Loss: 1.0540350229371318
Epoch 5/5, Validation Accuracy: 50.62%
Fold 3/3
Epoch 1/5, Loss: 0.9882221877245613
Epoch 1/5, Validation Accuracy: 55.12%
Epoch 2/5, Loss: 0.988232741533722
Epoch 2/5, Validation Accuracy: 66.83%
Epoch 3/5, Loss: 0.9855969133298041
Epo

[I 2024-12-26 01:30:19,279] Trial 4 finished with value: 0.6048014773776546 and parameters: {'lr': 0.04536774544628303}. Best is trial 1 with value: 0.8000923361034165.


Best Learning Rate for MOBILENET-V2: 0.0005358852134223212
Total trainable parameters: 322,562

Evaluating on Fold 1/3
Epoch 1/5, Loss: 0.5749680057414988
Epoch 1/5, Validation Accuracy: 74.65%
Epoch 2/5, Loss: 0.5109964225502962
Epoch 2/5, Validation Accuracy: 74.03%
Epoch 3/5, Loss: 0.4681811415852763
Epoch 3/5, Validation Accuracy: 77.15%
Epoch 4/5, Loss: 0.44557433423087084
Epoch 4/5, Validation Accuracy: 77.08%
Epoch 5/5, Loss: 0.4444709707524895
Epoch 5/5, Validation Accuracy: 78.39%

Evaluating on Fold 2/3
Epoch 1/5, Loss: 0.4317691458687598
Epoch 1/5, Validation Accuracy: 78.32%
Epoch 2/5, Loss: 0.42117531539985487
Epoch 2/5, Validation Accuracy: 80.06%
Epoch 3/5, Loss: 0.40551101947357643
Epoch 3/5, Validation Accuracy: 79.09%
Epoch 4/5, Loss: 0.4172816409919802
Epoch 4/5, Validation Accuracy: 76.18%
Epoch 5/5, Loss: 0.39732762660769466
Epoch 5/5, Validation Accuracy: 82.41%

Evaluating on Fold 3/3
Epoch 1/5, Loss: 0.4047333216963552
Epoch 1/5, Validation Accuracy: 81.65%
Epoc