In [2]:
import os
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, models, datasets
import torch.optim as optim
import torch.nn as nn
from sklearn.model_selection import KFold
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_fscore_support)
from transformers import ViTForImageClassification, ViTFeatureExtractor
import optuna

In [8]:
dataset_dir = '/kaggle/input/deepfake/DeepFake'

In [9]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),  
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(15),
    transforms.RandomCrop(224, padding=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=20, scale=(0.8, 1.2), shear=10),
    transforms.RandomErasing(p=0.3),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  
])

transform_val_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])


In [10]:
# Load the dataset
from torchvision.datasets import ImageFolder
dataset = ImageFolder(root=dataset_dir, transform=transform_train)
print("Classes:", dataset.classes)
print("Class-to-Index Mapping:", dataset.class_to_idx)
print("Number of Samples:", len(dataset))

Classes: ['Fake', 'Real']
Class-to-Index Mapping: {'Fake': 0, 'Real': 1}
Number of Samples: 10826


In [11]:
def get_model(model_name):
    if model_name == "resnet-50":
        # Load pre-trained ResNet-50 model
        model = models.resnet50(pretrained=True)
        
        # Freeze all layers initially
        for param in model.parameters():
            param.requires_grad = False
        
        # Update the fully connected layer for binary classification
        model.fc = nn.Linear(model.fc.in_features, 2)
        
        return model

In [12]:
# Calculate metrics function
def calculate_metrics(model, loader, device):
    
    # Set the model to evaluation mode (disables dropout)
    model.eval()

    # Lists to store true labels and predicted labels
    all_labels = []
    all_predictions = []

    # Disabling gradient computation
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
             # Get predicted labels by taking the argmax (most likely class)
            _, predicted = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

     # Calculate the confusion matrix,which give TN, FP, FN, and TP
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    # Unpack the confusion matrix into four components: TN, FP, FN, TP
    TN, FP, FN, TP = conf_matrix.ravel() 

    total = conf_matrix.sum()
    accuracy = (TP + TN) / total if total > 0 else 0.0
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
    
    return accuracy, precision, recall, f1, conf_matrix


In [13]:
# Train the model function with validation accuracy printed after each epoch
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5):
    # Variable to track the best validation accuracy
    best_val_accuracy = 0
    
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
         # Iterate over batches in the training data
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader)}")
        # Validation phase
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        val_accuracy = 100 * correct / total
        print(f"Epoch {epoch+1}/{epochs}, Validation Accuracy: {val_accuracy:.2f}%")
        
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
    
    return best_val_accuracy

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cross-validation setup
num_folds = 3
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)


In [14]:
def objective(trial, model_name):
    # Get a suggested learning rate from Optuna
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    
    # Initialize the model with dropout
    model = get_model(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    val_accuracies = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model and get validation accuracy
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on validation set
        val_accuracy, _, _, _, _ = calculate_metrics(model, val_loader, device)
        val_accuracies.append(val_accuracy)
    
    # Return the average validation accuracy across all folds as the objective value
    return np.mean(val_accuracies)


In [16]:
def evaluate_test_set(model_name, best_lr):
    # Initialize model with the best learning rate
    model = get_model(model_name).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
    criterion = nn.CrossEntropyLoss()

    fold_metrics = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"\nEvaluating on Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on the test set
        test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
        fold_metrics.append(calculate_metrics(model, test_loader, device))
    
    # Print metrics for each fold
    for fold_idx, metrics in enumerate(fold_metrics):
        accuracy, precision, recall, f1, conf_matrix = metrics
        print(f"Fold {fold_idx + 1} Metrics:")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

# Calculate average metrics across folds
    avg_accuracy = np.mean([metrics[0] for metrics in fold_metrics])
    avg_precision = np.mean([metrics[1] for metrics in fold_metrics])
    avg_recall = np.mean([metrics[2] for metrics in fold_metrics])
    avg_f1 = np.mean([metrics[3] for metrics in fold_metrics])
    total_conf_matrix = np.sum([metrics[4] for metrics in fold_metrics], axis=0)

    print("\nAverage Metrics Across Folds:")
    print(f"Accuracy: {avg_accuracy:.2f}, Precision: {avg_precision:.2f}, Recall: {avg_recall:.2f}, F1-Score: {avg_f1:.2f}")
    print(f"Confusion Matrix (sum of all folds):\n{total_conf_matrix}")


# Optuna Optimization and Final Testing
for model_name in ["resnet-50"]:
    print(f"\nOptimizing for {model_name.upper()}...")
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, model_name), n_trials=5)  # You can increase the number of trials if needed

    # Best learning rate found for the model
    best_lr = study.best_params['lr']
    print(f"Best Learning Rate for {model_name.upper()}: {best_lr}")

    # Evaluate on test sets for each fold
    evaluate_test_set(model_name, best_lr)
    


[I 2024-12-25 13:59:42,048] A new study created in memory with name: no-name-77a8874e-b7fc-4df5-a6d0-177901710fb6



Optimizing for RESNET-50...


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 211MB/s]


Fold 1/3
Epoch 1/5, Loss: 2.6325327655228463
Epoch 1/5, Validation Accuracy: 66.83%
Epoch 2/5, Loss: 1.9378970094148624
Epoch 2/5, Validation Accuracy: 60.39%
Epoch 3/5, Loss: 2.256535168840082
Epoch 3/5, Validation Accuracy: 68.01%
Epoch 4/5, Loss: 2.631577669586266
Epoch 4/5, Validation Accuracy: 54.85%
Epoch 5/5, Loss: 2.0797723225467113
Epoch 5/5, Validation Accuracy: 67.38%
Fold 2/3
Epoch 1/5, Loss: 2.061254018907389
Epoch 1/5, Validation Accuracy: 60.66%
Epoch 2/5, Loss: 2.404883705120719
Epoch 2/5, Validation Accuracy: 69.67%
Epoch 3/5, Loss: 3.675904724808688
Epoch 3/5, Validation Accuracy: 69.32%
Epoch 4/5, Loss: 3.4277701122655393
Epoch 4/5, Validation Accuracy: 61.01%
Epoch 5/5, Loss: 3.0723594171895505
Epoch 5/5, Validation Accuracy: 68.21%
Fold 3/3
Epoch 1/5, Loss: 2.8197471545546096
Epoch 1/5, Validation Accuracy: 71.33%
Epoch 2/5, Loss: 2.304173856315033
Epoch 2/5, Validation Accuracy: 64.06%
Epoch 3/5, Loss: 3.129160007060562
Epoch 3/5, Validation Accuracy: 66.07%
Epoch

[I 2024-12-25 14:28:19,839] Trial 0 finished with value: 0.6823638042474608 and parameters: {'lr': 0.04722758116217684}. Best is trial 0 with value: 0.6823638042474608.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 2.571131027205873
Epoch 1/5, Validation Accuracy: 53.88%
Epoch 2/5, Loss: 2.0780664336615504
Epoch 2/5, Validation Accuracy: 50.97%
Epoch 3/5, Loss: 2.019721493536596
Epoch 3/5, Validation Accuracy: 53.46%
Epoch 4/5, Loss: 2.787081377282327
Epoch 4/5, Validation Accuracy: 59.83%
Epoch 5/5, Loss: 1.8184023313430133
Epoch 5/5, Validation Accuracy: 70.57%
Fold 2/3
Epoch 1/5, Loss: 1.8382339441315245
Epoch 1/5, Validation Accuracy: 62.74%
Epoch 2/5, Loss: 1.6980435230784654
Epoch 2/5, Validation Accuracy: 69.53%
Epoch 3/5, Loss: 1.694767294338395
Epoch 3/5, Validation Accuracy: 56.51%
Epoch 4/5, Loss: 1.7861252053337202
Epoch 4/5, Validation Accuracy: 67.73%
Epoch 5/5, Loss: 2.319406638817234
Epoch 5/5, Validation Accuracy: 67.94%
Fold 3/3
Epoch 1/5, Loss: 2.0631778734165
Epoch 1/5, Validation Accuracy: 65.93%
Epoch 2/5, Loss: 1.8279962073702838
Epoch 2/5, Validation Accuracy: 70.64%
Epoch 3/5, Loss: 1.896511474696312
Epoch 3/5, Validation Accuracy: 59.21%
Epoch 4

[I 2024-12-25 14:56:46,915] Trial 1 finished with value: 0.6525854108956602 and parameters: {'lr': 0.03702599268428947}. Best is trial 0 with value: 0.6823638042474608.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.7012037144181478
Epoch 1/5, Validation Accuracy: 53.81%
Epoch 2/5, Loss: 0.6798062192800954
Epoch 2/5, Validation Accuracy: 61.98%
Epoch 3/5, Loss: 0.662187484087865
Epoch 3/5, Validation Accuracy: 60.04%
Epoch 4/5, Loss: 0.6485887578179165
Epoch 4/5, Validation Accuracy: 63.37%
Epoch 5/5, Loss: 0.6380240581312232
Epoch 5/5, Validation Accuracy: 64.40%
Fold 2/3
Epoch 1/5, Loss: 0.6317433649005152
Epoch 1/5, Validation Accuracy: 64.82%
Epoch 2/5, Loss: 0.6264294142222536
Epoch 2/5, Validation Accuracy: 65.37%
Epoch 3/5, Loss: 0.6167103565858872
Epoch 3/5, Validation Accuracy: 68.49%
Epoch 4/5, Loss: 0.6115542189192377
Epoch 4/5, Validation Accuracy: 68.70%
Epoch 5/5, Loss: 0.6097571566618608
Epoch 5/5, Validation Accuracy: 68.63%
Fold 3/3
Epoch 1/5, Loss: 0.6017608901084457
Epoch 1/5, Validation Accuracy: 68.91%
Epoch 2/5, Loss: 0.601945873782121
Epoch 2/5, Validation Accuracy: 68.91%
Epoch 3/5, Loss: 0.5975486820573965
Epoch 3/5, Validation Accuracy: 70.71%


[I 2024-12-25 15:24:52,328] Trial 2 finished with value: 0.6726685133887349 and parameters: {'lr': 2.5927378167889653e-05}. Best is trial 0 with value: 0.6823638042474608.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.7002603800257267
Epoch 1/5, Validation Accuracy: 52.22%
Epoch 2/5, Loss: 0.6806751239365636
Epoch 2/5, Validation Accuracy: 59.00%
Epoch 3/5, Loss: 0.667740376614734
Epoch 3/5, Validation Accuracy: 60.60%
Epoch 4/5, Loss: 0.6595369570821689
Epoch 4/5, Validation Accuracy: 61.98%
Epoch 5/5, Loss: 0.6484514231181276
Epoch 5/5, Validation Accuracy: 65.03%
Fold 2/3
Epoch 1/5, Loss: 0.641383278435765
Epoch 1/5, Validation Accuracy: 65.93%
Epoch 2/5, Loss: 0.6370106481056845
Epoch 2/5, Validation Accuracy: 66.55%
Epoch 3/5, Loss: 0.630670487222092
Epoch 3/5, Validation Accuracy: 66.90%
Epoch 4/5, Loss: 0.6236219462110193
Epoch 4/5, Validation Accuracy: 67.45%
Epoch 5/5, Loss: 0.619940670484996
Epoch 5/5, Validation Accuracy: 69.32%
Fold 3/3
Epoch 1/5, Loss: 0.6201555946255257
Epoch 1/5, Validation Accuracy: 67.80%
Epoch 2/5, Loss: 0.6156942110035301
Epoch 2/5, Validation Accuracy: 68.01%
Epoch 3/5, Loss: 0.6077481577409565
Epoch 3/5, Validation Accuracy: 67.45%
Ep

[I 2024-12-25 15:52:44,336] Trial 3 finished with value: 0.6627423822714681 and parameters: {'lr': 1.8166961537163752e-05}. Best is trial 0 with value: 0.6823638042474608.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Fold 1/3
Epoch 1/5, Loss: 0.6882135703418795
Epoch 1/5, Validation Accuracy: 58.38%
Epoch 2/5, Loss: 0.6643620584551142
Epoch 2/5, Validation Accuracy: 61.36%
Epoch 3/5, Loss: 0.6495809396986145
Epoch 3/5, Validation Accuracy: 64.06%
Epoch 4/5, Loss: 0.6367074511327796
Epoch 4/5, Validation Accuracy: 65.65%
Epoch 5/5, Loss: 0.6260198031999789
Epoch 5/5, Validation Accuracy: 67.45%
Fold 2/3
Epoch 1/5, Loss: 0.623858847835446
Epoch 1/5, Validation Accuracy: 67.80%
Epoch 2/5, Loss: 0.6193809943963151
Epoch 2/5, Validation Accuracy: 68.35%
Epoch 3/5, Loss: 0.6130160134470923
Epoch 3/5, Validation Accuracy: 68.77%
Epoch 4/5, Loss: 0.608506024213127
Epoch 4/5, Validation Accuracy: 66.83%
Epoch 5/5, Loss: 0.6023563862834846
Epoch 5/5, Validation Accuracy: 67.94%
Fold 3/3
Epoch 1/5, Loss: 0.5997128598597827
Epoch 1/5, Validation Accuracy: 68.07%
Epoch 2/5, Loss: 0.5957922349318615
Epoch 2/5, Validation Accuracy: 70.36%
Epoch 3/5, Loss: 0.5953457421360754
Epoch 3/5, Validation Accuracy: 68.84%


[I 2024-12-25 16:20:53,965] Trial 4 finished with value: 0.6798245614035087 and parameters: {'lr': 3.070691748554705e-05}. Best is trial 0 with value: 0.6823638042474608.


Best Learning Rate for RESNET-50: 0.04722758116217684

Evaluating on Fold 1/3
Epoch 1/5, Loss: 3.1195410597060924
Epoch 1/5, Validation Accuracy: 64.27%
Epoch 2/5, Loss: 2.4474058740705416
Epoch 2/5, Validation Accuracy: 55.61%
Epoch 3/5, Loss: 2.312732575348069
Epoch 3/5, Validation Accuracy: 55.19%
Epoch 4/5, Loss: 2.3110913390612735
Epoch 4/5, Validation Accuracy: 63.64%
Epoch 5/5, Loss: 2.2533526660987686
Epoch 5/5, Validation Accuracy: 66.55%

Evaluating on Fold 2/3
Epoch 1/5, Loss: 2.6921204613387912
Epoch 1/5, Validation Accuracy: 66.14%
Epoch 2/5, Loss: 1.9200723734349836
Epoch 2/5, Validation Accuracy: 70.22%
Epoch 3/5, Loss: 2.9706147125412747
Epoch 3/5, Validation Accuracy: 70.22%
Epoch 4/5, Loss: 2.6655379155064156
Epoch 4/5, Validation Accuracy: 68.63%
Epoch 5/5, Loss: 3.5381527645153237
Epoch 5/5, Validation Accuracy: 70.36%

Evaluating on Fold 3/3
Epoch 1/5, Loss: 3.2501211182847207
Epoch 1/5, Validation Accuracy: 74.03%
Epoch 2/5, Loss: 2.507180038242709
Epoch 2/5, Vali

In [17]:
def initialize_model(name):
    if name == "resnet-50":
        model = models.resnet50(pretrained=True)
        
        # Freeze all layers initially
        for param in model.parameters():
            param.requires_grad = False
            
        # Unfreeze the final two residual blocks (layer3 and layer4)
        for layer in [model.layer3, model.layer4]:
            for param in layer.parameters():
                param.requires_grad = True
                
        # Unfreeze and modify the final fully connected layer
        model.fc = nn.Linear(model.fc.in_features, 2)
        
        # Print layer status
        def count_trainable_params(model):
            return sum(p.numel() for p in model.parameters() if p.requires_grad)
            
        print(f"Total trainable parameters: {count_trainable_params(model):,}")
        
    else:
        raise ValueError("Model name must be 'resnet-50'")
    
    return model



In [18]:
# for fine tunning
def objective(trial, model_name):
    # Get a suggested learning rate from Optuna
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)
    
    # Initialize the model with dropout
    model = initialize_model(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    val_accuracies = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model and get validation accuracy
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on validation set
        val_accuracy, _, _, _, _ = calculate_metrics(model, val_loader, device)
        val_accuracies.append(val_accuracy)
    
    # Return the average validation accuracy across all folds as the objective value
    return np.mean(val_accuracies)


In [19]:
def evaluate_test_set(model_name, best_lr):
    # Initialize model with the best learning rate
    model = initialize_model(model_name).to(device)
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
    criterion = nn.CrossEntropyLoss()

    fold_metrics = []
    for fold_idx, (train_val_idx, test_idx) in enumerate(kf.split(dataset)):
        print(f"\nEvaluating on Fold {fold_idx + 1}/{num_folds}")
        
        # Create training/validation split
        train_val_data = Subset(dataset, train_val_idx)
        test_data = Subset(dataset, test_idx)
        
        train_size = int(0.8 * len(train_val_data))
        val_size = len(train_val_data) - train_size
        train_data, val_data = torch.utils.data.random_split(
            train_val_data, [train_size, val_size], generator=torch.Generator().manual_seed(42)
        )
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
        
        # Train the model
        train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=5)
        
        # Evaluate on the test set
        test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
        fold_metrics.append(calculate_metrics(model, test_loader, device))
    
    # Print metrics for each fold
    for fold_idx, metrics in enumerate(fold_metrics):
        accuracy, precision, recall, f1, conf_matrix = metrics
        print(f"Fold {fold_idx + 1} Metrics:")
        print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1-Score: {f1:.2f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

# Calculate average metrics across folds
    avg_accuracy = np.mean([metrics[0] for metrics in fold_metrics])
    avg_precision = np.mean([metrics[1] for metrics in fold_metrics])
    avg_recall = np.mean([metrics[2] for metrics in fold_metrics])
    avg_f1 = np.mean([metrics[3] for metrics in fold_metrics])
    total_conf_matrix = np.sum([metrics[4] for metrics in fold_metrics], axis=0)

    print("\nAverage Metrics Across Folds:")
    print(f"Accuracy: {avg_accuracy:.2f}, Precision: {avg_precision:.2f}, Recall: {avg_recall:.2f}, F1-Score: {avg_f1:.2f}")
    print(f"Confusion Matrix (sum of all folds):\n{total_conf_matrix}")


# Optuna Optimization and Final Testing
for model_name in ["resnet-50"]:
    print(f"\nOptimizing for {model_name.upper()}...")
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, model_name), n_trials=5)  # You can increase the number of trials if needed

    # Best learning rate found for the model
    best_lr = study.best_params['lr']
    print(f"Best Learning Rate for {model_name.upper()}: {best_lr}")

    # Evaluate on test sets for each fold
    evaluate_test_set(model_name, best_lr)
    


[I 2024-12-25 16:51:56,613] A new study created in memory with name: no-name-75644693-23bf-4baf-aeb7-7db581845812



Optimizing for RESNET-50...


  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 22,067,202
Fold 1/3
Epoch 1/5, Loss: 0.7628418450856077
Epoch 1/5, Validation Accuracy: 50.21%
Epoch 2/5, Loss: 0.6497182636959118
Epoch 2/5, Validation Accuracy: 62.26%
Epoch 3/5, Loss: 0.5605222620687432
Epoch 3/5, Validation Accuracy: 68.98%
Epoch 4/5, Loss: 0.5117340568679473
Epoch 4/5, Validation Accuracy: 67.66%
Epoch 5/5, Loss: 0.44573503600958303
Epoch 5/5, Validation Accuracy: 74.31%
Fold 2/3
Epoch 1/5, Loss: 0.4175016579542371
Epoch 1/5, Validation Accuracy: 72.85%
Epoch 2/5, Loss: 0.3989502611410552
Epoch 2/5, Validation Accuracy: 85.73%
Epoch 3/5, Loss: 0.3773586157277144
Epoch 3/5, Validation Accuracy: 83.03%
Epoch 4/5, Loss: 0.35716825139127384
Epoch 4/5, Validation Accuracy: 80.26%
Epoch 5/5, Loss: 0.3510269730542246
Epoch 5/5, Validation Accuracy: 82.48%
Fold 3/3
Epoch 1/5, Loss: 0.35399011370226824
Epoch 1/5, Validation Accuracy: 84.97%
Epoch 2/5, Loss: 0.3452872964887988
Epoch 2/5, Validation Accuracy: 86.08%
Epoch 3/5, Loss: 0.328649531082

[I 2024-12-25 17:24:46,116] Trial 0 finished with value: 0.7890120036934443 and parameters: {'lr': 0.005736397688215054}. Best is trial 0 with value: 0.7890120036934443.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 22,067,202
Fold 1/3
Epoch 1/5, Loss: 0.8389237796404085
Epoch 1/5, Validation Accuracy: 50.76%
Epoch 2/5, Loss: 0.6455392796360986
Epoch 2/5, Validation Accuracy: 70.15%
Epoch 3/5, Loss: 0.5558758183737487
Epoch 3/5, Validation Accuracy: 75.97%
Epoch 4/5, Loss: 0.4914824048787849
Epoch 4/5, Validation Accuracy: 78.19%
Epoch 5/5, Loss: 0.4454140679612344
Epoch 5/5, Validation Accuracy: 78.53%
Fold 2/3
Epoch 1/5, Loss: 0.4184362186088088
Epoch 1/5, Validation Accuracy: 82.20%
Epoch 2/5, Loss: 0.4018844822987667
Epoch 2/5, Validation Accuracy: 68.42%
Epoch 3/5, Loss: 0.37788502386261746
Epoch 3/5, Validation Accuracy: 84.76%
Epoch 4/5, Loss: 0.36769635720147614
Epoch 4/5, Validation Accuracy: 83.03%
Epoch 5/5, Loss: 0.3661045098666987
Epoch 5/5, Validation Accuracy: 80.68%
Fold 3/3
Epoch 1/5, Loss: 0.35756829208102675
Epoch 1/5, Validation Accuracy: 85.11%
Epoch 2/5, Loss: 0.33971798510511936
Epoch 2/5, Validation Accuracy: 73.68%
Epoch 3/5, Loss: 0.32838196789

[I 2024-12-25 17:57:44,765] Trial 1 finished with value: 0.8289473684210527 and parameters: {'lr': 0.012675771576100597}. Best is trial 1 with value: 0.8289473684210527.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 22,067,202
Fold 1/3
Epoch 1/5, Loss: 0.6002792678155952
Epoch 1/5, Validation Accuracy: 71.75%
Epoch 2/5, Loss: 0.4484793386571315
Epoch 2/5, Validation Accuracy: 78.67%
Epoch 3/5, Loss: 0.3990535369729469
Epoch 3/5, Validation Accuracy: 75.00%
Epoch 4/5, Loss: 0.37039015750858667
Epoch 4/5, Validation Accuracy: 85.25%
Epoch 5/5, Loss: 0.36056571902491114
Epoch 5/5, Validation Accuracy: 83.80%
Fold 2/3
Epoch 1/5, Loss: 0.341909898743445
Epoch 1/5, Validation Accuracy: 84.83%
Epoch 2/5, Loss: 0.32491720768298893
Epoch 2/5, Validation Accuracy: 84.00%
Epoch 3/5, Loss: 0.31558664739955195
Epoch 3/5, Validation Accuracy: 85.04%
Epoch 4/5, Loss: 0.2971603394954244
Epoch 4/5, Validation Accuracy: 85.25%
Epoch 5/5, Loss: 0.2902698348898914
Epoch 5/5, Validation Accuracy: 83.45%
Fold 3/3
Epoch 1/5, Loss: 0.3060758417647188
Epoch 1/5, Validation Accuracy: 86.63%
Epoch 2/5, Loss: 0.2777114368110731
Epoch 2/5, Validation Accuracy: 86.84%
Epoch 3/5, Loss: 0.280753112506

[I 2024-12-25 18:30:55,806] Trial 2 finished with value: 0.8333333333333334 and parameters: {'lr': 0.001172905989332636}. Best is trial 2 with value: 0.8333333333333334.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 22,067,202
Fold 1/3
Epoch 1/5, Loss: 0.5250162963873773
Epoch 1/5, Validation Accuracy: 78.74%
Epoch 2/5, Loss: 0.3996987050574129
Epoch 2/5, Validation Accuracy: 82.48%
Epoch 3/5, Loss: 0.3439591477589054
Epoch 3/5, Validation Accuracy: 84.35%
Epoch 4/5, Loss: 0.30796296235935466
Epoch 4/5, Validation Accuracy: 83.93%
Epoch 5/5, Loss: 0.2937195279239291
Epoch 5/5, Validation Accuracy: 85.39%
Fold 2/3
Epoch 1/5, Loss: 0.3003933663937927
Epoch 1/5, Validation Accuracy: 87.67%
Epoch 2/5, Loss: 0.266571542381911
Epoch 2/5, Validation Accuracy: 90.10%
Epoch 3/5, Loss: 0.2492460009554473
Epoch 3/5, Validation Accuracy: 90.03%
Epoch 4/5, Loss: 0.24263770470797028
Epoch 4/5, Validation Accuracy: 90.24%
Epoch 5/5, Loss: 0.23177979750692515
Epoch 5/5, Validation Accuracy: 90.44%
Fold 3/3
Epoch 1/5, Loss: 0.25104345093115915
Epoch 1/5, Validation Accuracy: 89.89%
Epoch 2/5, Loss: 0.23202985784460828
Epoch 2/5, Validation Accuracy: 90.03%
Epoch 3/5, Loss: 0.22507071231

[I 2024-12-25 19:04:00,353] Trial 3 finished with value: 0.881809787626962 and parameters: {'lr': 2.2016545182065265e-05}. Best is trial 3 with value: 0.881809787626962.
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-1)


Total trainable parameters: 22,067,202
Fold 1/3
Epoch 1/5, Loss: 0.5647225516606431
Epoch 1/5, Validation Accuracy: 75.21%
Epoch 2/5, Loss: 0.42392392118991407
Epoch 2/5, Validation Accuracy: 80.54%
Epoch 3/5, Loss: 0.3744371768848672
Epoch 3/5, Validation Accuracy: 81.93%
Epoch 4/5, Loss: 0.34109522180004015
Epoch 4/5, Validation Accuracy: 81.16%
Epoch 5/5, Loss: 0.3202399695446478
Epoch 5/5, Validation Accuracy: 86.43%
Fold 2/3
Epoch 1/5, Loss: 0.31769744887207096
Epoch 1/5, Validation Accuracy: 85.80%
Epoch 2/5, Loss: 0.30535349167512926
Epoch 2/5, Validation Accuracy: 87.33%
Epoch 3/5, Loss: 0.2950883157016164
Epoch 3/5, Validation Accuracy: 87.60%
Epoch 4/5, Loss: 0.27491019361585545
Epoch 4/5, Validation Accuracy: 86.29%
Epoch 5/5, Loss: 0.2743705428354648
Epoch 5/5, Validation Accuracy: 87.26%
Fold 3/3
Epoch 1/5, Loss: 0.26947216899520127
Epoch 1/5, Validation Accuracy: 86.63%
Epoch 2/5, Loss: 0.2671359562165829
Epoch 2/5, Validation Accuracy: 87.26%
Epoch 3/5, Loss: 0.250343236

[I 2024-12-25 19:36:57,337] Trial 4 finished with value: 0.8721144967682365 and parameters: {'lr': 0.0008262218582997476}. Best is trial 3 with value: 0.881809787626962.


Best Learning Rate for RESNET-50: 2.2016545182065265e-05
Total trainable parameters: 22,067,202

Evaluating on Fold 1/3
Epoch 1/5, Loss: 0.5310034758478238
Epoch 1/5, Validation Accuracy: 79.02%
Epoch 2/5, Loss: 0.3887972894294486
Epoch 2/5, Validation Accuracy: 81.16%
Epoch 3/5, Loss: 0.33442202185728276
Epoch 3/5, Validation Accuracy: 83.03%
Epoch 4/5, Loss: 0.31179434851388244
Epoch 4/5, Validation Accuracy: 86.01%
Epoch 5/5, Loss: 0.2818460930447552
Epoch 5/5, Validation Accuracy: 85.60%

Evaluating on Fold 2/3
Epoch 1/5, Loss: 0.3027783467624727
Epoch 1/5, Validation Accuracy: 88.16%
Epoch 2/5, Loss: 0.2783394424625523
Epoch 2/5, Validation Accuracy: 88.37%
Epoch 3/5, Loss: 0.264151846794463
Epoch 3/5, Validation Accuracy: 90.37%
Epoch 4/5, Loss: 0.2540232158003591
Epoch 4/5, Validation Accuracy: 89.06%
Epoch 5/5, Loss: 0.2375289557388474
Epoch 5/5, Validation Accuracy: 89.40%

Evaluating on Fold 3/3
Epoch 1/5, Loss: 0.2381005324217496
Epoch 1/5, Validation Accuracy: 90.03%
Epoch 