# Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR
import torchvision.transforms as transforms
import timm
from torch.utils.data import DataLoader, Dataset, random_split, Subset
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pandas as pd
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

  check_for_updates()


# Dataset Class with Albumentations

In [2]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        # Initialize the dataset with a CSV file and image directory
        self.annotations = csv_file  # CSV file containing image paths and labels
        self.img_dir = img_dir  # Directory where images are stored
        self.transform = transform  # Optional transformations to apply to images

    def __len__(self):
        # Return the total number of samples in the dataset
        return len(self.annotations)

    def __getitem__(self, idx):
        # Get the image and its corresponding label for the given index (idx)
        img_path = os.path.join(self.img_dir, self.annotations.iloc[idx, 0])  # Get image path
        image = Image.open(img_path).convert("RGB")  # Open image and convert it to RGB (3 channels)
        
        # Get the label (assuming label is in the second column of the CSV)
        label = torch.tensor(int(self.annotations.iloc[idx, 1]))  # Convert label to tensor
        
        # Apply the transformation (if any)
        if self.transform:
            # Convert image to numpy array before applying transformation
            image = np.array(image)
            # Apply the transformation and retrieve the transformed image
            image = self.transform(image=image)["image"]
        
        # Return the transformed image and the corresponding label
        return image, label


# Define Data Augmentations

In [3]:
# Function to get data augmentation and normalization transforms
def get_transforms(img_size):
    """
    Creates and returns training and validation image transformations.

    Parameters:
    img_size (tuple): Desired image size as (height, width).

    Returns:
    tuple: A tuple containing training and validation transformations.
    """
    # Training transformations include data augmentation techniques to improve model generalization
    train_transform = A.Compose([
        A.Resize(img_size[0], img_size[1]),  # Resizes the image to the specified dimensions
        A.HorizontalFlip(p=0.5),  # Randomly flips the image horizontally with a 50% probability
        A.RandomBrightnessContrast(p=0.5),  # Adjusts brightness and contrast randomly with a 50% probability
        A.HueSaturationValue(p=0.5),  # Randomly changes hue, saturation, and value with a 50% probability
        A.GaussianBlur(p=0.3),  # Applies Gaussian blur to the image with a 30% probability
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalizes image pixel values
        ToTensorV2(),  # Converts the image and its augmentations to a PyTorch tensor
    ])
    
    # Validation transformations focus only on resizing and normalization for consistency
    val_transform = A.Compose([
        A.Resize(img_size[0], img_size[1]),  # Resizes the image to the specified dimensions
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalizes image pixel values
        ToTensorV2(),  # Converts the image to a PyTorch tensor
    ])
    
    return train_transform, val_transform


# Create DataLoaders

In [4]:
def create_dataloaders(csv_file, img_dir, img_size=(224, 224), batch_size=32, n_fold=0):
    """
    Creates DataLoader objects for training and validation datasets.

    Parameters:
    csv_file (pd.DataFrame): DataFrame containing image paths and labels.
    img_dir (str): Directory where images are stored.
    img_size (tuple): Desired image size as (height, width). Default is (224, 224).
    batch_size (int): Number of samples per batch. Default is 32.
    n_fold (int): Index of the fold for cross-validation. Default is 0.

    Returns:
    tuple: DataLoader objects for training and validation datasets.
    """
    # Get training and validation transformations
    train_transform, val_transform = get_transforms(img_size)
    
    # Initialize the dataset without any transformations
    dataset = CustomImageDataset(csv_file=csv_file, img_dir=img_dir, transform=None)
    
    # Stratified K-Fold for splitting dataset into train and validation sets
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2024)
    
    # Iterate through folds to get train and validation indices for the specified fold
    for i, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(csv_file)), csv_file.iloc[:, 1].values)):
        if i == n_fold:
            break
    
    # Subset datasets for training and validation with respective transformations applied
    train_dataset = Subset(
        CustomImageDataset(csv_file.iloc[train_idx], img_dir, transform=train_transform),
        range(len(train_idx))
    )
    val_dataset = Subset(
        CustomImageDataset(csv_file.iloc[val_idx], img_dir, transform=val_transform),
        range(len(val_idx))
    )
    
    # Create DataLoader for training dataset with shuffling and multi-threading for efficiency
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
    
    # Create DataLoader for validation dataset without shuffling
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
    
    return train_loader, val_loader


# Training Loop with Mixed Precision

In [5]:
# Initialize the gradient scaler for mixed precision training
scaler = torch.cuda.amp.GradScaler()

def train_one_epoch(model, train_loader, criterion, optimizer, scheduler, device):
    # Set the model to training mode (enables features like dropout)
    model.train()
    
    # Variable to accumulate the loss over the epoch
    running_loss = 0.0
    
    # Loop over batches of data in the training set
    for images, labels in tqdm(train_loader):  # tqdm provides a progress bar
        # Move images and labels to the specified device (GPU/CPU)
        images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
        
        # Zero the gradients of all optimized variables
        optimizer.zero_grad()
        
        # Enable mixed precision with autocasting for faster computation
        with torch.cuda.amp.autocast():
            # Forward pass: Compute the model's outputs
            outputs = model(images)
            # Calculate the loss between the model's outputs and the true labels
            loss = criterion(outputs, labels)
        
        # Scales the loss for mixed precision training, then backpropagate
        scaler.scale(loss).backward()
        
        # Optimizer step: Adjust model parameters based on gradients
        scaler.step(optimizer)
        
        # Updates the scale of the loss for the next iteration
        scaler.update()
        
        # Update the learning rate scheduler based on the optimizer step
        scheduler.step()
        
        # Accumulate the running loss, weighted by the batch size
        running_loss += loss.item() * images.size(0)

    # Return the average loss over the entire dataset (epoch)
    return running_loss / len(train_loader.dataset)


  scaler = torch.cuda.amp.GradScaler()


# Validation

In [6]:
# Disable gradient computation to save memory and computations during evaluation
@torch.no_grad()
def validate(model, val_loader, criterion, device):
    # Set the model to evaluation mode (disables dropout, batch norm, etc.)
    model.eval()
    
    # Variable to accumulate the loss over the validation set
    running_loss = 0.0
    
    # Lists to store the true labels and model outputs for performance evaluation
    all_labels, all_outputs = [], []

    # Loop over batches of validation data
    for images, labels in tqdm(val_loader):  # tqdm provides a progress bar
        # Move images and labels to the specified device (GPU/CPU)
        images, labels = images.to(device), labels.to(device).float().unsqueeze(1)

        # Forward pass: Compute the model's outputs (no gradient computation here)
        outputs = model(images)
        
        # Calculate the loss between the model's outputs and the true labels
        loss = criterion(outputs, labels)

        # Accumulate the loss, weighted by the batch size
        running_loss += loss.item() * images.size(0)
        
        # Append the true labels and model outputs to lists (to compute metrics later)
        all_labels.append(labels.cpu().numpy())  # Move labels back to CPU for storage
        all_outputs.append(outputs.cpu().numpy())  # Move outputs back to CPU for storage

    # Concatenate the list of all labels and outputs into arrays for further evaluation
    all_labels = np.concatenate(all_labels)
    all_outputs = np.concatenate(all_outputs)

    # Return the average loss over the entire validation dataset and the predictions/labels
    return running_loss / len(val_loader.dataset), all_labels, all_outputs


# Train Model

In [7]:
def train_model(csv_file, img_dir, model, img_size=(224, 224), num_epochs=10, batch_size=32, lr=1e-4, n_fold=0, device='cuda'):
    # Create the data loaders for training and validation from the provided CSV and image directory
    train_loader, val_loader = create_dataloaders(csv_file, img_dir, img_size, batch_size, n_fold)

    # Move the model to the specified device (e.g., GPU or CPU)
    model = model.to(device)
    
    # Define the loss function (Binary Cross-Entropy with logits)
    criterion = nn.BCEWithLogitsLoss()
    
    # Set up the optimizer (AdamW in this case)
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    
    # Set up the learning rate scheduler for dynamic learning rate adjustment during training
    scheduler = OneCycleLR(optimizer, max_lr=lr, epochs=num_epochs, steps_per_epoch=len(train_loader))

    # Variable to track the best validation ROC AUC score across all epochs
    best_val_auc = 0
    
    # Loop over the specified number of epochs
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        
        # Train the model for one epoch and get the training loss
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer, scheduler, device)
        
        # Validate the model after each epoch, get validation loss and predictions
        val_loss, val_labels, val_outputs = validate(model, val_loader, criterion, device)

        # Apply sigmoid to the model outputs to get probabilities (since BCEWithLogitsLoss is used)
        val_outputs = torch.sigmoid(torch.tensor(val_outputs)).numpy()
        
        # Convert the probabilities into binary predictions (threshold at 0.5)
        val_preds = (val_outputs > 0.5).astype(int)

        # Compute performance metrics: accuracy, F1 score, and ROC AUC score
        accuracy = accuracy_score(val_labels, val_preds)
        f1 = f1_score(val_labels, val_preds)
        roc_auc = roc_auc_score(val_labels, val_outputs)

        # Print the metrics for this epoch
        print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}, F1: {f1:.4f}, ROC AUC: {roc_auc:.4f}")

        # If the current ROC AUC is the best we've seen, save the model
        if roc_auc > best_val_auc:
            print("Saving Best Model...")
            torch.save(model.state_dict(), f"best_model_fold{n_fold}.pth")
            best_val_auc = roc_auc


 # Prediction

In [8]:
def predict(csv_file, img_dir, model, img_size=(224, 224), batch_size=32, device='cuda', model_path='best_model.pth'):
    # Get the necessary transformations for validation (e.g., resizing, normalization, etc.)
    _, val_transform = get_transforms(img_size)
    
    # Create a custom dataset using the CSV file and image directory with the validation transformations
    dataset = CustomImageDataset(csv_file, img_dir, transform=val_transform)
    
    # Create a DataLoader for the validation dataset with batch processing
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

    # Load the model weights from the specified file (model_path)
    model.load_state_dict(torch.load(model_path))
    
    # Move the model to the specified device (GPU or CPU)
    model = model.to(device)
    
    # Set the model to evaluation mode (disables dropout, batch normalization, etc.)
    model.eval()

    # List to store the model's outputs (predictions)
    outputs = []
    
    # Disable gradient calculation to save memory and computations during prediction
    with torch.no_grad():
        # Loop over the batches in the DataLoader
        for images, _ in tqdm(loader):  # _ means labels are not needed for prediction
            images = images.to(device)  # Move images to the appropriate device (GPU/CPU)
            
            # Forward pass: Get the model's predictions
            preds = model(images)
            
            # Apply sigmoid to the outputs to get probabilities (for binary classification)
            outputs.append(torch.sigmoid(preds).cpu().numpy())

    # Concatenate all the predicted outputs from different batches into one array
    return np.concatenate(outputs)


# Set Parameters

In [9]:

img_dir = "/Train"
csv_path = "train.csv"
labels = pd.read_csv(csv_path)
labels["label"] = labels["label"].map({"editada": 0, "real": 1})


# Initialize Model

In [10]:

model = timm.create_model('swin_base_patch4_window7_224', pretrained=True, num_classes=1)


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

# Train Model

In [11]:

train_model(labels, img_dir, model, img_size=(224, 224), num_epochs=30, batch_size=16, lr=1e-4, n_fold=0)


Epoch 1/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:14<00:00,  2.49it/s]
100%|██████████| 9/9 [00:01<00:00,  5.96it/s]


Train Loss: 0.7024, Val Loss: 0.6373, Accuracy: 0.6111, F1: 0.7407, ROC AUC: 0.7576
Saving Best Model...
Epoch 2/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.32it/s]


Train Loss: 0.5780, Val Loss: 0.5129, Accuracy: 0.6458, F1: 0.7606, ROC AUC: 0.9637
Saving Best Model...
Epoch 3/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.45it/s]


Train Loss: 0.3741, Val Loss: 0.0209, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Saving Best Model...
Epoch 4/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.61it/s]
100%|██████████| 9/9 [00:01<00:00,  6.42it/s]


Train Loss: 0.2514, Val Loss: 0.1366, Accuracy: 0.9722, F1: 0.9747, ROC AUC: 0.9998
Epoch 5/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.33it/s]


Train Loss: 0.2323, Val Loss: 0.0381, Accuracy: 0.9931, F1: 0.9938, ROC AUC: 0.9998
Epoch 6/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.22it/s]


Train Loss: 0.2676, Val Loss: 0.0329, Accuracy: 0.9861, F1: 0.9878, ROC AUC: 1.0000
Epoch 7/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.31it/s]


Train Loss: 0.1818, Val Loss: 0.0159, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 8/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.25it/s]


Train Loss: 0.1881, Val Loss: 0.0646, Accuracy: 0.9792, F1: 0.9818, ROC AUC: 1.0000
Epoch 9/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.46it/s]


Train Loss: 0.2339, Val Loss: 0.1034, Accuracy: 0.9375, F1: 0.9474, ROC AUC: 0.9992
Epoch 10/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.41it/s]


Train Loss: 0.2373, Val Loss: 0.1372, Accuracy: 0.9236, F1: 0.9272, ROC AUC: 1.0000
Epoch 11/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.74it/s]
100%|██████████| 9/9 [00:01<00:00,  6.08it/s]


Train Loss: 0.1525, Val Loss: 0.0684, Accuracy: 0.9722, F1: 0.9747, ROC AUC: 1.0000
Epoch 12/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.40it/s]


Train Loss: 0.1227, Val Loss: 0.0038, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 13/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.40it/s]


Train Loss: 0.1570, Val Loss: 0.0414, Accuracy: 0.9792, F1: 0.9814, ROC AUC: 0.9992
Epoch 14/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.37it/s]


Train Loss: 0.0917, Val Loss: 0.0011, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 15/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.42it/s]


Train Loss: 0.1233, Val Loss: 0.0244, Accuracy: 0.9861, F1: 0.9877, ROC AUC: 0.9998
Epoch 16/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.45it/s]


Train Loss: 0.0777, Val Loss: 0.0006, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 17/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.17it/s]


Train Loss: 0.1112, Val Loss: 0.0088, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 18/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.37it/s]


Train Loss: 0.0685, Val Loss: 0.0002, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 19/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.47it/s]


Train Loss: 0.1085, Val Loss: 0.0068, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 20/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.32it/s]


Train Loss: 0.0609, Val Loss: 0.0019, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 21/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.46it/s]


Train Loss: 0.0601, Val Loss: 0.0005, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 22/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  5.97it/s]


Train Loss: 0.0372, Val Loss: 0.0163, Accuracy: 0.9931, F1: 0.9939, ROC AUC: 1.0000
Epoch 23/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.38it/s]


Train Loss: 0.0249, Val Loss: 0.0054, Accuracy: 0.9931, F1: 0.9939, ROC AUC: 1.0000
Epoch 24/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.51it/s]


Train Loss: 0.0192, Val Loss: 0.0001, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 25/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.74it/s]
100%|██████████| 9/9 [00:01<00:00,  6.37it/s]


Train Loss: 0.0184, Val Loss: 0.0005, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 26/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.35it/s]


Train Loss: 0.0301, Val Loss: 0.0030, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 27/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.41it/s]


Train Loss: 0.0200, Val Loss: 0.0010, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 28/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.75it/s]
100%|██████████| 9/9 [00:01<00:00,  6.37it/s]


Train Loss: 0.0196, Val Loss: 0.0007, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 29/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.38it/s]


Train Loss: 0.0095, Val Loss: 0.0005, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000
Epoch 30/30


  with torch.cuda.amp.autocast():
100%|██████████| 36/36 [00:13<00:00,  2.76it/s]
100%|██████████| 9/9 [00:01<00:00,  6.37it/s]

Train Loss: 0.0109, Val Loss: 0.0005, Accuracy: 1.0000, F1: 1.0000, ROC AUC: 1.0000





# Prediction

In [12]:

test_dir = "Test"  #add your test path
test_csv = "sample_submission.csv"
test_labels = pd.read_csv(test_csv)
preds = predict(test_labels, test_dir, model, img_size=(224, 224), batch_size=16, model_path='best_model_fold0.pth')

test_labels['label'] = preds
test_labels.to_csv('submission.csv', index=False)


  model.load_state_dict(torch.load(model_path))
100%|██████████| 12/12 [00:01<00:00,  6.75it/s]
