# Projeto deep learning

# Fine tuning da EfficientNet pré-treinada com o DINO usando o dataset COVIDGR

In [8]:
# Unpack dataset
import zipfile

path_to_zip_file = './datasets/COVIDGR_1.0.zip'
directory_to_extract_to = './datasets/COVIDGR_1.0'

with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

## Step 1: Setup Google Drive, Libraries and GPU

In [1]:
# Check GPU info
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sat Nov 23 16:55:16 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.256.02   Driver Version: 470.256.02   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN Xp     Off  | 00000000:01:00.0 Off |                  N/A |
| 23%   31C    P8     8W / 250W |      2MiB / 12196MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!which python

/home/CIN/tta/msc-deep-learning/deep_learning/bin/python


In [20]:
# versoes compativeis com a gpu
# ! pip --no-cache-dir install torch==1.11.0
# ! pip --no-cache-dir install torchvision==0.12.0 
# ! pip --no-cache-dir install scikit-learn

Collecting numpy
  Downloading numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.3/16.3 MB[0m [31m43.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: numpy
Successfully installed numpy-2.1.3
Collecting scikit-learn
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m101.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.5.2


In [3]:
import torch
torch.cuda.is_available()

True

# Step 2: Data Loading and Augmentation
Define Dataset Paths: Set up the paths to the P and N folders for loading images.

Data Augmentation: Implement the augmentation protocol, including random resized cropping, color jittering, color dropping, and Gaussian blurring.

Load Dataset: Use torchvision.datasets.ImageFolder with the custom transformations.

In [4]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split
import torch

# Path to the dataset
dataset_path = './datasets/COVIDGR_1.0'
positive_path = os.path.join(dataset_path, 'P')
negative_path = os.path.join(dataset_path, 'N')

# Data transformations
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.3, 0.9), ratio=(3/4, 4/3)),
    transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2, hue=0.1)],
            p=0.8
        ),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=25, sigma=(0.1, 2.0))], p=0.5),
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

# Load dataset with ImageFolder
dataset = datasets.ImageFolder(root=dataset_path, transform=train_transform)

In [5]:
dataset

Dataset ImageFolder
    Number of datapoints: 852
    Root location: ./datasets/COVIDGR_1.0
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.3, 0.9), ratio=(0.75, 1.3333), interpolation=bilinear)
               RandomApply(
               p=0.8
               ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.8, 1.2], hue=[-0.1, 0.1])
           )
               RandomGrayscale(p=0.2)
               RandomApply(
               p=0.5
               GaussianBlur(kernel_size=(25, 25), sigma=(0.1, 2.0))
           )
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
           )

In [6]:
# Step 4: Map labels: 'P' to 1, 'N' to 0
dataset.class_to_idx = {'N': 0, 'P': 1}
dataset

Dataset ImageFolder
    Number of datapoints: 852
    Root location: ./datasets/COVIDGR_1.0
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(224, 224), scale=(0.3, 0.9), ratio=(0.75, 1.3333), interpolation=bilinear)
               RandomApply(
               p=0.8
               ColorJitter(brightness=[0.6, 1.4], contrast=[0.6, 1.4], saturation=[0.8, 1.2], hue=[-0.1, 0.1])
           )
               RandomGrayscale(p=0.2)
               RandomApply(
               p=0.5
               GaussianBlur(kernel_size=(25, 25), sigma=(0.1, 2.0))
           )
               Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               ToTensor()
               Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
           )

# Step 3: Data Splitting
90-10 Train-Test Split and Train-Validation Split.

In [7]:
# Splitting the dataset
train_size = int(0.9 * len(dataset))
test_size = len(dataset) - train_size
batch_size = 256
print(train_size, batch_size)

# Split off the test set
train_val_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# DataLoader for the test set (held out)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

766 256


In [8]:
len(dataset) * 0.9

766.8000000000001

# Step 4: Model Setup
Load Pretrained EfficientNet: Load EfficientNet with ImageNet pre-trained weights and adapt the final layer for binary classification.

Define Optimizer: Set up the SGD optimizer with momentum.

Hyperparameter Grid Search: You’ll need to run a grid search loop over learning rates and weight decay values.

In [15]:
checkpoint_path = "./output/DINOXray/checkpoint.pth"

In [24]:
best_params = {"learning_rate": 0.01, "weight_decay": 0.001}
best_params

{'learning_rate': 0.01, 'weight_decay': 0.001}

In [40]:
from torchvision.models import efficientnet_b0
import torch.nn as nn
import torch.optim as optim

def get_model():
    # Load the EfficientNet model
    model = efficientnet_b0()
    
    # Modify the final classification head for your dataset
    embed_dim = model.classifier[1].in_features
    model.classifier[1] = nn.Identity()
    
    # Load the DINO-pretrained weights
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    
    # Remove unnecessary keys (e.g., DINO projection head weights)
    state_dict = checkpoint["teacher"]  # Adjust the key if needed
    # remove `module.` prefix
    state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
    # remove `backbone.` prefix induced by multicrop wrapper
    state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()}
    
    # Load the weights into the model
    msg = model.load_state_dict(state_dict, strict=False)
    print('Pretrained weights found at {} and loaded with msg: {}'.format(checkpoint_path, msg))
    
    for param in model.features.parameters():
        param.requires_grad = False
    return model, embed_dim

## Step 6: Training and Validation
Define Training and Evaluation Loops: Track metrics like precision, recall, accuracy, and F1-score per epoch.

Cross-Validation: Implement 5-fold cross-validation, recording average and standard deviation metrics.

In [41]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import time
from sklearn.model_selection import KFold

print (np.__version__)


def calculate_metrics(true_labels, predictions):
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)
    return accuracy, precision, recall, f1


def train_model(model, classifier, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    classifier = classifier.to(device)
    train_losses, val_losses = [], []
    best_val_f1 = 0.0

    for epoch in range(num_epochs):
        classifier.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            outputs = classifier(outputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        scheduler.step()  # Adjust learning rate

        # Validation phase
        classifier.eval()
        val_loss = 0.0
        all_preds, all_labels = [], []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                outputs = classifier(outputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                preds = torch.argmax(outputs, dim=1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # Metrics
        accuracy, precision, recall, f1 = calculate_metrics(all_labels, all_preds)
        train_losses.append(running_loss / len(train_loader))
        val_losses.append(val_loss / len(val_loader))

        if f1 > best_val_f1:
            best_val_f1 = f1

        print(f"[{device}] Epoch {epoch+1}/{num_epochs}, Train Loss: {running_loss / len(train_loader):.4f}, "
              f"Val Loss: {val_loss / len(val_loader):.4f}, F1 Score: {f1:.4f}")

    return best_val_f1

# Evaluate model on the test set
def evaluate_model(model, classifier, test_loader):
    classifier.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    classifier = classifier.to(device)

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = classifier(outputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='binary')
    recall = recall_score(all_labels, all_preds, average='binary')
    f1 = f1_score(all_labels, all_preds, average='binary')
    return accuracy, precision, recall, f1

def grid_search(train_loader, val_loader, learning_rates, weight_decays, num_epochs):
    best_model = None
    best_f1 = 0
    best_params = {}
    for lr in learning_rates:
        for wd in weight_decays:
            model = get_model()
            optimizer = optim.SGD(model.classifier[1].parameters(), lr=lr, weight_decay=wd, momentum=0.9)
            scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: min(1.0, (epoch + 1) / 10))
            criterion = nn.CrossEntropyLoss()
            print(f"\nTraining with lr={lr}, weight_decay={wd}")
            f1_score = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs)
            if f1_score > best_f1:
                best_f1 = f1_score
                best_model = model
                best_params = {"learning_rate": lr, "weight_decay": wd}
    print(f"\nBest Model F1: {best_f1} with params {best_params}")
    return best_model, best_params

# Step 9: 5-Fold Cross-Validation
def cross_validation(best_params, dataset, test_loader, num_epochs=50, folds=5):
    fold_metrics = []
    kfold = KFold(n_splits=folds, shuffle=True, random_state=100)

    for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
        print(f"\nStarting fold {fold + 1}/{folds}")

        # Split dataset indices for training and validation
        train_subset = Subset(dataset, train_idx)
        val_subset = Subset(dataset, val_idx)

        # Create DataLoaders for this fold
        train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        model, embed_dim = get_model()
        linear_classifier = nn.Linear(embed_dim, 2)
        optimizer = optim.SGD(linear_classifier.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'], momentum=0.9)
        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: min(1.0, (epoch + 1) / 10))
        criterion = nn.CrossEntropyLoss()

        start_time = time.time()
        _ = train_model(model, linear_classifier, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs)
        end_time = time.time()

        # Evaluate on test set
        accuracy, precision, recall, f1 = evaluate_model(model, linear_classifier, test_loader)
        fold_metrics.append((accuracy, precision, recall, f1, end_time - start_time))

    return np.array(fold_metrics)

1.26.4


Run grid search to get best params


In [None]:
# Dataloaders
val_size = int(0.1 * len(train_val_dataset))
train_size = len(train_val_dataset) - val_size
train_dataset, val_dataset = random_split(train_val_dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Hyperparameters
learning_rates = [1e-2, 1e-3, 1e-4]
weight_decays = [1e-3, 1e-4, 1e-5]
num_epochs = 50

In [None]:
# Grid search
best_model, best_params = grid_search(train_loader, val_loader, learning_rates, weight_decays, num_epochs)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 187MB/s]



Training with lr=0.01, weight_decay=0.001
[cuda] Epoch 1/50, Train Loss: 0.6991, Val Loss: 0.6915, F1 Score: 0.5301
[cuda] Epoch 2/50, Train Loss: 0.6971, Val Loss: 0.7263, F1 Score: 0.6422
[cuda] Epoch 3/50, Train Loss: 0.6945, Val Loss: 0.7373, F1 Score: 0.6607
[cuda] Epoch 4/50, Train Loss: 0.6816, Val Loss: 0.7128, F1 Score: 0.6667
[cuda] Epoch 5/50, Train Loss: 0.6713, Val Loss: 0.6953, F1 Score: 0.6105
[cuda] Epoch 6/50, Train Loss: 0.6519, Val Loss: 0.7020, F1 Score: 0.4110
[cuda] Epoch 7/50, Train Loss: 0.6452, Val Loss: 0.6959, F1 Score: 0.4444
[cuda] Epoch 8/50, Train Loss: 0.6152, Val Loss: 0.6784, F1 Score: 0.4308
[cuda] Epoch 9/50, Train Loss: 0.6101, Val Loss: 0.6781, F1 Score: 0.5143
[cuda] Epoch 10/50, Train Loss: 0.5914, Val Loss: 0.6956, F1 Score: 0.4478
[cuda] Epoch 11/50, Train Loss: 0.6023, Val Loss: 0.7309, F1 Score: 0.4286
[cuda] Epoch 12/50, Train Loss: 0.5768, Val Loss: 0.6614, F1 Score: 0.5672
[cuda] Epoch 13/50, Train Loss: 0.5857, Val Loss: 0.6413, F1 Score

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[cuda] Epoch 2/50, Train Loss: 0.7068, Val Loss: 0.6963, F1 Score: 0.0000
[cuda] Epoch 3/50, Train Loss: 0.7135, Val Loss: 0.6961, F1 Score: 0.0513
[cuda] Epoch 4/50, Train Loss: 0.6989, Val Loss: 0.6997, F1 Score: 0.0930
[cuda] Epoch 5/50, Train Loss: 0.7002, Val Loss: 0.6970, F1 Score: 0.2000
[cuda] Epoch 6/50, Train Loss: 0.6984, Val Loss: 0.6906, F1 Score: 0.4407
[cuda] Epoch 7/50, Train Loss: 0.6945, Val Loss: 0.6889, F1 Score: 0.4000
[cuda] Epoch 8/50, Train Loss: 0.6869, Val Loss: 0.6966, F1 Score: 0.4545
[cuda] Epoch 9/50, Train Loss: 0.6856, Val Loss: 0.6967, F1 Score: 0.5294
[cuda] Epoch 10/50, Train Loss: 0.6833, Val Loss: 0.6771, F1 Score: 0.4194
[cuda] Epoch 11/50, Train Loss: 0.6865, Val Loss: 0.6891, F1 Score: 0.4928
[cuda] Epoch 12/50, Train Loss: 0.6791, Val Loss: 0.6985, F1 Score: 0.3729
[cuda] Epoch 13/50, Train Loss: 0.6703, Val Loss: 0.6987, F1 Score: 0.4444
[cuda] Epoch 14/50, Train Loss: 0.6672, Val Loss: 0.6913, F1 Score: 0.4333
[cuda] Epoch 15/50, Train Loss: 0

Run 5-fold cross validation

In [42]:
# Run the 5-fold cross-validation
metrics = cross_validation(best_params, train_val_dataset, test_loader, num_epochs=50, folds=5)

# Calculate average and standard deviation of metrics across folds
avg_metrics = metrics.mean(axis=0)
std_metrics = metrics.std(axis=0)

print(f"\nAverage metrics over 5 folds in test set:\n"
      f"Accuracy: {avg_metrics[0]:.4f} ± {std_metrics[0]:.4f}\n"
      f"Precision: {avg_metrics[1]:.4f} ± {std_metrics[1]:.4f}\n"
      f"Recall: {avg_metrics[2]:.4f} ± {std_metrics[2]:.4f}\n"
      f"F1 Score: {avg_metrics[3]:.4f} ± {std_metrics[3]:.4f}\n"
      f"Training Time per Fold: {avg_metrics[4]:.2f} ± {std_metrics[4]:.2f} seconds")


Starting fold 1/5
Pretrained weights found at ./output/DINOXray/checkpoint.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v', 'classifier.1.weight', 'classifier.1.bias'])
[cuda] Epoch 1/50, Train Loss: 0.7329, Val Loss: 0.7004, F1 Score: 0.6783
[cuda] Epoch 2/50, Train Loss: 0.7433, Val Loss: 0.6941, F1 Score: 0.2772
[cuda] Epoch 3/50, Train Loss: 0.7277, Val Loss: 0.7209, F1 Score: 0.1538
[cuda] Epoch 4/50, Train Loss: 0.8247, Val Loss: 0.6885, F1 Score: 0.3934
[cuda] Epoch 5/50, Train Loss: 0.7886, Val Loss: 0.7353, F1 Score: 0.6578
[cuda] Epoch 6/50, Train Loss: 0.8030, Val Loss: 0.6875, F1 Score: 0.1333
[cuda] Epoch 7/50, Train Loss: 0.7869, Val Loss: 0.7064, F1 Score: 0.0909
[cuda] Epoch 8/50, Train Loss: 0.8366, Val Loss: 0.8979, F1 Score: 0.6377
[cuda] Epoch 9/50, Train Loss: 0.8

In [43]:
metrics

array([[4.88372093e-01, 6.00000000e-01, 6.66666667e-02, 1.20000000e-01,
        1.87156148e+03],
       [5.11627907e-01, 8.00000000e-01, 8.88888889e-02, 1.60000000e-01,
        1.87731211e+03],
       [5.93023256e-01, 6.00000000e-01, 6.66666667e-01, 6.31578947e-01,
        1.90341581e+03],
       [5.00000000e-01, 7.50000000e-01, 6.66666667e-02, 1.22448980e-01,
        1.87140900e+03],
       [5.69767442e-01, 5.95238095e-01, 5.55555556e-01, 5.74712644e-01,
        1.88219220e+03]])

In [44]:
import pandas as pd

results = pd.DataFrame(metrics, columns=['accuracy', 'precision', 'recall', 'f1', 'training time'])
results.head()

Unnamed: 0,accuracy,precision,recall,f1,training time
0,0.488372,0.6,0.066667,0.12,1871.561478
1,0.511628,0.8,0.088889,0.16,1877.312113
2,0.593023,0.6,0.666667,0.631579,1903.415806
3,0.5,0.75,0.066667,0.122449,1871.409004
4,0.569767,0.595238,0.555556,0.574713,1882.1922


In [45]:
results.to_csv('./results/dino_fine_tuned.csv')