## Transfer Learning for Computer Vision Tutorial

**Inspired by**: [Sasank Chilamkurthy](https://chsasank.github.io)

**Modified by**: 
- [win7](https://github.com/win7) at October 2023
- [win7](https://github.com/win7) at July 2023

In this tutorial, you will learn how to train a convolutional neural network for
image classification using transfer learning. You can read more about the transfer
learning at [cs231n notes](https://cs231n.github.io/transfer-learning/)_

Quoting these notes,

    In practice, very few people train an entire Convolutional Network
    from scratch (with random initialization), because it is relatively
    rare to have a dataset of sufficient size. Instead, it is common to
    pretrain a ConvNet on a very large dataset (e.g. ImageNet, which
    contains 1.2 million images with 1000 categories), and then use the
    ConvNet either as an initialization or a fixed feature extractor for
    the task of interest.

These two major transfer learning scenarios look as follows:

-  **Fine-tuning the convnet**: Instead of random initialization, we
   initialize the network with a pretrained network, like the one that is
   trained on imagenet 1000 dataset. Rest of the training looks as
   usual.
-  **ConvNet as fixed feature extractor**: Here, we will freeze the weights
   for all of the network except that of the final fully connected
   layer. This last fully connected layer is replaced with a new one
   with random weights and only this layer is trained.


#### Transfer Learning Summary

- Use a pre-trained model out-of-the-box (good if a model already exists for your problem).

- Use a pre-trained model as a **feature extractor** (good if you want to adapt a pre-trained model for a specific problem).

- **Fine-tune** a pre-trained model (same as 2 but generally yields better results, although at more computational cost).

### Imports

In [4]:
from __future__ import print_function, division
from PIL import Image, ImageStat
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import WeightedRandomSampler
from torchsampler import ImbalancedDatasetSampler
from torch.optim import lr_scheduler
from tempfile import TemporaryDirectory
from tqdm import tqdm

import copy
import matplotlib.pyplot as plt
import memory_profiler  # conda install -c anaconda memory_profiler
import neptune
import numpy as np
import os
import pandas as pd
import shutil
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision

# cudnn.benchmark = True
# plt.ion()   # interactive mode

%matplotlib inline
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 4.79 ms (started: 2023-07-12 10:32:50 -05:00)


### Functions



In [5]:
def move_split(df, type): # change this function according your dataset
    if not os.path.isdir("dataset"):
        os.makedirs("dataset")

        for item in np.unique(df.iloc[:, -1]):
            os.makedirs("dataset/train/class_{}".format(item))
            os.makedirs("dataset/val/class_{}".format(item))
    
    for index, row in tqdm(df.iterrows()):
        s = "source/train/{}_{}.jpg".format(row["ID"], row["location"])
        t = "dataset/{}/class_{}".format(type, row["level"])
        shutil.copy(s, t)

def set_parameter_requires_grad(model, feature_extract):
    if feature_extract:
        for param in model.parameters():
            param.requires_grad = False

# set initialize weight for custom model
def init_weights(m):
    if isinstance(m, torch.nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

def classifier_model(num_ftrs, num_classes, init_weight=False):
    # Custom model
    """ torch.nn.Conv2d(in_channels=8, out_channels=3, kernel_size=(3, 3), padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d((2, 2)),
    torch.nn.Conv2d(in_channels=3, out_channels=2, kernel_size=(3, 3), padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d((2, 2)),
    torch.nn.Flatten(), """

    """ model = torch.nn.Sequential(torch.nn.Linear(num_ftrs, 128),
                                torch.nn.ReLU(),

                                torch.nn.Linear(128, 64),
                                torch.nn.ReLU(),
                                torch.nn.Dropout(0.5),

                                torch.nn.Linear(64, num_classes)) """
    
    model = torch.nn.Sequential(torch.nn.Linear(num_ftrs, 1024),
                                torch.nn.Mish(),

                                torch.nn.Linear(1024, 512),
                                torch.nn.Mish(),

                                torch.nn.Linear(512, 128),
                                torch.nn.Mish(),
                                torch.nn.Dropout(0.5),

                                torch.nn.Linear(128, num_classes))
    
    if init_weight:
        model.apply(init_weights)
    return model

def initialize_model(model_name, num_classes, feature_extract):
    # Initialize these variables which will be set in this if statement. Each of these
    # variables is model specific.
    model = None
    input_size = 0

    if model_name == "alexnet":
        model = models.alexnet(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "convnext_large":
        model = models.convnext_large(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[2].in_features
        model.classifier[2] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "densenet201":
        model = models.densenet201(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier.in_features
        model.classifier = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "efficientnet_v2_l":
        model = models.efficientnet_v2_l(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[1].in_features
        model.classifier[1] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 480

    elif model_name == "googlenet":
        model = models.googlenet(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 384

    elif model_name == "inception_v3":
        model = models.inception_v3(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        # Handle the auxilary net
        num_ftrs = model.AuxLogits.fc.in_features
        model.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299

        """ set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 299 """

    elif model_name == "mnasnet1_3":
        model = models.mnasnet1_3(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[1].in_features
        model.classifier[1] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "maxvit_t":
        model = models.maxvit_t(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[5].in_features
        model.classifier[5] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "mobilenet_v3_large":
        model = models.mobilenet_v3_large(weights="IMAGENET1K_V2")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[3].in_features
        model.classifier[3] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "regnet_y_128gf":
        model = models.regnet_y_128gf(weights="IMAGENET1K_SWAG_E2E_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 384

    elif model_name == "resnext101_64x4d":
        model = models.resnext101_64x4d(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "resnet152":
        model = models.resnet152(weights="IMAGENET1K_V2")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "shufflenet_v2_x2_0":
        model = models.shufflenet_v2_x2_0(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "squeezenet1_1":
        model = models.squeezenet1_1(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        model.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model.num_classes = num_classes
        input_size = 224

    elif model_name == "swin_v2_b":
        model = models.swin_v2_b(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.head.in_features
        model.head = classifier_model(num_ftrs, num_classes, init_weight=True) # # nn.Linear(num_ftrs, num_classes)
        input_size = 256

    elif model_name == "vgg19":
        model = models.vgg19_bn(weights="IMAGENET1K_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vit_h_14":
        model = models.vit_h_14(weights="IMAGENET1K_SWAG_E2E_V1")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.heads.head.in_features
        model.heads.head = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs,num_classes)
        input_size = 518

    elif model_name == "wide_resnet101_2":
        model = models.wide_resnet101_2(weights="IMAGENET1K_V2")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    # ---    
    elif model_name == "resnet50":
        model = models.resnet50(weights="IMAGENET1K_V2")
        set_parameter_requires_grad(model, feature_extract)
        num_ftrs = model.fc.in_features
        model.fc = classifier_model(num_ftrs, num_classes, init_weight=True) # nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        weights = models.Inception_V3_Weights.IMAGENET1K_V1 #
        model = models.inception_v3(weights=weights, progress=True)
        set_parameter_requires_grad(model, feature_extract)
        # Handle the auxilary net
        num_ftrs = model.AuxLogits.fc.in_features
        model.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model, input_size

def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

def train_model(run, model, device, dataloaders, dataset_sizes, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    train_losses = []
    val_losses = []

    train_accuracies = []
    val_accuracies = []

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, "best_model_params.pt")

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        loop_obj = tqdm(range(num_epochs))
        for epoch in loop_obj:
            # print(f'Epoch {epoch}/{num_epochs - 1}')
            # print('-' * 10)
            loop_obj.set_description(f"Epoch: {epoch + 1}")
            run["params/epochs"] = epoch + 1

            # Each epoch has a training and validation phase
            for phase in ["train", "val"]:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == "train"):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == "train":
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

                if phase == "train":
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                # print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
                loop_obj.set_postfix_str(f"{phase}, Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

                run["{}/accuracy".format(phase)].append(epoch_acc)
                run["{}/loss".format(phase)].append(epoch_loss)

                if phase == "train":
                    train_accuracies.append(epoch_acc)
                    train_losses.append(epoch_loss)
                else:
                    val_accuracies.append(epoch_acc)
                    val_losses.append(epoch_loss)

                # deep copy the model
                if phase == "val" and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')
        run["train/runtime"] = time_elapsed // 60

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model, train_accuracies, train_losses, val_accuracies, val_losses

def visualize_model(model, num_images=6): # no used
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders["val"]):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis("off")
                ax.set_title(f"predicted: {class_names[preds[j]]}")
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

def show_results(train_losses, val_losses, train_accuracies, val_accuracies):# no used
    train_accuracies_ = [h.cpu().numpy() for h in train_accuracies]
    val_accuracies_ = [h.cpu().numpy() for h in val_accuracies]

    fig, axes = plt.subplots(1, 2, figsize=(14,4))
    ax1, ax2 = axes
    ax1.plot(train_losses, label="train")
    ax1.plot(val_losses, label="val")
    ax1.set_xlabel("Epoch"); ax1.set_ylabel("Loss")
    ax1.grid()
    ax2.plot(train_accuracies_, label="train")
    ax2.plot(val_accuracies_, label="val")
    ax2.set_xlabel("Epoch"); ax2.set_ylabel("Accuracy")
    ax2.grid()
    for ax in axes: ax.legend()

def get_predictions(model_ft, dataloaders, type_data):
    labels = []
    predictions = []
    predictions_proba = []

    with torch.no_grad():
        for batch_x_test, batch_y_test in dataloaders[type_data]:
            batch_x_test = batch_x_test.to(device).to(torch.float32)
            batch_y_test = batch_y_test.to(device).to(torch.int64)

            batch_test_predictions = model_ft(batch_x_test)
            # batch_test_predictions = model_conv(batch_x_test)

            batch_test_predictions = torch.nn.functional.softmax(batch_test_predictions, dim=-1)
            predictions_proba.append(batch_test_predictions)
            # print(batch_test_predictions)

            batch_test_predictions = batch_test_predictions.max(dim=1).indices
            # print(batch_test_predictions)

            labels.append(batch_y_test)
            predictions.append(batch_test_predictions)

    labels = torch.cat(labels, dim=0)
    predictions = torch.cat(predictions, dim=0)
    predictions_proba = torch.cat(predictions_proba, dim=0)

    labels = labels.cpu().numpy()
    predictions = predictions.cpu().numpy()
    predictions_proba = predictions_proba.cpu().numpy()
    return labels, predictions, predictions_proba

# Data augmentation and normalization for training
# Just normalization for validation
def get_data_transforms(option, input_size, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    list_data_transforms = [
        # transforms 0
        {
            "train": transforms.Compose([
                transforms.RandomResizedCrop(input_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "val": transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        },
        # transforms 1
        {
            "train": transforms.Compose([
                transforms.RandomResizedCrop(input_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "val": transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        },
        # transforms 2
        {
            "train": transforms.Compose([
                transforms.Resize(400),
                transforms.RandomHorizontalFlip(),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "val": transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        },
        # transforms 3
        {
            "train": transforms.Compose([
                transforms.Resize(400),
                transforms.RandomHorizontalFlip(),
                transforms.CenterCrop(input_size),
                transforms.RandomRotation(degrees=45),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            "val": transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.RandomRotation(degrees=20),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }
    ]
    data_transforms = list_data_transforms[option]
    return data_transforms

def get_optimizer(model, option):
    optimizers = [
        optim.Adadelta(model.parameters(), lr=1.0),             # 0
        optim.Adagrad(model.parameters(), lr=0.01),             # 1
        optim.Adam(model.parameters(), lr=0.001),               # 2
        optim.AdamW(model.parameters(), lr=0.001),              # 3
        optim.SparseAdam(model.parameters(), lr=0.001),         # 4, SparseAdam does not support dense gradients, please consider Adam instead
        optim.Adamax(model.parameters(), lr=0.002),             # 5
        optim.ASGD(model.parameters(), lr=0.01),                # 6
        optim.LBFGS(model.parameters(), lr=1),                  # 7, more cost
        optim.NAdam(model.parameters(), lr=0.002),              # 8
        optim.RAdam(model.parameters(), lr=0.001),              # 9   
        optim.RMSprop(model.parameters(), lr=0.01),             # 10
        optim.Rprop(model.parameters(), lr=0.01),               # 11
        optim.SGD(model.parameters(), lr=0.001, momentum=0.7)   # 12
    ]
    return optimizers[option]

time: 7.48 ms (started: 2023-07-12 10:32:50 -05:00)


### Split data

In [6]:
df = pd.read_csv("source/train.csv")
df

Unnamed: 0,ID,location,level
0,d3d9446802a44259755d38e6d163e820,left,0
1,d3d9446802a44259755d38e6d163e820,right,0
2,c51ce410c124a10e0db5e4b97fc2af39,left,0
3,c51ce410c124a10e0db5e4b97fc2af39,right,0
4,9bf31c7ff062936a96d3c8bd1f8f2ff3,left,1
...,...,...,...
24583,7dc5ece165388748790d8170245197ed,right,0
24584,e3c216d521607da146fa23d65cfcc6b6,left,0
24585,e3c216d521607da146fa23d65cfcc6b6,right,0
24586,54d9ee4df2caa006b994d148f23a9b10,left,0


time: 45.9 ms (started: 2023-07-12 10:32:50 -05:00)


In [7]:
# count classes
df["level"].value_counts()

0    18057
1     6531
Name: level, dtype: int64

time: 3.06 ms (started: 2023-07-12 10:32:51 -05:00)


In [8]:
df_X = df.iloc[:, :-1]
df_X

Unnamed: 0,ID,location
0,d3d9446802a44259755d38e6d163e820,left
1,d3d9446802a44259755d38e6d163e820,right
2,c51ce410c124a10e0db5e4b97fc2af39,left
3,c51ce410c124a10e0db5e4b97fc2af39,right
4,9bf31c7ff062936a96d3c8bd1f8f2ff3,left
...,...,...
24583,7dc5ece165388748790d8170245197ed,right
24584,e3c216d521607da146fa23d65cfcc6b6,left
24585,e3c216d521607da146fa23d65cfcc6b6,right
24586,54d9ee4df2caa006b994d148f23a9b10,left


time: 17.3 ms (started: 2023-07-12 10:32:51 -05:00)


In [9]:
df_y = df.iloc[:, -1:]
df_y

Unnamed: 0,level
0,0
1,0
2,0
3,0
4,1
...,...
24583,0
24584,0
24585,0
24586,0


time: 17.6 ms (started: 2023-07-12 10:32:51 -05:00)


In [10]:
# df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.2, random_state=42, stratify=df_y)
len(X_train), len(X_test), len(y_train), len(y_test)

(19670, 4918, 19670, 4918)

time: 95.4 ms (started: 2023-07-12 10:32:51 -05:00)


In [11]:
df_train = X_train.copy()
df_train["level"] = y_train
df_train

Unnamed: 0,ID,location,level
328,3dc4876f3f08201c7c76cb71fa1da439,left,0
16271,dd24bf1f94c244e91a4a783a50f36b6f,right,0
7110,a0b173044f2019316bebc411696e7d35,left,0
12530,d79bc606792288b59c64959c6bcbf5d5,left,0
8871,b0263bc40e0ff50f481b85a968c30ac1,right,0
...,...,...,...
13812,614794dbe82384e43122cc92e6a66f6f,left,1
16999,252a3893179658de41f437d975468205,right,0
9356,34f9679482b481012016f1f5c8b977f0,left,0
3042,66fae5b05c0f64c4d2bdcdf1ad85f7b2,left,0


time: 6.15 ms (started: 2023-07-12 10:32:51 -05:00)


In [12]:
df_test = X_test.copy()
df_test["level"] = y_test
df_test

Unnamed: 0,ID,location,level
11916,8bc96a461bd0ce6616f30b9383b7427d,left,1
23962,080261e4427a081fc6e637b654f590ee,left,0
8273,d16c8f18bdee715020ec90b5ec04e9d4,right,0
2103,62e7f2e090fe150ef8deb4466fdc81b3,right,0
5340,2bdfb48c5fa7d2344b71ef45c8a7d31c,left,1
...,...,...,...
16463,65742cafb273e12fc7bb968b5fca065e,right,1
7593,8e887cf8e64ab8e7173701a979476567,right,0
9146,5939cbe46f1512291125700ee2e7236a,left,0
708,36a1694bce9815b7e38a9dad05ad42e0,left,0


time: 17.7 ms (started: 2023-07-12 10:32:51 -05:00)


In [13]:
# move image into train and test folder
# move_split(df_train, "train") () # uncomment this to move the images
# move_split(df_test, "val") # uncomment this to move the images

time: 8.92 ms (started: 2023-07-12 10:32:51 -05:00)


### Initialize and Reshape the networks

Set Model Parameters’ .requires_grad attribute
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This helper function sets the ``.requires_grad`` attribute of the
parameters in the model to False when we are feature extracting. By
default, when we load a pretrained model all of the parameters have
``.requires_grad=True``, which is fine if we are training from scratch
or finetuning. However, if we are feature extracting and only want to
compute gradients for the newly initialized layer then we want all of
the other parameters to not require gradients. This will make more sense
later.

#### Details

Now to the most interesting part. Here is where we handle the reshaping
of each network. Note, this is not an automatic procedure and is unique
to each model. Recall, the final layer of a CNN model, which is often
times an FC layer, has the same number of nodes as the number of output
classes in the dataset. Since all of the models have been pretrained on
Imagenet, they all have output layers of size 1000, one node for each
class. The goal here is to reshape the last layer to have the same
number of inputs as before, AND to have the same number of outputs as
the number of classes in the dataset. In the following sections we will
discuss how to alter the architecture of each model individually. But
first, there is one important detail regarding the difference between
finetuning and feature-extraction.

When feature extracting, we only want to update the parameters of the
last layer, or in other words, we only want to update the parameters for
the layer(s) we are reshaping. Therefore, we do not need to compute the
gradients of the parameters that we are not changing, so for efficiency
we set the .requires_grad attribute to False. This is important because
by default, this attribute is set to True. Then, when we initialize the
new layer and by default the new parameters have ``.requires_grad=True``
so only the new layer’s parameters will be updated. When we are
finetuning we can leave all of the .required_grad’s set to the default
of True.

Finally, notice that inception_v3 requires the input size to be
(299,299), whereas all of the other models expect (224,224).

```python 
model = models.alexnet(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
)
crop_size=[224]
"""

model = models.convnext_large(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): LayerNorm2d((1536,), eps=1e-06, elementwise_affine=True)
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): Linear(in_features=1536, out_features=1000, bias=True)
)
crop_size=[224]
"""

model = models.densenet201(weights="IMAGENET1K_V1")
"""
(classifier): Linear(in_features=1920, out_features=1000, bias=True)
crop_size=[224]
"""

model = models.efficientnet_v2_l(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): Dropout(p=0.4, inplace=True)
    (1): Linear(in_features=1280, out_features=1000, bias=True)
)
crop_size=[480]
"""

model = models.googlenet(weights="IMAGENET1K_V1")
"""
(fc): Linear(in_features=1024, out_features=1000, bias=True)
crop_size=[384]
"""

model = models.inception_v3(weights="IMAGENET1K_V1")
"""
(fc): Linear(in_features=2048, out_features=1000, bias=True)
crop_size=[299]
"""

model = models.mnasnet1_3(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): Dropout(p=0.2, inplace=True)
    (1): Linear(in_features=1280, out_features=1000, bias=True)
)
crop_size=[224]
"""

model = models.maxvit_t(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): AdaptiveAvgPool2d(output_size=1)
    (1): Flatten(start_dim=1, end_dim=-1)
    (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): Tanh()
    (5): Linear(in_features=512, out_features=1000, bias=False)
)
crop_size=[224]
"""

model = models.mobilenet_v3_large(weights="IMAGENET1K_V2")
"""
(classifier): Sequential(
    (0): Linear(in_features=960, out_features=1280, bias=True)
    (1): Hardswish()
    (2): Dropout(p=0.2, inplace=True)
    (3): Linear(in_features=1280, out_features=1000, bias=True)
)
crop_size=[224]
"""

model = models.regnet_y_128gf(weights="IMAGENET1K_SWAG_E2E_V1")
"""
(fc): Linear(in_features=7392, out_features=1000, bias=True)
crop_size=[384]
"""

model = models.resnext101_64x4d(weights="IMAGENET1K_V1")
"""
(fc): Linear(in_features=2048, out_features=1000, bias=True)
crop_size=[224]
"""

model = models.resnet152(weights="IMAGENET1K_V2")
"""
(fc): Linear(in_features=2048, out_features=1000, bias=True)
crop_size=[224]
"""

model = models.shufflenet_v2_x2_0(weights="IMAGENET1K_V1")
"""
(fc): Linear(in_features=2048, out_features=1000, bias=True)
crop_size=[224]
"""

model = models.squeezenet1_1(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Conv2d(512, 1000, kernel_size=(1, 1), stride=(1, 1))
    (2): ReLU(inplace=True)
    (3): AdaptiveAvgPool2d(output_size=(1, 1))
  )
crop_size=[224]
"""

model = models.swin_v2_b(weights="IMAGENET1K_V1")
"""
(head): Linear(in_features=1024, out_features=1000, bias=True)
crop_size=[256]
"""

model = models.vgg19_bn(weights="IMAGENET1K_V1")
"""
(classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
)
crop_size=[224]
"""

model = models.vit_h_14(weights="IMAGENET1K_SWAG_E2E_V1")
"""
(heads): Sequential(
    (head): Linear(in_features=1280, out_features=1000, bias=True)
)
crop_size=[518]
"""

model = models.wide_resnet101_2(weights="IMAGENET1K_V2")
"""
(fc): Linear(in_features=2048, out_features=1000, bias=True)
crop_size=[224]
"""
```

Notice, many of the models have similar output structures, but each must
be handled slightly differently. Also, check out the printed model
architecture of the reshaped network and make sure the number of output
features is the same as the number of classes in the dataset.

### Load data

We will use torchvision and torch.utils.data packages for loading the
data.

```
├── transfer_learning.ipynb
└── dataset ← data dir here
    ├── train
    │   ├── class_1
    │   └── class_2
    ├── test
    │   ├── class_1
    └── └── class_2
```

In [14]:
!ls

dataset		 plot	    test_neptune.ipynb		      weights
environment.yml  README.md  test_neptune.py
output		 source     transfer_learning_template.ipynb
time: 674 ms (started: 2023-07-12 10:33:19 -05:00)


In [3]:
# images information
data_dir = "dataset"
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x)) for x in ["train", "val"]}

for x in ["train", "val"]:
    print(x)
    mem = memory_profiler.memory_usage()[0]
    train_dataset = image_datasets[x]
    print(f"Memory consumed: {memory_profiler.memory_usage()[0] - mem:.0f} mb")

    print(f"Classes: {train_dataset.classes}")
    print(f"Class count: {train_dataset.targets.count(0)}, {train_dataset.targets.count(1)}")
    print(f"Samples:",len(train_dataset))
    print(f"First sample: {train_dataset.samples[0]}")

    img, target = next(iter(train_dataset))
    print(f"Class: {train_dataset.classes[target]}")
    img

    print(f"Image data type: {type(img)}")
    print(f"Image size: {img.size}")
    print()

train
Memory consumed: 0 mb
Classes: ['class_0', 'class_1']
Class count: 14445, 5225
Samples: 19670
First sample: ('dataset/train/class_0/000c076c390a4c357313fca29e390ece_left.jpg', 0)
Class: class_0
Image data type: <class 'PIL.Image.Image'>
Image size: (800, 533)

val
Memory consumed: 0 mb
Classes: ['class_0', 'class_1']
Class count: 3612, 1306
Samples: 4918
First sample: ('dataset/val/class_0/00126b47d5502dfb7d01f750ad23d813_right.jpg', 0)
Class: class_0
Image data type: <class 'PIL.Image.Image'>
Image size: (800, 533)

time: 529 ms (started: 2023-07-12 10:22:25 -05:00)


In [38]:
# get mean and std
train_dataset = image_datasets["train"]
stat = ImageStat.Stat(img)

print("""
Min/max values for each band in the image:
    {.extrema}

Total number of pixels for each band in the image:
    {.count}

Sum of all pixels for each band in the image:
    {.sum}

Squared sum of all pixels for each band in the image:
    {.sum2}

Average (arithmetic mean) pixel level for each band in the image:
    {.mean}

Median pixel level for each band in the image:
    {.median}

RMS (root-mean-square) for each band in the image:
    {.rms}

Variance for each band in the image:
    {.var}

Standard deviation for each band in the image:
    {.stddev}
""".format(*((stat, ) * 9)))


Min/max values for each band in the image:
    [(0, 255), (0, 253), (0, 232)]

Total number of pixels for each band in the image:
    [426400, 426400, 426400]

Sum of all pixels for each band in the image:
    [47027351.0, 39486656.0, 34987839.0]

Squared sum of all pixels for each band in the image:
    [7462613199.0, 5171815206.0, 4000350675.0]

Average (arithmetic mean) pixel level for each band in the image:
    [110.28928470919324, 92.60472795497186, 82.05403142589118]

Median pixel level for each band in the image:
    [131, 109, 99]

RMS (root-mean-square) for each band in the image:
    [132.293000888024, 110.13184157246526, 96.85910100726834]

Variance for each band in the image:
    [5337.711762293237, 3553.3868885282427, 2648.821374695073]

Standard deviation for each band in the image:
    [73.05964523793719, 59.610291800395025, 51.46670161079951]

time: 5.1 ms (started: 2023-07-10 11:18:36 -05:00)


In [40]:
# mean and std for normalization
max_ = stat.extrema
mean_ = stat.mean
std_ = stat.stddev

mean = []
std = []

for k in range(len(max_)):
    mean.append(mean_[k] / max_[k][1])
    std.append(std_[k] / max_[k][1])

print("mean:", mean)
print("std:", std)

mean: [0.4325069988595813, 0.3660265927073987, 0.35368116993918614]
std: [0.2865084126977929, 0.23561380158258904, 0.22183923108103237]
time: 1.96 ms (started: 2023-07-10 11:18:43 -05:00)


#### Visualize a few images

To-Do

## Experiments

In [28]:
# Top level data directory. Here we assume the format of the directory conforms
# to the ImageFolder structure
data_dir = "dataset"

# Models to choose
models_name = ["alexnet", "convnext_large", "densenet201",
                "efficientnet_v2_l", "googlenet", "inception_v3",
                "mnasnet1_3", "maxvit_t", "mobilenet_v3_large",
                "regnet_y_128gf", "resnext101_64x4d", "resnet152",
                "shufflenet_v2_x2_0", "squeezenet1_1", "swin_v2_b",
                "vgg19", "vit_h_14", "wide_resnet101_2"]
                
models_name = ["alexnet", "convnext_large", "densenet201",
                "googlenet",
                "mnasnet1_3", "maxvit_t", "mobilenet_v3_large",
                "resnext101_64x4d", "resnet152",
                "shufflenet_v2_x2_0", "squeezenet1_1", "swin_v2_b",
                "vgg19", "wide_resnet101_2"]

models_name = ["convnext_large", "densenet201",
                "resnext101_64x4d", "resnet152",
                "shufflenet_v2_x2_0", "swin_v2_b",
                "wide_resnet101_2"]

models_name = ["convnext_large"]

# number of classes in the dataset
num_classes = 2

# batch size for training (change depending on how much memory you have)
batch_size = 12 # 8, 12, 16, 32

# number of epochs to train for
num_epochs = 25 # 20, 25, 35, 100

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

for option in [("fe", True)]: # ("ft", False), ("fe", True) # transfer learning
    for model_name in models_name: # models
        for transf in [0]: # [0, 1, 2], data augmentations
            for opti in [3]: # [0, 1, 3, 4, 5, 6, 8, 9, 10, 11, 12], optimizers
                # Setup
                run = neptune.init_run(
                    project="GoTeam/Ka-GC",
                    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIwZjM0NzgxNy1lMDI1LTQxMzgtYTE2NS03OTIwOWY5NzgwNTkifQ==",
                )  # your credentials

                """ run = neptune.init_run(
                    project="GoTeam/Go-KGC",
                    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIwZjM0NzgxNy1lMDI1LTQxMzgtYTE2NS03OTIwOWY5NzgwNTkifQ==",
                )  # your credentials """

                run["sys/tags"].add(option[0])
                run["sys/tags"].add("transf {}".format(transf))

                # Init model
                model, input_size = initialize_model(model_name, num_classes, feature_extract=option[1])
                model = model.to(device)

                # data augmentation
                data_transforms = get_data_transforms(transf, input_size)
                # data_transforms = get_data_transforms(transf, input_size, mean=mean, std=std)

                image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ["train", "val"]}

                # option 1: dataloaders with WeightedRandomSampler
                """ 
                run["sys/tags"].add("WRS")
                train_dataset = image_datasets["train"]
                targets = torch.tensor(train_dataset.targets, dtype=torch.int8)
                class_counts = torch.bincount(targets)
                # weights = 1.0 / class_counts.float()
                weights = [1 / class_counts[i].float() for i in targets]
                sampler = WeightedRandomSampler(weights, len(train_dataset), replacement=True)

                dataloaders = {}
                for x in ["train", "val"]:
                    if x == "train":
                        dataloaders[x] = torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, sampler=sampler,
                                                                    shuffle=False, drop_last=False, num_workers=4)
                    else: 
                        dataloaders[x] = torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                                                    shuffle=True, drop_last=False, num_workers=4) """
                
                # option 2: dataloaders with ImbalancedDatasetSampler
                """ 
                run["sys/tags"].add("IDS")
                dataloaders = {}
                for x in ["train", "val"]:
                    if x == "train":
                        dataloaders[x] = torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, sampler=ImbalancedDatasetSampler(train_dataset),
                                                                    shuffle=False, drop_last=False, num_workers=4)
                    else: 
                        dataloaders[x] = torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                                                    shuffle=True, drop_last=False, num_workers=4) """

                # option 3: dataloaders without WeightedRandomSampler and ImbalancedDatasetSampler
                dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                                                shuffle=True, drop_last=False, num_workers=4) for x in ["train", "val"]}
                            
                dataset_sizes = {x: len(image_datasets[x]) for x in ["train", "val"]}
                class_names = image_datasets["train"].classes
     
                # Hyperparameters
                criterion = nn.CrossEntropyLoss() # nn.BCELoss() # nn.CrossEntropyLoss()

                # observe that all parameters are being optimized
                optimizer = get_optimizer(model, opti)

                # decay LR by a factor of 0.1 every 7 epochs
                exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

                parameters = {
                    "model": model_name,
                    "classes": num_classes,
                    # "epochs": num_epochs,
                    "batch size": batch_size,
                    "criterion": "CrossEntropyLoss",
                    "optimazer": type(optimizer).__name__,
                    "lr": optimizer.param_groups[0]["lr"]
                }
                run["params"] = parameters

                #---
                # Train
                model, \
                train_losses, \
                val_losses, \
                train_accuracies, \
                val_accuracies = train_model(run, model, device, dataloaders, dataset_sizes, 
                                            criterion, optimizer, exp_lr_scheduler, num_epochs=num_epochs)
                # save model
                # torch.save(model.state_dict(), "weights/{}_{}_transf{}".format(model_name, option[0], trans))
                torch.save(model, "weights/{}_{}_transf{}_{}_{}_{}_{}.pt".format(parameters["model"], option[0],
                                                                                 transf, parameters["optimazer"], num_epochs,
                                                                                 parameters["batch size"], parameters["lr"]))

                # Evaluation
                for data in ["train", "val"]:
                    labels, predictions, predictions_proba = get_predictions(model, dataloaders, data)
                    auc = roc_auc_score(labels, predictions_proba[:, 1])
                    run["{}/auc".format(data)] = auc
                
                run.stop()

https://app.neptune.ai/GoTeam/Ka-GC/e/KAG-187


Epoch: 25: 100%|██████████| 25/25 [1:38:29<00:00, 236.39s/it, val, Loss: 0.4366 Acc: 0.8109]  


Training complete in 98m 31s
Best val Acc: 0.813135
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/GoTeam/Ka-GC/e/KAG-187/metadata
time: 1h 42min 30s (started: 2023-07-09 22:38:51 -05:00)


## Prepared test data to Kaggle

#### Read test image

In [22]:
df_test = pd.read_csv("source/sample.csv")
df_test

Unnamed: 0,ID,score
0,c74d97b01eae257e44aa9d5bade97baf_left,0
1,c74d97b01eae257e44aa9d5bade97baf_right,0
2,70efdf2ec9b086079795c442636b55fb_left,0
3,70efdf2ec9b086079795c442636b55fb_right,0
4,1f0e3dad99908345f7439f8ffabdffc4_left,0
...,...,...
10533,e261790d3a4cb0c69c17d7c183830289_right,0
10534,d21137447f7fdc47f7e6677ae35522a7_left,0
10535,d21137447f7fdc47f7e6677ae35522a7_right,0
10536,ec2951e5afb60d72a4a3e0be6d3e9c0a_left,0


time: 25.4 ms (started: 2023-07-09 22:26:36 -05:00)


#### Get probabilities

In [23]:
model_name = "convnext_large"
option = "fe"
transf = 0
optimazer = "AdamW"
lr = 0.001
epochs = 25
batch_size = 12
input_size = 224
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = torch.load("weights/{}_{}_transf{}_{}_{}_{}_{}.pt".format(model_name, option, transf, optimazer, epochs, batch_size, lr))

cuda:0
time: 1.9 s (started: 2023-07-09 22:27:05 -05:00)


In [24]:
data_transforms = get_data_transforms(transf)

time: 1.83 ms (started: 2023-07-09 22:27:13 -05:00)


In [25]:
y_prob = []

for index, row in tqdm(df_test.iterrows()):
    img_path = "source/test/{}.jpg".format(row["ID"])

    was_training = model.training
    model.eval()

    img = Image.open(img_path)
    img = data_transforms["val"](img)
    img = img.unsqueeze(0)
    img = img.to(device)

    with torch.no_grad():
        outputs = model(img)
        
        # _, preds = torch.max(outputs, 1)
        preds = torch.nn.functional.softmax(outputs, dim=-1)
        # print(pred)
        y_prob.append(preds)

        model.train(mode=was_training)
y_prob = torch.cat(y_prob, dim=0)
y_prob

10538it [02:42, 64.71it/s]


tensor([[0.0207, 0.9793],
        [0.0035, 0.9965],
        [0.8226, 0.1774],
        ...,
        [0.7469, 0.2531],
        [0.9045, 0.0955],
        [0.9154, 0.0846]], device='cuda:0')

time: 2min 42s (started: 2023-07-09 22:27:18 -05:00)


In [26]:
score = [h.cpu().numpy().tolist() for h in y_prob]
score = np.array(score)[:, 1]

df_test["score"] = score
df_test

Unnamed: 0,ID,score
0,c74d97b01eae257e44aa9d5bade97baf_left,0.979291
1,c74d97b01eae257e44aa9d5bade97baf_right,0.996499
2,70efdf2ec9b086079795c442636b55fb_left,0.177402
3,70efdf2ec9b086079795c442636b55fb_right,0.361335
4,1f0e3dad99908345f7439f8ffabdffc4_left,0.127075
...,...,...
10533,e261790d3a4cb0c69c17d7c183830289_right,0.326341
10534,d21137447f7fdc47f7e6677ae35522a7_left,0.066853
10535,d21137447f7fdc47f7e6677ae35522a7_right,0.253099
10536,ec2951e5afb60d72a4a3e0be6d3e9c0a_left,0.095472


time: 165 ms (started: 2023-07-09 22:30:22 -05:00)


In [27]:
# save
from datetime import datetime

# datetime object containing current date and time
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y-%H:%M:%S")

df_test.to_csv("output/{}_{}_{}_transf{}_{}_{}_{}_{}.csv".format(dt_string, model_name, option, transf, optimazer, epochs, batch_size, lr), sep=",", index=False, header=True)

time: 41.4 ms (started: 2023-07-09 22:30:28 -05:00)
