https://rumn.medium.com/part-1-ultimate-guide-to-fine-tuning-in-pytorch-pre-trained-model-and-its-configuration-8990194b71e

Plan for this notebook:
1. Modify architecture of pretrained model
    - modify head (for regression, classification or other image size)
    - add extra layers 
2. Training the model
    - Defining optimization algorithm, learning rate, momentum, weight decay
    - Choosing loss function and metrics-based loss
    - Regularization
3. Freezing part of the network (to keep its weights unchanged during training)
    - freezing entire network
    - freezing only convolutional layers
    - freezing specific layers
4. Defining Model Floating-point precision
5. Single vs Multiple GPU

In [26]:
from torchvision import models
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import torch
import timm
import numpy as np
import pandas as pd

## 1. Modify Model Architecture

In [12]:
# Load torchvision's ResNet-50 with pretrained weights; this is the base model modified below.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of `weights=` — confirm against the installed version.
model = models.resnet50(pretrained=True)
# Alternative loader via torch.hub (ResNet-18), kept for reference:
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)

In [25]:
# Uncomment to inspect the input/output sizes of the classifier head (`fc`):
# model.fc.in_features
# model.fc.out_features
# NOTE(review): the saved output of this cell lists BasicBlock layers, while the
# ResNet-50-based CustomModel printout uses Bottleneck blocks — the output looks
# stale (possibly from the ResNet-18 hub variant); re-run the cell to refresh it.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [23]:
class CustomModel(nn.Module):
    """Pretrained ResNet-50 backbone with a custom two-layer classification head.

    Args:
        num_classes: Number of output units produced by the final Linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        resnet = models.resnet50(pretrained=True)

        # Keep every child module of the ResNet except its final fully connected layer.
        self.features = nn.Sequential(*list(resnet.children())[:-1])

        # Custom head: backbone features -> 256 hidden units -> num_classes outputs.
        self.fc = nn.Sequential(
            nn.Linear(resnet.fc.in_features, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Run the backbone, flatten its output per sample and apply the head.

        Without this method the module cannot be called: ``nn.Module.__call__``
        dispatches to ``forward``, which raises ``NotImplementedError`` by default.

        Args:
            x: Batch of images for the backbone (assumed ``(batch, 3, H, W)``;
                the first convolution takes 3 input channels).

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised scores.
        """
        x = self.features(x)
        # The backbone output still carries trailing spatial dimensions; flatten
        # everything after the batch dimension so it fits the first Linear layer.
        x = torch.flatten(x, 1)
        return self.fc(x)


new_model = CustomModel(num_classes=4)

In [24]:
# Display the module tree to check the backbone and the newly attached head.
new_model

CustomModel(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d

## 2. Training the Model

In [30]:
# Baseline training setup: cross-entropy loss optimised with plain SGD
# (learning rate 0.01, no weight decay).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=1e-2)

In [31]:
# L2 (ridge) regularization: the squared-weight penalty is applied through the
# optimizer's `weight_decay`, discouraging large weights to limit overfitting.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=1e-2,
    weight_decay=1e-2,
)

In [None]:
# L1 regularization - Lasso - abs val => penalizes the sum of absolute weights,
# which pushes individual weights towards zero (sparser model).
def l1_penalty(module, l1_lambda=0.01):
    """Return the scaled L1 norm of all parameters of ``module``.

    Args:
        module: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        l1_lambda: Strength of the penalty; defaults to 0.01.

    Returns:
        ``l1_lambda * sum(|p|)`` over every parameter tensor, as a 0-dim tensor
        that stays attached to the autograd graph. For a module without
        parameters the result is the plain number ``0.0``.
    """
    return l1_lambda * sum(param.abs().sum() for param in module.parameters())


# `loss` only exists inside a training step, so add the penalty there, right
# after the forward pass:
#     loss = criterion(outputs, targets) + l1_penalty(model)
#     loss.backward()

## 3. Freezing layers of the model 