# Deep Learning for Business Applications course

## TOPIC 3: Computer Vision advanced. Finetuning for image classification task

#### 1. Libraries

In [None]:
import os
import copy
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, models
from torchvision.transforms import v2
from PIL import Image

# prefer the GPU when CUDA is usable, otherwise stay on the CPU
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(DEVICE)

In [None]:
# Training hyperparameters.
# Feel free to tune them
# for the HOME ASSIGNMENT part.

# samples per batch; limited by the available
# GPU or RAM memory
BATCH_SIZE = 4
# finetuning learning rate; keep it smaller
# than the LR used to train the backbone CNN
LR = 1e-3
# how many epochs to train for
N_EPOCHS = 20

#### 2. Data

We are going to use a part of the [Food-101 dataset](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/). We will take only two classes to finetune our classification CNN.

In [None]:
# root folder of the Food-101 subset used in this notebook
# (the cells below read its `train/` and `val/` subfolders)
DATA_PATH = '/home/jovyan/__DATA/DLBA_F24/topic_03/food-101/'

In [None]:
!ls -la $DATA_PATH

In [None]:
!ls $DATA_PATH/train/donuts | wc -l

In [None]:
!ls $DATA_PATH/val/donuts | wc -l

In [None]:
!ls $DATA_PATH/train/macarons | wc -l

#### 3. Data processing

In [None]:
# Data augmentation and normalization for training
# just normalization for validation data

# per-channel mean / std passed to `v2.Normalize`
# (and used again in `imshow` to undo the normalization)
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

data_transforms = {
    # HOME ASSIGNMENT part
    # you will have to make more data augmentations
    # with help of Pytorch https://pytorch.org/vision/stable/transforms.html
    # and make an experiment to answer the question
    # how data augmentation affects training process
    'train': v2.Compose([
        v2.RandomResizedCrop(224),
        # basic transformations
        #v2.RandomHorizontalFlip(),
        #v2.RandomVerticalFlip(),
        #v2.RandomRotation([-15, 15]),
        # color transformations use with caution
        # and play with probabilities to apply
        #v2.RandomChoice(
        #    [
        #        v2.ColorJitter(brightness=.5, hue=.3),
        #        v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.)),
        #        v2.ElasticTransform(alpha=250.0)
        #    ],
        #    p=[.1, .1, .1]
        #),
        # more about transformations you may read here
        # https://pytorch.org/vision/stable/auto_examples/transforms/plot_transforms_illustrations.html
        # `v2.ToTensor()` is deprecated in the v2 API; the documented
        # replacement is `ToImage()` followed by a scaling `ToDtype()`
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(MEAN, STD)
    ]),
    # no random augmentation for validation:
    # deterministic resize + center crop only
    'val': v2.Compose([
        v2.Resize(256),
        v2.CenterCrop(224),
        v2.ToImage(),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(MEAN, STD)
    ]),
}

In [None]:
# one `ImageFolder` dataset per split, each with its own transforms
image_datasets = {
    split: datasets.ImageFolder(
        os.path.join(DATA_PATH, split),
        data_transforms[split]
    )
    for split in ('train', 'val')
}
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=BATCH_SIZE,
        # reshuffle only the training data; the validation metrics
        # do not depend on the order, so a fixed order is kept there
        shuffle=(split == 'train'),
        num_workers=4
    )
    for split in ('train', 'val')
}
# class labels as reported by the training `ImageFolder`
class_names = image_datasets['train'].classes
print('new classes:', class_names)

#### 4. Data sample visualization

In [None]:
def imshow(inp, title=None):
    """
    Draw an image tensor with matplotlib.

    The tensor is converted to a numpy array, its first axis is moved
    to the end, the `MEAN` / `STD` scaling is inverted and the values
    are clipped to [0, 1] before plotting. `title`, when truthy,
    is set as the plot title.

    """
    # move axis 0 to the last position for `plt.imshow`
    image = np.transpose(inp.numpy(), (1, 2, 0))
    # invert the scaling: x * std + mean
    image = np.array(STD) * image + np.array(MEAN)
    plt.imshow(np.clip(image, 0, 1))
    if title:
        plt.title(title)


# take the first batch the training dataloader yields
train_batches = iter(dataloaders['train'])
inputs, classes = next(train_batches)
# tile the images of the batch into a single grid image
out = torchvision.utils.make_grid(inputs)
# label the plot with the class name of every image in the batch
batch_titles = [class_names[x] for x in classes]
imshow(out, title=batch_titles)

In [None]:
# number of data samples (images) in the dataset
# behind the training dataloader
len(dataloaders['train'].dataset)

In [None]:
# number of batches the training dataloader yields in one pass
len(dataloaders['train'])

#### 5. Training loop

In [None]:
def train_model(model, criterion, optimizer,
                scheduler, device, n_epochs, loaders=None):
    """
    Train `model` and restore the weights of the best validation epoch.

    Every epoch runs one training pass and one validation pass,
    then steps the learning rate scheduler.

    Parameters
    ----------
    model : network to train, already placed on `device`
    criterion : loss callable, used as `criterion(outputs, labels)`
    optimizer : optimizer that updates the parameters of `model`
    scheduler : learning rate scheduler, stepped once per epoch
    device : device the input batches are moved to
    n_epochs : number of epochs to run
    loaders : optional dict with 'train' and 'val' dataloaders;
        defaults to the notebook-level `dataloaders`

    Returns
    -------
    (model, losses, accs, val_losses, val_accs) where the four lists
    hold one Python float per epoch: the loss averaged over batches
    and the accuracy as a fraction of the dataset size.

    """
    if loaders is None:
        # keep the original behaviour: use the notebook-level loaders
        loaders = dataloaders
    train_loader, val_loader = loaders['train'], loaders['val']

    start_time = time.time()
    # start from the initial weights so there is always something
    # to restore, even if validation accuracy never exceeds zero
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0
    best_epoch = 0
    losses = []
    val_losses = []
    accs = []
    val_accs = []

    for epoch in range(n_epochs):

        # TRAIN PART

        model.train()
        running_loss = 0
        running_corrects = 0
        n_seen = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # backpropagation part
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # collecting stats as plain Python numbers, so nothing
            # returned to the caller stays on the GPU
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels).item()
            # count real samples: the last batch may be smaller
            n_seen += len(inputs)

            # training progress bar
            if i % 10 == 0:
                print(
                    'Epoch {} - training [{}/{} ({:.0f}%)] loss: {:.3f}, accuracy: {:.2f}%'.format(
                        epoch,
                        n_seen,
                        len(train_loader.dataset),
                        100 * i / len(train_loader),
                        running_loss / (i + 1),
                        100 * running_corrects / n_seen
                    ),
                    end='\r'
                )

        # epoch training stats
        epoch_loss = running_loss / len(train_loader)
        epoch_acc = running_corrects / len(train_loader.dataset)
        losses.append(epoch_loss)
        accs.append(epoch_acc)

        # VALIDATION PART

        model.eval()
        running_loss = 0
        running_corrects = 0
        with torch.no_grad():
            for inputs, labels in val_loader:

                # no training here just predictions
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                # collecting stats
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels).item()

        # epoch validation stats
        epoch_val_loss = running_loss / len(val_loader)
        epoch_val_acc = running_corrects / len(val_loader.dataset)
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)

        # run step for learning rate scheduler
        scheduler.step()

        # saving best results
        # NOTE: it is a good practice to use
        # validation loss as an indicator
        # where to stop training process
        if epoch_val_acc > best_acc:
            best_acc = epoch_val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            best_epoch = epoch

        # trailing spaces overwrite leftovers of the progress line
        print('Epoch {} - validation loss: {:.3f}, validation accuracy: {:.2f}%        '.format(
            epoch,
            epoch_val_loss,
            epoch_val_acc * 100
        ))

    # final results
    time_elapsed = time.time() - start_time
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best validation accuracy: {best_acc * 100:.2f}%, best epoch {best_epoch}')

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, losses, accs, val_losses, val_accs

#### 6. Model to finetune

Let's take [ResNet18](https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html) image classification pre-trained model as a backbone for our CNN.

In [None]:
# ResNet18 backbone initialised with the 'IMAGENET1K_V1' pre-trained weights
model_ft = models.resnet18(weights='IMAGENET1K_V1')

##### 6.1. About the model

In [None]:
# display the layer structure of the whole network
model_ft

In [None]:
# display the `fc` layer, the one replaced later for finetuning
model_ft.fc

In [None]:
# number of input features the `fc` layer receives
model_ft.fc.in_features

In [None]:
def json_data(file_path):
    """
    Load a JSON file and return the parsed content.

    The file is always decoded as UTF-8, so the result
    does not depend on the locale of the machine.

    """
    with open(file_path, encoding='utf-8') as file:
        return json.load(file)


# the class index file sits next to the dataset folder; build its path
# from the parent directory, so it is found with or without
# a trailing slash in `DATA_PATH`
imagenet_classes = json_data(
    file_path=os.path.join(
        os.path.dirname(DATA_PATH.rstrip('/')),
        'imagenet_class_index.json'
    )
)
print('all classes:', len(imagenet_classes))

In [None]:
# look at the entry stored under the key '100'
imagenet_classes['100']

In [None]:
# print the second field of every entry, separated by spaces
for entry in imagenet_classes.values():
    print(entry[1], end=' ')

In [None]:
def show_model_prediction(model, img_path, device,
                          classes=None, class_names=None):
    """
    Predict the class of one image and plot the image with the result.

    Parameters
    ----------
    model : classification network; it is moved to `device` in place
    img_path : path to the image file
    device : device used for inference
    classes : optional mapping from the class index as a string
        (e.g. '100') to the title to show
    class_names : optional sequence of titles indexed by the class
        index; takes precedence over `classes` when both are given

    If neither mapping is given, the predicted index itself
    is used as the title. The training / evaluation mode of `model`
    is restored before returning.

    """
    was_training = model.training
    # the model and the input must live on the same device,
    # otherwise the forward pass fails
    model = model.to(device)
    model.eval()

    try:
        # force 3 channels (grayscale / RGBA files would not match
        # the 3-channel normalization) and close the file afterwards
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')
        img = data_transforms['val'](img)
        # add the batch dimension the model expects
        img = img.unsqueeze(0).to(device)

        with torch.no_grad():
            outputs = model(img)
            _, preds = torch.max(outputs, 1)
        # plain Python int: works for CPU and GPU tensors alike
        pred_idx = preds[0].item()

        title = str(pred_idx)
        if classes:
            title = classes[str(pred_idx)]
        if class_names:
            title = class_names[pred_idx]

        ax = plt.subplot(2, 2, 1)
        ax.axis('off')
        ax.set_title(title)
        imshow(img[0].cpu())
    finally:
        # restore the mode even if loading or inference failed
        model.train(mode=was_training)

In [None]:
# prediction of the pre-trained model (its `fc` layer is not replaced yet),
# with the title taken from the `imagenet_classes` mapping
show_model_prediction(
    model_ft,
    img_path='/home/jovyan/__DATA/DLBA_F24/topic_03/test.jpg',
    device=DEVICE,
    classes=imagenet_classes
)

##### 6.2. Modify model

In [None]:
# we have 2 classes only, so we replace the last layer
# with a new one that has one output per class of our dataset
# (`len(class_names)` keeps it correct if more classes are added)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

# ...and put model to our device to work with
model_ft = model_ft.to(DEVICE)

#### 7. Train and evaluate

In [None]:
# loss function to minimize
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the model
optimizer_ft = torch.optim.SGD(model_ft.parameters(), lr=LR, momentum=0.9)
# another optimizer is possible as well
# you can do this experiment
# for HOME ASSIGNMENT part
#optimizer_ft = torch.optim.Adam(model_ft.parameters(), lr=0.001)

# multiply the learning rate (LR) by .1 after every 5 scheduler steps
# `step_size` and `gamma` are worth an experiment, too
# for HOME ASSIGNMENT part
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer_ft, gamma=0.1, step_size=5
)

##### 7.1. Finetuning

In [None]:
# run the finetuning; every argument is passed by name
model_ft, losses, accs, val_losses, val_accs = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=lr_scheduler,
    device=DEVICE,
    n_epochs=N_EPOCHS,
)

In [None]:
# the accuracy values may be 0-dim tensors that still live on the GPU,
# which matplotlib cannot convert; `float()` works for both
# tensors and plain numbers
train_acc_curve = [float(value) for value in accs]
val_acc_curve = [float(value) for value in val_accs]

# loss on the left, accuracy on the right
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(16, 4))

ax_loss.set_xlabel('epochs')
ax_loss.set_title('Loss')
ax_loss.plot(losses, label='train loss')
ax_loss.plot(val_losses, label='val loss')
ax_loss.legend()

ax_acc.set_xlabel('epochs')
ax_acc.set_title('Accuracy')
ax_acc.plot(train_acc_curve, label='train accuracy')
ax_acc.plot(val_acc_curve, label='val accuracy')
ax_acc.legend()

plt.show()

##### 7.2. Result

In [None]:
# prediction of the finetuned model, titled with our own class names
# NOTE(review): this path has no `train/` or `val/` component, unlike
# the folders listed in the Data section; confirm the file exists
show_model_prediction(
    model_ft,
    img_path='/home/jovyan/__DATA/DLBA_F24/topic_03/food-101/macarons/1003207.jpg',
    device=DEVICE,
    class_names=class_names
)

In [None]:
# the same check for an image taken from the `donuts` folder
# NOTE(review): this path has no `train/` or `val/` component, unlike
# the folders listed in the Data section; confirm the file exists
show_model_prediction(
    model_ft,
    img_path='/home/jovyan/__DATA/DLBA_F24/topic_03/food-101/donuts/1006079.jpg',
    device=DEVICE,
    class_names=class_names
)

### <font color='red'>HOME ASSIGNMENT</font>

There are many things you can do with the finetuning process. Here are a few of them:
1. __(BASE)__ Try different data augmentation techniques and monitor the model finetuning performance. Does it get better? How has the speed of training changed? What about accuracy?
2. __(ADVANCED)__ Make a few trials with different hyperparameters (learning rate, number of epochs, batch size) and observe the model's performance (speed and final accuracy).
3. __(HARDCORE)__ Add one more class for the model to learn.