# Exercise

## 0. Prerequisites

In [None]:
# import scripts from GitHub
# Clone the course repository, move the `going_modular` package and
# `helper_functions.py` into the working directory, then delete the clone.
!git clone https://github.com/yhs2773/PyTorch-for-Deep-Learning-Machine-Learning-Full-Course
!mv PyTorch-for-Deep-Learning-Machine-Learning-Full-Course/going_modular .
!mv PyTorch-for-Deep-Learning-Machine-Learning-Full-Course/helper_functions.py .
!rm -rf PyTorch-for-Deep-Learning-Machine-Learning-Full-Course

Cloning into 'PyTorch-for-Deep-Learning-Machine-Learning-Full-Course'...
remote: Enumerating objects: 298, done.[K
remote: Counting objects: 100% (80/80), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 298 (delta 39), reused 71 (delta 32), pack-reused 218[K
Receiving objects: 100% (298/298), 139.19 MiB | 16.75 MiB/s, done.
Resolving deltas: 100% (153/153), done.
Updating files: 100% (45/45), done.


In [None]:
# load libraries
import torch
import torchvision

import matplotlib.pyplot as plt
import pathlib
import pandas as pd
import numpy as np

from torch import nn
from torchvision import transforms, models

from going_modular import data_setup, engine, predictions, utils
from helper_functions import download_data, set_seeds, plot_loss_curves

from PIL import Image
from pathlib import Path
from tqdm.auto import tqdm

In [None]:
# use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

device

'cuda'

In [None]:
# download and unzip the 20% pizza/steak/sushi subset
data_20 = download_data(
    source='https://github.com/yhs2773/PyTorch-for-Deep-Learning-Machine-Learning-Full-Course/raw/main/data/pizza_steak_sushi_20_percent.zip',
    destination="pizza_steak_sushi_20_percent",
)

[INFO] Did not find data/pizza_steak_sushi_20_percent directory, creating one...
[INFO] Downloading pizza_steak_sushi_20_percent.zip from https://github.com/yhs2773/PyTorch-for-Deep-Learning-Machine-Learning-Full-Course/raw/main/data/pizza_steak_sushi_20_percent.zip...
[INFO] Unzipping pizza_steak_sushi_20_percent.zip data...


In [None]:
# train/test image folders inside the downloaded data directory
train_dir, test_dir = data_20 / "train", data_20 / "test"

train_dir, test_dir

(PosixPath('data/pizza_steak_sushi_20_percent/train'),
 PosixPath('data/pizza_steak_sushi_20_percent/test'))

In [None]:
# create model function
def create_model(num_classes: int=3,
                 seed: int=42,
                 is_effnetb2: bool=True):
    """Build a frozen, pretrained feature extractor with a fresh classifier head.

    Args:
        num_classes: Number of output units of the new classifier head.
        seed: Value passed to `torch.manual_seed` right before the new head is
            created, so its initial weights are reproducible.
        is_effnetb2: If True build an EfficientNet-B2, otherwise a ViT-B/16.

    Returns:
        A `(model, transforms)` tuple: the model with all pretrained parameters
        frozen (only the new head is trainable) and the preprocessing
        transforms that ship with the pretrained weights.
    """
    if is_effnetb2:
        weights = models.EfficientNet_B2_Weights.DEFAULT
        model = models.efficientnet_b2(weights=weights)
    else:
        weights = models.ViT_B_16_Weights.DEFAULT
        model = models.vit_b_16(weights=weights)

    # named `model_transforms` so the local does not shadow `torchvision.transforms`
    model_transforms = weights.transforms()

    # freeze every pretrained parameter; the head created below stays trainable
    for param in model.parameters():
        param.requires_grad = False

    # seed right before creating the head so its initialisation is reproducible
    torch.manual_seed(seed)
    if is_effnetb2:
        # read the head's input width from the pretrained classifier (1408 for B2)
        in_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Dropout(0.3, inplace=True),
            nn.Linear(in_features=in_features, out_features=num_classes)
        )
    else:
        # read the head's input width from the pretrained head (768 for ViT-B/16)
        in_features = model.heads.head.in_features
        model.heads = nn.Sequential(
            nn.Linear(in_features=in_features,
                      out_features=num_classes)
        )

    return model, model_transforms

In [None]:
# EfficientNet-B2 feature extractor plus its preprocessing transforms
effnetb2, effnetb2_transforms = create_model(
    is_effnetb2=True,
    num_classes=3,
    seed=42,
)

In [None]:
# ViT-B/16 feature extractor plus its preprocessing transforms
vit, vit_transforms = create_model(
    is_effnetb2=False,
    num_classes=3,
    seed=42,
)

In [None]:
# train/test dataloaders that preprocess images with the EffNetB2 transforms
(train_dataloader_effnetb2,
 test_dataloader_effnetb2,
 class_names) = data_setup.create_dataloaders(
    batch_size=32,
    transform=effnetb2_transforms,
    train_dir=train_dir,
    test_dir=test_dir,
)

In [None]:
# train/test dataloaders that preprocess images with the ViT transforms
(train_dataloader_vit,
 test_dataloader_vit,
 class_names) = data_setup.create_dataloaders(
    batch_size=32,
    transform=vit_transforms,
    train_dir=train_dir,
    test_dir=test_dir,
)

In [None]:
# cross-entropy loss for the 3-class problem
loss_fn = torch.nn.CrossEntropyLoss()

# Adam over the EffNetB2 parameters
optimizer = torch.optim.Adam(lr=1e-3, params=effnetb2.parameters())

# seed, then fit EffNetB2 for 10 epochs
set_seeds()
effnetb2_results = engine.train(
    model=effnetb2,
    train_dataloader=train_dataloader_effnetb2,
    test_dataloader=test_dataloader_effnetb2,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
    epochs=10,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0 | Train loss: 0.9839 | Train acc: 0.5667 | Test loss: 0.7393 | Test acc: 0.9409
Epoch: 1 | Train loss: 0.7135 | Train acc: 0.8396 | Test loss: 0.5862 | Test acc: 0.9409
Epoch: 2 | Train loss: 0.5874 | Train acc: 0.8958 | Test loss: 0.4891 | Test acc: 0.9563
Epoch: 3 | Train loss: 0.4488 | Train acc: 0.9146 | Test loss: 0.4338 | Test acc: 0.9409
Epoch: 4 | Train loss: 0.4277 | Train acc: 0.9125 | Test loss: 0.3907 | Test acc: 0.9443
Epoch: 5 | Train loss: 0.4392 | Train acc: 0.8896 | Test loss: 0.3525 | Test acc: 0.9688
Epoch: 6 | Train loss: 0.4246 | Train acc: 0.8771 | Test loss: 0.3263 | Test acc: 0.9563
Epoch: 7 | Train loss: 0.3885 | Train acc: 0.8979 | Test loss: 0.3465 | Test acc: 0.9443
Epoch: 8 | Train loss: 0.3795 | Train acc: 0.8812 | Test loss: 0.3127 | Test acc: 0.9193
Epoch: 9 | Train loss: 0.3752 | Train acc: 0.8688 | Test loss: 0.2811 | Test acc: 0.9625


In [None]:
# cross-entropy loss for the 3-class problem
loss_fn = torch.nn.CrossEntropyLoss()

# Adam over the ViT parameters
optimizer = torch.optim.Adam(lr=1e-3, params=vit.parameters())

# seed, then fit ViT for 10 epochs
set_seeds()
vit_results = engine.train(
    model=vit,
    train_dataloader=train_dataloader_vit,
    test_dataloader=test_dataloader_vit,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
    epochs=10,
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0 | Train loss: 0.7020 | Train acc: 0.7521 | Test loss: 0.2714 | Test acc: 0.9381
Epoch: 1 | Train loss: 0.2532 | Train acc: 0.9062 | Test loss: 0.1672 | Test acc: 0.9602
Epoch: 2 | Train loss: 0.1764 | Train acc: 0.9542 | Test loss: 0.1273 | Test acc: 0.9693
Epoch: 3 | Train loss: 0.1276 | Train acc: 0.9625 | Test loss: 0.1074 | Test acc: 0.9722
Epoch: 4 | Train loss: 0.1159 | Train acc: 0.9646 | Test loss: 0.0953 | Test acc: 0.9784
Epoch: 5 | Train loss: 0.1274 | Train acc: 0.9375 | Test loss: 0.0832 | Test acc: 0.9722
Epoch: 6 | Train loss: 0.0897 | Train acc: 0.9771 | Test loss: 0.0845 | Test acc: 0.9784
Epoch: 7 | Train loss: 0.0919 | Train acc: 0.9812 | Test loss: 0.0764 | Test acc: 0.9722
Epoch: 8 | Train loss: 0.0922 | Train acc: 0.9792 | Test loss: 0.0734 | Test acc: 0.9784
Epoch: 9 | Train loss: 0.0658 | Train acc: 0.9833 | Test loss: 0.0644 | Test acc: 0.9847


In [None]:
# write both trained feature extractors into the "models" directory
utils.save_model(
    model_name="pretrained_effnetb2_3_cls.pth",
    target_dir="models",
    model=effnetb2,
)

utils.save_model(
    model_name="pretrained_vit_3_cls.pth",
    target_dir="models",
    model=vit,
)

[INFO] Saving model to: models/pretrained_effnetb2_3_cls.pth
[INFO] Saving model to: models/pretrained_vit_3_cls.pth


In [None]:
# size of each saved checkpoint in whole megabytes (floor division drops the remainder)
effnetb2_model_size = Path("models/pretrained_effnetb2_3_cls.pth").stat().st_size // (1024 * 1024)
vit_model_size = Path("models/pretrained_vit_3_cls.pth").stat().st_size // (1024 * 1024)

In [None]:
# total number of parameters (frozen and trainable) in each model
effnetb2_total_params = sum(param.numel() for param in effnetb2.parameters())
vit_total_params = sum(param.numel() for param in vit.parameters())

In [None]:
# summary per model: final-epoch test metrics, parameter count and checkpoint size
effnetb2_stats = {
    "test_loss": effnetb2_results["test_loss"][-1],
    "test_acc": effnetb2_results["test_acc"][-1],
    "num_params": effnetb2_total_params,
    "model_size (MB)": effnetb2_model_size,
}

vit_stats = {
    "test_loss": vit_results["test_loss"][-1],
    "test_acc": vit_results["test_acc"][-1],
    "num_params": vit_total_params,
    "model_size (MB)": vit_model_size,
}

## 1. Make and time predictions with both feature extractor models on the test dataset using the GPU (`device="cuda"`). Compare the model's prediction times on GPU vs CPU - does this close the gap between them? As in, does making predictions on the GPU make the ViT feature extractor prediction times closer to the EffNetB2 feature extractor prediction times?
- You'll find code to do these steps in [section 5. Making predictions with our trained models and timing them](https://www.learnpytorch.io/09_pytorch_model_deployment/#5-making-predictions-with-our-trained-models-and-timing-them) and [section 6. Comparing model results, prediction times and size](https://www.learnpytorch.io/09_pytorch_model_deployment/#6-comparing-model-results-prediction-times-and-size).

In [None]:
# every .jpg located exactly one folder below the test directory
test_data_paths = [image_path for image_path in Path(test_dir).glob("*/*.jpg")]

In [None]:
# predict on every test image with each model on the GPU
effnetb2_results_gpu = predictions.pred_and_store(
    model=effnetb2,
    transform=effnetb2_transforms,
    paths=test_data_paths,
    class_names=class_names,
    device="cuda",
)

vit_results_gpu = predictions.pred_and_store(
    model=vit,
    transform=vit_transforms,
    paths=test_data_paths,
    class_names=class_names,
    device="cuda",
)

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

In [None]:
# tabulate the GPU prediction records of each model
effnetb2_results_gpu_df = pd.DataFrame(data=effnetb2_results_gpu)
vit_results_gpu_df = pd.DataFrame(data=vit_results_gpu)

In [None]:
# mean GPU time per prediction, rounded to 4 decimals
effnetb2_gpu_average_time = round(effnetb2_results_gpu_df["time_for_pred"].mean(), 4)
vit_gpu_average_time = round(vit_results_gpu_df["time_for_pred"].mean(), 4)

In [None]:
# store each model's mean GPU prediction time as a constant column
effnetb2_results_gpu_df["time_per_pred_gpu"] = effnetb2_gpu_average_time
vit_results_gpu_df["time_per_pred_gpu"] = vit_gpu_average_time

In [None]:
# repeat the predictions for every test image with each model on the CPU
effnetb2_results_cpu = predictions.pred_and_store(
    model=effnetb2,
    transform=effnetb2_transforms,
    paths=test_data_paths,
    class_names=class_names,
    device="cpu",
)

vit_results_cpu = predictions.pred_and_store(
    model=vit,
    transform=vit_transforms,
    paths=test_data_paths,
    class_names=class_names,
    device="cpu",
)

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

In [None]:
# tabulate the CPU prediction records of each model
effnetb2_results_cpu_df = pd.DataFrame(data=effnetb2_results_cpu)
vit_results_cpu_df = pd.DataFrame(data=vit_results_cpu)

In [None]:
# mean CPU time per prediction, rounded to 4 decimals
effnetb2_cpu_average_time = round(effnetb2_results_cpu_df["time_for_pred"].mean(), 4)
vit_cpu_average_time = round(vit_results_cpu_df["time_for_pred"].mean(), 4)

In [None]:
# store each model's mean CPU prediction time as a constant column
effnetb2_results_cpu_df["time_per_pred_cpu"] = effnetb2_cpu_average_time
vit_results_cpu_df["time_per_pred_cpu"] = vit_cpu_average_time

In [None]:
# record the mean GPU and CPU prediction times in each model's stats dict
effnetb2_stats.update({
    "time_per_pred_gpu": effnetb2_gpu_average_time,
    "time_per_pred_cpu": effnetb2_cpu_average_time,
})
vit_stats.update({
    "time_per_pred_gpu": vit_gpu_average_time,
    "time_per_pred_cpu": vit_cpu_average_time,
})

In [None]:
# one row per model, built from the two stats dicts
df = pd.DataFrame([effnetb2_stats, vit_stats])

# label the rows, express accuracy in percent, and compute the CPU/GPU time ratio
df["model"] = ["EffNetB2", "ViT"]
df["test_acc"] = (df["test_acc"] * 100).round(2)
df["GPU vs CPU"] = (df["time_per_pred_cpu"] / df["time_per_pred_gpu"]).round(2)

df

Unnamed: 0,test_loss,test_acc,num_params,model_size (MB),time_per_pred_gpu,time_per_pred_cpu,model,GPU vs CPU
0,0.281087,96.25,7705221,29,0.0247,0.1139,EffNetB2,4.61
1,0.064435,98.47,85800963,327,0.0238,0.4298,ViT,18.06


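As seen in the last column of the dataframe, EffNetB2 is **4.61x** faster and ViT is **18.06x** faster on the GPU than on the CPU. On the GPU the two models take almost the same time per prediction (0.0247 vs. 0.0238), so running on the GPU does close the gap between them; on the CPU, ViT (0.4298) is about 3.8x slower than EffNetB2 (0.1139).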

## 2. The ViT feature extractor seems to have more learning capacity (due to more parameters) than EffNetB2, how does it go on the larger 20% split of the entire Food101 dataset?
- Train a ViT feature extractor on the 20% Food101 dataset for 5 epochs, just like we did with EffNetB2 in [section 10. Creating FoodVision Big](https://www.learnpytorch.io/09_pytorch_model_deployment/#10-creating-foodvision-big).


In [None]:
# ViT feature extractor whose head has one output per Food101 class (101)
vit_food101, vit_transforms = create_model(
    is_effnetb2=False,
    num_classes=101,
)

In [None]:
# training pipeline: TrivialAugmentWide augmentation followed by the ViT preprocessing
food101_train_transforms_vit = transforms.Compose(
    [transforms.TrivialAugmentWide(), vit_transforms]
)

In [None]:
# get Food101 dataset
# the training split uses the augmented pipeline (`food101_train_transforms_vit`);
# the test split only gets the plain ViT preprocessing
train_food101 = torchvision.datasets.Food101(root='data',
                                             split='train',
                                             transform=food101_train_transforms_vit,
                                             download=True)

test_food101 = torchvision.datasets.Food101(root='data',
                                            split='test',
                                            transform=vit_transforms,
                                            download=True)

# class names, used later to turn label indices into readable names
food101_class_names = train_food101.classes

In [None]:
# keep a seeded random 20% of each split and discard the remaining 80%
train_food101_20, _ = torch.utils.data.random_split(
    train_food101,
    [0.2, 0.8],
    generator=torch.manual_seed(42),
)

test_food101_20, _ = torch.utils.data.random_split(
    test_food101,
    [0.2, 0.8],
    generator=torch.manual_seed(42),
)

len(train_food101_20), len(test_food101_20)

In [None]:
# batch the 20% subsets; only the training loader shuffles
train_dataloader_food101_20 = torch.utils.data.DataLoader(
    train_food101_20,
    shuffle=True,
    batch_size=32,
)

test_dataloader_food101_20 = torch.utils.data.DataLoader(
    test_food101_20,
    shuffle=False,
    batch_size=32,
)

In [None]:
# set loss fn and optimizer
loss_fn = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(params=vit_food101.parameters(),
                             lr=1e-3)

# train the model
# the keyword must be `train_dataloader` (as in the other `engine.train` calls
# in this notebook); the misspelt `train_dataloder` raised a TypeError
set_seeds()
vit_food101_20_results = engine.train(model=vit_food101,
                                      train_dataloader=train_dataloader_food101_20,
                                      test_dataloader=test_dataloader_food101_20,
                                      loss_fn=loss_fn,
                                      optimizer=optimizer,
                                      epochs=5,
                                      device=device)

In [None]:
plot_loss_curves(vit_food101_20_results)

## 3. Make predictions across the 20% Food101 test dataset with the ViT feature extractor from exercise 2 and find the "most wrong" predictions.
- The predictions will be the ones with the highest prediction probability but with the wrong predicted label.
- Write a sentence or two about why you think the model got these predictions wrong.

In [None]:
# unshuffled loader that yields one test sample per batch
test_dataloader_food101_20_bs_1 = torch.utils.data.DataLoader(
    test_food101_20,
    shuffle=False,
    batch_size=1,
)

In [None]:
# make predictions
# one record per test sample: top probability, predicted label, true label
vit_food101_pred_list = []

vit_food101 = vit_food101.to(device)
vit_food101.eval()

# the loader uses batch_size=1, so every tensor below holds exactly one sample
for X, y in tqdm(test_dataloader_food101_20_bs_1):
    X, y = X.to(device), y.to(device)

    with torch.inference_mode():
        pred_probs = torch.softmax(vit_food101(X), dim=1)
        pred_label = torch.argmax(pred_probs, dim=1)

    # `.item()` stores plain Python numbers, so the later dataframe gets numeric
    # columns instead of 0-d arrays
    vit_food101_pred_list.append({
        'pred_probs': pred_probs.max().item(),
        'pred_label': pred_label.item(),
        'true_label': y.item(),
    })

In [None]:
# tabulate the per-sample prediction records
vit_food101_pred_df = pd.DataFrame(data=vit_food101_pred_list)

In [None]:
# create various columns
# `correct` flags rows where the predicted label equals the true label
vit_food101_pred_df['correct'] = vit_food101_pred_df['pred_label'] == vit_food101_pred_df['true_label']
# `food101_class_names` is a plain list, so it cannot be indexed with a Series;
# map every label index to its class name instead
vit_food101_pred_df['pred_class'] = vit_food101_pred_df['pred_label'].map(lambda label: food101_class_names[label])
vit_food101_pred_df['true_class'] = vit_food101_pred_df['true_label'].map(lambda label: food101_class_names[label])

In [None]:
# tally correct vs. incorrect predictions
vit_food101_pred_df.correct.value_counts()

In [None]:
# see only False and sort values by probabilities
# `DataFrame.sort_values` needs the column to sort by; the most confident wrong
# predictions come first
vit_food101_pred_df[~vit_food101_pred_df['correct']].sort_values(by='pred_probs', ascending=False)['pred_probs']

## 4. Evaluate the ViT feature extractor across the whole Food101 test dataset rather than just the 20% version, how does it perform?
- Does it beat the original Food101 paper's best result of 56.4% accuracy?

## 5. Head to Paperswithcode.com and find the current best performing model on the Food101 dataset.
- What model architecture does it use?

## 6. Write down 1-3 potential failure points of our deployed FoodVision models and what some potential solutions might be.
- For example, what happens if someone was to upload a photo that wasn't of food to our FoodVision Mini model?

## 7. Pick any dataset from [`torchvision.datasets`](https://pytorch.org/vision/stable/datasets.html) and train a feature extractor model on it using a model from [`torchvision.models`](https://pytorch.org/vision/stable/models.html) (you could use one of the models we've already created, e.g. EffNetB2 or ViT) for 5 epochs and then deploy your model as a Gradio app to Hugging Face Spaces.
- You may want to pick a smaller dataset, or make a smaller split of it, so training doesn't take too long.
- I'd love to see your deployed models! So be sure to share them in Discord or on the [course GitHub Discussions page](https://github.com/mrdbourke/pytorch-deep-learning/discussions).