# Waste Classifier Improved PyTorch Model

## Goal

Improve the previously attempted waste classifier (a `resnet50` feature extractor model) by applying the following techniques:
- Increase dataset size (get more data, or better data) -> a great way to do this would be ***data augmentation*** - https://pytorch.org/vision/main/transforms.html
- Add a ***`Dropout`* layer** before the output layer - https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
- Use ***learning rate decay*** - PyTorch learning rate scheduler - https://pytorch.org/docs/stable/optim.html

## 0 - Setup

In [1]:
import torch
import torchvision

print(torch.__version__) # 1.12+
print(torchvision.__version__) # 0.13+

import matplotlib.pyplot as plt

from torch import nn
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode

from torchinfo import summary

try:
    from scripts import data_setup_augmented, engine, engine_lr_decay, utils, predictions
    from helper_functions import download_data, set_seeds, plot_loss_curves
    print("Base imports done.")
except:
    print("Couldn't find helper scripts, downloading from Github...")
    !git clone https://github.com/tznpau/waste-classifier
    !mv waste-classifier/scripts .
    !mv waste-classifier/helper_functions.py .
    !rm -rf waste-classifier
    from scripts import data_setup_augmented, engine, engine_lr_decay, utils, predictions
    from helper_functions import download_data, set_seeds, plot_loss_curves

2.1.1
0.16.1
Base imports done.


In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

## 1 - Getting data

In [3]:
# Fetch the zipped trash dataset from the project's GitHub repository through the
# download_data helper. The recorded output shows it returns a path object
# (data/trash_dataset) and skips the download when that directory already exists.
trash_dataset_path = download_data(source="https://github.com/tznpau/waste-classifier/raw/main/data/trash_dataset.zip",
                                   destination="trash_dataset")
trash_dataset_path

[INFO] data\trash_dataset directory exists, skipping download.


WindowsPath('data/trash_dataset')

In [4]:
# Locate the "train" and "test" split folders inside the downloaded dataset directory
train_dir, test_dir = (trash_dataset_path / split for split in ("train", "test"))

train_dir, test_dir

(WindowsPath('data/trash_dataset/train'),
 WindowsPath('data/trash_dataset/test'))

## 2 - ResNet50 feature extractors

https://pytorch.org/vision/main/models/generated/torchvision.models.resnet50.html

In [5]:
# Instantiate a ResNet50 without passing a `weights` argument; it is only used
# below to inspect the architecture (e.g. the name of the final layer).
resnet50 = torchvision.models.resnet50()
# resnet50

In [None]:
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

In [6]:
# summary(resnet50,
#         input_size=(1, 3, 224, 224),
#         col_names=["input_size", "output_size", "num_params", "trainable"],
#         col_width=20,
#         row_settings=["var_names"])

In [7]:
resnet50.fc

Linear(in_features=2048, out_features=1000, bias=True)

The last layer of resnet50 is called **`fc`**.

### 2.1 ResNet50 feature extractor - `v1`

In [None]:
def create_resnet50_model_v1(num_classes: int = 6, seed: int = 42):
    """Build a frozen ResNet50 feature extractor with a new linear output layer.

    The network is loaded with `ResNet50_Weights.DEFAULT`, every pretrained
    parameter has `requires_grad` switched off, and the `fc` layer is replaced
    by a freshly initialised `nn.Linear`, which is therefore the only
    trainable part.

    Args:
        num_classes: Number of output units of the new `fc` layer.
        seed: Value passed to `torch.manual_seed` right before the new layer
            is created.

    Returns:
        A `(model, transforms)` tuple: the model moved to the notebook-level
        `device`, and the preprocessing transforms bundled with the weights.
    """
    pretrained_weights = torchvision.models.ResNet50_Weights.DEFAULT
    backbone = torchvision.models.resnet50(weights=pretrained_weights).to(device)

    # Freeze every pretrained parameter in one call.
    backbone.requires_grad_(False)

    # Seed first so the new head's initial weights are reproducible.
    torch.manual_seed(seed)
    classifier_head = nn.Linear(in_features=2048, out_features=num_classes)
    backbone.fc = classifier_head.to(device)

    return backbone, pretrained_weights.transforms()

In [None]:
# Build the v1 feature extractor together with the transforms bundled with its
# pretrained weights. The factory defined in this notebook is named
# `create_resnet50_model_v1`; calling `create_resnet50_model` raises NameError.
resnet50_v1, resnet50_transforms_auto = create_resnet50_model_v1(num_classes=6, seed=42)

In [None]:
resnet50_v1.fc

#### Auto created transforms

In [None]:
resnet50_transforms_auto

#### Dataloaders for Resnet50 `v1`

In [None]:
from scripts import data_setup

# Train/test dataloaders for v1: both splits share the transforms that ship
# with the pretrained weights, in batches of 32.
(train_dataloader_resnet50,
 test_dataloader_resnet50,
 class_names) = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=resnet50_transforms_auto,
    batch_size=32,
)

In [None]:
len(train_dataloader_resnet50), len(test_dataloader_resnet50), class_names

### 2.2 ResNet50 augmented feature extractors - `v2` & `v3` (learning rate decay)

#### Dropout layer

Adding a `Dropout` layer after the last pooling layer `avgpool` and before the `Linear` output layer `fc` should introduce **regularization** and help prevent overfitting.

In [11]:
def create_resnet50_model_v2(num_classes: int = 6,
                             seed: int = 42,
                             dropout_prob: float = 0.5):
    """Build a frozen ResNet50 feature extractor with a Dropout + Linear head.

    The network is loaded with `ResNet50_Weights.DEFAULT`, every pretrained
    parameter has `requires_grad` switched off, and `fc` is replaced by
    `Dropout(p=dropout_prob)` followed by a new `nn.Linear` layer.

    Args:
        num_classes: Number of output units of the final linear layer.
        seed: Value passed to `torch.manual_seed` right before the new head
            is created.
        dropout_prob: Probability `p` given to the `nn.Dropout` layer.

    Returns:
        The model, moved to the notebook-level `device`.
    """
    backbone = torchvision.models.resnet50(
        weights=torchvision.models.ResNet50_Weights.DEFAULT
    ).to(device)

    # Freeze every pretrained parameter in one call.
    backbone.requires_grad_(False)

    # Seed first so the new head's initial weights are reproducible.
    torch.manual_seed(seed)
    regularised_head = nn.Sequential(
        nn.Dropout(p=dropout_prob),
        nn.Linear(in_features=2048, out_features=num_classes),
    )
    backbone.fc = regularised_head.to(device)

    return backbone

In [12]:
# v2 and v3 are built with identical arguments (same seed, dropout p=0.2); they
# differ only in how they are trained later (v3 uses a learning rate scheduler).
resnet50_v2= create_resnet50_model_v2(num_classes=6, seed=42, dropout_prob=0.2)
resnet50_v3= create_resnet50_model_v2(num_classes=6, seed=42, dropout_prob=0.2)

In [None]:
resnet50_v2.fc

#### Custom transforms ~ Data augmentation

In [8]:
# Training pipeline: same geometry as the test pipeline (antialiased resize to
# 232 followed by a 224x224 centre crop) plus the random augmentations.
# Without the crop, training images keep whatever aspect ratio they had after
# the resize, so they differ in size from the 224x224 test inputs and are not
# guaranteed to share one shape within a batch.
resnet50_train_transform_custom = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(232, interpolation=InterpolationMode.BILINEAR, antialias=True),
    transforms.CenterCrop(224),
    transforms.GaussianBlur(kernel_size=3),
    transforms.RandomGrayscale(),
    transforms.RandomHorizontalFlip(),
    # Normalisation statistics used by the pretrained ResNet50 weights.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Evaluation pipeline: deterministic resize, crop and normalise, no augmentation.
resnet50_test_transform_custom = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(232, interpolation=InterpolationMode.BILINEAR, antialias=True),
    transforms.CenterCrop(224),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [None]:
resnet50_train_transform_custom

In [None]:
resnet50_test_transform_custom

#### Dataloaders for ResNet50 `v2` & `v3`

In [9]:
from scripts import data_setup_augmented

# Train/test dataloaders for v2 and v3: the training split gets the augmenting
# transform, the test split the deterministic one, both in batches of 32.
(train_dataloader_resnet50_custom,
 test_dataloader_resnet50_custom,
 class_names) = data_setup_augmented.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    train_transform=resnet50_train_transform_custom,
    test_transform=resnet50_test_transform_custom,
    batch_size=32,
)

In [10]:
len(train_dataloader_resnet50_custom), len(test_dataloader_resnet50_custom), class_names

(64, 16, ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'])

#### ResNet50 `v2` summary

In [14]:
# Print a per-layer table for v2 using a single 3x224x224 input, including the
# "trainable" column so the frozen layers can be told apart from the new head.
summary(resnet50_v2,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [1, 3, 224, 224]     [1, 6]               --                   Partial
├─Conv2d (conv1)                         [1, 3, 224, 224]     [1, 64, 112, 112]    (9,408)              False
├─BatchNorm2d (bn1)                      [1, 64, 112, 112]    [1, 64, 112, 112]    (128)                False
├─ReLU (relu)                            [1, 64, 112, 112]    [1, 64, 112, 112]    --                   --
├─MaxPool2d (maxpool)                    [1, 64, 112, 112]    [1, 64, 56, 56]      --                   --
├─Sequential (layer1)                    [1, 64, 56, 56]      [1, 256, 56, 56]     --                   False
│    └─Bottleneck (0)                    [1, 64, 56, 56]      [1, 256, 56, 56]     --                   False
│    │    └─Conv2d (conv1)               [1, 64, 56, 56]      [1, 64, 56, 56]      (4,096)              False
│    │    

All the original layers of the ResNet50 architecture have been frozen, **except** for the **output layer `fc`**.

## 3 - Training

### 3.1 ResNet `v1` training

In [None]:
from scripts import engine

# Cross-entropy loss and Adam with a fixed learning rate of 1e-3.
# All parameters are handed to the optimizer, but create_resnet50_model_v1 set
# requires_grad=False on everything except the replaced `fc` layer.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=resnet50_v1.parameters(),
                             lr=1e-3)

# Seed right before training (presumably for reproducibility; see helper_functions.set_seeds).
set_seeds(42)

# Train v1 for 30 epochs on the dataloaders built with the auto transforms.
# The returned results are indexed later by "test_loss" / "test_acc".
resnet50_results_v1 = engine.train(model=resnet50_v1,
                                train_dataloader=train_dataloader_resnet50,
                                test_dataloader=test_dataloader_resnet50,
                                epochs=30,
                                optimizer=optimizer,
                                loss_fn=loss_fn,
                                device=device)

#### Loss & Accuracy curves ~ `v1`

In [None]:
from helper_functions import plot_loss_curves

plot_loss_curves(resnet50_results_v1)

### 3.2 ResNet `v2` training ~ without learning rate decay

In [None]:
from scripts import engine

# Same loss/optimizer setup as v1 (Adam, fixed lr=1e-3), now for the v2 model
# with the Dropout head. `loss_fn` and `optimizer` are rebound here.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=resnet50_v2.parameters(),
                             lr=1e-3)

# Seed right before training (presumably for reproducibility; see helper_functions.set_seeds).
set_seeds(42)

# Train v2 for 30 epochs on the augmented dataloaders, without a scheduler.
resnet50_results_v2 = engine.train(model=resnet50_v2,
                                train_dataloader=train_dataloader_resnet50_custom,
                                test_dataloader=test_dataloader_resnet50_custom,
                                epochs=30,
                                optimizer=optimizer,
                                loss_fn=loss_fn,
                                device=device)

### 3.3 ResNet `v3` training ~ with learning rate decay

In [None]:
from scripts import engine_lr_decay
from torch.optim.lr_scheduler import StepLR

# v3 starts from lr=0.1, which is 100x the fixed rate used for v1/v2, and
# attaches a StepLR scheduler (step_size=10, gamma=0.1).
# NOTE(review): assuming engine_lr_decay.train steps the scheduler once per epoch,
# the rate would be 0.1 / 0.01 / 0.001 over the three 10-epoch thirds - confirm.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=resnet50_v3.parameters(),
                             lr=0.1)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

# Seed right before training (presumably for reproducibility; see helper_functions.set_seeds).
set_seeds(42)

# Train v3 for 30 epochs on the same augmented dataloaders as v2.
resnet50_results_v3 = engine_lr_decay.train(model=resnet50_v3,
                                train_dataloader=train_dataloader_resnet50_custom,
                                test_dataloader=test_dataloader_resnet50_custom,
                                optimizer=optimizer,
                                loss_fn=loss_fn,
                                scheduler=scheduler,
                                epochs=30,
                                device=device)

#### Loss & Accuracy curves ~ `v2` & `v3`

In [None]:
plot_loss_curves(resnet50_results_v2)

In [None]:
plot_loss_curves(resnet50_results_v3)

## 4 - Saving models & Collecting stats

In [None]:
from scripts import utils

# Persist all three trained models into the "models" directory, v1 first.
for trained_model, checkpoint_name in (
    (resnet50_v1, "resnet50_v1_model.pth"),
    (resnet50_v2, "resnet50_v2_model.pth"),
    (resnet50_v3, "resnet50_v3_model.pth"),
):
    utils.save_model(model=trained_model,
                     target_dir="models",
                     model_name=checkpoint_name)

In [None]:
from pathlib import Path


def _size_in_mb(checkpoint_path):
    """Return the on-disk size of `checkpoint_path` in whole MB (rounded down)."""
    return Path(checkpoint_path).stat().st_size // (1024 * 1024)


size_resnet50_v1 = _size_in_mb("models/resnet50_v1_model.pth")
size_resnet50_v2 = _size_in_mb("models/resnet50_v2_model.pth")
size_resnet50_v3 = _size_in_mb("models/resnet50_v3_model.pth")

In [None]:
def _count_parameters(model):
    """Return the total number of elements across all parameters of `model`."""
    return sum(weight.numel() for weight in model.parameters())


resnet50_v1_total_params = _count_parameters(resnet50_v1)
resnet50_v2_total_params = _count_parameters(resnet50_v2)
resnet50_v3_total_params = _count_parameters(resnet50_v3)

In [None]:
def _collect_stats(results, total_params, size_mb):
    """Bundle the last-epoch test metrics with the parameter count and file size."""
    return {
        "test_loss": results["test_loss"][-1],
        "test_acc": results["test_acc"][-1],
        "total_number_of_parameters": total_params,
        "model_size (MB)": size_mb,
    }


resnet50_v1_stats = _collect_stats(resnet50_results_v1, resnet50_v1_total_params, size_resnet50_v1)
resnet50_v2_stats = _collect_stats(resnet50_results_v2, resnet50_v2_total_params, size_resnet50_v2)
resnet50_v3_stats = _collect_stats(resnet50_results_v3, resnet50_v3_total_params, size_resnet50_v3)

## 5 - Making predictions

In [None]:
from pathlib import Path

# Collect every .jpg that sits exactly one folder below the test directory
# (pattern "*/*.jpg") and preview the first five paths.
print(f"Finding all files ending with '.jpg' in directory: {test_dir}")
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
test_data_paths[:5]

In [None]:
from scripts import predictions

# Run each model over all test images on the CPU. v1 uses the transforms that
# came with its weights; v2 and v3 use the custom (non-augmenting) test transform.
# The results are turned into DataFrames below, where the `correct` and
# `time_for_pred` fields are read.
resnet50_v1_predictions = predictions.pred_and_store(paths=test_data_paths,
                                                     model=resnet50_v1,
                                                     transform=resnet50_transforms_auto,
                                                     class_names=class_names,
                                                     device="cpu")

resnet50_v2_predictions = predictions.pred_and_store(paths=test_data_paths,
                                                     model=resnet50_v2,
                                                     transform=resnet50_test_transform_custom,
                                                     class_names=class_names,
                                                     device="cpu")

resnet50_v3_predictions = predictions.pred_and_store(paths=test_data_paths,
                                                     model=resnet50_v3,
                                                     transform=resnet50_test_transform_custom,
                                                     class_names=class_names,
                                                     device="cpu")

In [None]:
resnet50_v1_predictions[:2]

In [None]:
resnet50_v2_predictions[:2]

In [None]:
resnet50_v3_predictions[:2]

In [None]:
import pandas as pd
# One DataFrame per model, built from the stored predictions.
resnet50_v1_predictions_df = pd.DataFrame(resnet50_v1_predictions)
resnet50_v2_predictions_df = pd.DataFrame(resnet50_v2_predictions)
resnet50_v3_predictions_df = pd.DataFrame(resnet50_v3_predictions)

#### resnet `v1`

In [None]:
resnet50_v1_predictions_df.head()

In [None]:
resnet50_v1_predictions_df.correct.value_counts()

In [None]:
# Mean of the `time_for_pred` column for v1, rounded to 4 decimal places.
resnet50_v1_average_time_per_pred = round(resnet50_v1_predictions_df.time_for_pred.mean(), 4)
print(f"Average time per prediction: {resnet50_v1_average_time_per_pred} seconds")

In [None]:
resnet50_v1_stats["time_per_pred_cpu"] = resnet50_v1_average_time_per_pred

#### resnet `v2`

In [None]:
resnet50_v2_predictions_df.head()

In [None]:
resnet50_v2_predictions_df.correct.value_counts()

In [None]:
# Mean of the `time_for_pred` column for v2, rounded to 4 decimal places.
resnet50_v2_average_time_per_pred = round(resnet50_v2_predictions_df.time_for_pred.mean(), 4)
print(f"Average time per prediction: {resnet50_v2_average_time_per_pred} seconds")

In [None]:
resnet50_v2_stats["time_per_pred_cpu"] = resnet50_v2_average_time_per_pred

#### resnet `v3`

In [None]:
resnet50_v3_predictions_df.head()

In [None]:
resnet50_v3_predictions_df.correct.value_counts()

In [None]:
# Mean of the `time_for_pred` column for v3, rounded to 4 decimal places.
resnet50_v3_average_time_per_pred = round(resnet50_v3_predictions_df.time_for_pred.mean(), 4)
print(f"Average time per prediction: {resnet50_v3_average_time_per_pred} seconds")

In [None]:
resnet50_v3_stats["time_per_pred_cpu"] = resnet50_v3_average_time_per_pred

## 6 - Comparing results

In [None]:
# One row per model, labelled with the model name; accuracy is shown as a
# percentage rounded to two decimals.
stats_by_model = {
    "resnet50_v1": resnet50_v1_stats,
    "resnet50_v2": resnet50_v2_stats,
    "resnet50_v3": resnet50_v3_stats,
}
df = pd.DataFrame(list(stats_by_model.values()))
df["model"] = list(stats_by_model)
df["test_acc"] = (df["test_acc"] * 100).round(2)
df

In [None]:
# Scatter plot comparing the three models: CPU time per prediction on the x axis,
# test accuracy (%) on the y axis, one fixed colour per model.
fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(data=df, 
                     x="time_per_pred_cpu", 
                     y="test_acc", 
                     c=["blue", "green", "orange"],
                     s="model_size (MB)") # size the dots by the model sizes

ax.set_title("Waste Classifier Models - Inference Speed vs Performance", fontsize=18)
ax.set_xlabel("Prediction time per image (seconds)", fontsize=14)
ax.set_ylabel("Test accuracy (%)", fontsize=14)
ax.tick_params(axis='both', labelsize=12)
ax.grid(True)

# Label every point with its model name, nudged slightly up and to the right
# so the text does not sit on top of the marker.
for index, row in df.iterrows():
    ax.annotate(text=row["model"], 
                xy=(row["time_per_pred_cpu"]+0.0006, row["test_acc"]+0.03),
                size=12)

# Build a legend from the marker sizes so the dot size can be read as model size.
handles, labels = scatter.legend_elements(prop="sizes", alpha=0.5)
model_size_legend = ax.legend(handles, 
                              labels, 
                              loc="lower right", 
                              title="Model size (MB)",
                              fontsize=12)

# Save before show(): the figure is written to disk first, then displayed.
plt.savefig("resnet50-versions-inference-speed-vs-performance.jpg")
plt.show()