# Waste Classifier PyTorch Model

## Goal
Create a PyTorch model that can distinguish between 6 classes:
- cardboard
- glass
- metal
- paper
- plastic
- trash

## 0 - Setup

In [1]:
import torch
import torchvision

print(torch.__version__) # 1.12+
print(torchvision.__version__) # 0.13+

import matplotlib.pyplot as plt

from torch import nn
from torchvision import transforms

try:
    from scripts import data_setup, engine, utils
    from helper_functions import download_data, set_seeds, plot_loss_curves
    print("Base imports done.")
except:
    print("Couldn't find helper scripts, downloading from Github...")
    !git clone https://github.com/tznpau/waste-classifier
    !mv waste-classifier/scripts .
    !mv waste-classifier/helper_functions.py .
    !rm -rf waste-classifier
    from scripts import data_setup, engine, utils
    from helper_functions import download_data, set_seeds, plot_loss_curves

2.2.1+cu121
0.17.1+cu121
Base imports done.


In [2]:
# Device-agnostic setup: use the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [3]:
# Sanity check: show directly whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

## 1. Getting data

In [4]:
# Fetch the zipped dataset from the project repository via the helper and
# keep the path it returns for building the train/test directories.
trash_dataset_path = download_data(
    source="https://github.com/tznpau/waste-classifier/raw/main/data/trash_dataset.zip",
    destination="trash_dataset",
)
trash_dataset_path

[INFO] data/trash_dataset directory exists, skipping download.


PosixPath('data/trash_dataset')

In [5]:
# Setup training and test data directories
train_dir = trash_dataset_path / "train"
test_dir = trash_dataset_path / "test"

train_dir, test_dir

(PosixPath('data/trash_dataset/train'), PosixPath('data/trash_dataset/test'))

## 2. Deployment questions
1. What is the ideal scenario for the model?
2. Where is the model going to go (where will it be deployed)?
3. How is the model going to function?

**ideal use case**
- performs well: 90%+ accuracy
- performs fast: 30 FPS (roughly 33 ms or less per prediction)

## 3. EffNetB2 feature extractor

https://pytorch.org/vision/stable/models/generated/torchvision.models.efficientnet_b2.html#torchvision.models.EfficientNet_B2_Weights

In [6]:
# Delete every file in the torch hub checkpoint cache, presumably so the
# pretrained weights are downloaded afresh by the next cell.
# NOTE(review): the path is hard-coded to root's home directory — confirm it
# matches the runtime before running this outside the original environment.
!rm -rf /root/.cache/torch/hub/checkpoints/*

In [7]:
# Default pretrained weights for EfficientNet-B2.
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT

# Preprocessing pipeline that ships with those weights.
effnetb2_transforms = effnetb2_weights.transforms()

# Model instance initialised from the pretrained weights.
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

# Freeze the base layers: switch off gradient tracking for every parameter.
# The trailing semicolon stops the cell from echoing the returned module.
effnetb2.requires_grad_(False);

Downloading: "https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b2_rwightman-c35c1473.pth
100%|██████████| 35.2M/35.2M [00:00<00:00, 70.1MB/s]


In [9]:
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

[INFO] Couldn't find torchinfo... installing it.


In [10]:


# summary(effnetb2,
#         input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape" (batch_size, color_channels, height, width)
#         verbose=0,
#         col_names=["input_size", "output_size", "num_params", "trainable"],
#         col_width=20,
#         row_settings=["var_names"]
# )

In [11]:
# Inspect the stock classifier head to see which layers have to be replaced.
effnetb2.classifier

Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1408, out_features=1000, bias=True)
)

In [12]:
# Seed first so the freshly initialised Linear layer is reproducible.
set_seeds()

# Swap in a new head: same dropout and input width as the stock classifier,
# but with 6 output units (one per waste class).
effnetb2.classifier = nn.Sequential(
    nn.Dropout(0.3, inplace=True),
    nn.Linear(1408, 6),  # bias=True is the nn.Linear default
)

In [13]:
# Confirm that the head was replaced (the Linear layer should now have 6 outputs).
effnetb2.classifier

Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1408, out_features=6, bias=True)
)

After freezing the base layers, I replaced the classifier head so that EffNetB2 outputs 6 classes (one per waste category) instead of the original 1000.

### 3.1 Creating an EffNetB2 feature extractor

In [14]:
def create_effnetb2_model(num_classes:int=6,
                          seed:int=42):
    """Create a frozen EfficientNet-B2 feature extractor with a new classifier head.

    The pretrained model is loaded with its default weights, every existing
    parameter is frozen, and the classifier is replaced by a Dropout + Linear
    head with `num_classes` outputs. Only the new head is left trainable.

    Relies on the notebook-level `device` variable for placement.

    Args:
        num_classes: Number of output units of the new classifier head.
        seed: Seed passed to `torch.manual_seed` right before the head is
            created, so its initial weights are reproducible.

    Returns:
        A `(model, transforms)` tuple: the model on `device` and the
        preprocessing transforms that ship with the pretrained weights.
    """
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    # Use a distinct local name so the `transforms` module imported from
    # torchvision at the top of the notebook is not shadowed in here.
    model_transforms = weights.transforms()
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    # Freeze the pretrained base layers.
    for param in model.parameters():
        param.requires_grad = False

    # Read the feature width from the stock head (Dropout at index 0, Linear at
    # index 1) instead of repeating the magic number 1408.
    in_features = model.classifier[1].in_features

    torch.manual_seed(seed)
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=in_features, out_features=num_classes)
    ).to(device)

    return model, model_transforms

In [15]:
# Rebuild the feature extractor through the factory function; this replaces the
# `effnetb2` / `effnetb2_transforms` objects that were assembled by hand earlier.
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes=6, seed=42)

In [16]:
# Show the preprocessing pipeline returned together with the model.
effnetb2_transforms

ImageClassification(
    crop_size=[288]
    resize_size=[288]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

### 3.2 DataLoaders for EffNetB2

In [17]:
from scripts import data_setup

# Build the train/test DataLoaders using the preprocessing that belongs to the
# EffNetB2 weights; the helper also returns the class names.
(train_dataloader_effnetb2,
 test_dataloader_effnetb2,
 class_names) = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=effnetb2_transforms,
    batch_size=32,
)

Let's check that the split was done correctly during `data_setup.create_dataloaders`.

In [18]:
# Number of batches in each DataLoader, plus the class names returned by the helper.
len(train_dataloader_effnetb2), len(test_dataloader_effnetb2), class_names

(64, 16, ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'])

We have 64 batches for the train dataloader and 16 batches for the test dataloader.

### 3.3 Training EffNetB2 feature extractor

In [19]:
from scripts import engine

# Training hyperparameters.
LEARNING_RATE = 1e-3
EPOCHS = 10

# Adam over the model's parameters, with cross-entropy as the multi-class loss.
optimizer = torch.optim.Adam(effnetb2.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.CrossEntropyLoss()

# Seed right before training so runs are repeatable.
set_seeds(42)

# effnetb2_results = engine.train(model=effnetb2,
#                                 train_dataloader=train_dataloader_effnetb2,
#                                 test_dataloader=test_dataloader_effnetb2,
#                                 epochs=EPOCHS,
#                                 optimizer=optimizer,
#                                 loss_fn=loss_fn,
#                                 device=device)

In [20]:
from helper_functions import plot_loss_curves

# plot_loss_curves(effnetb2_results)

## 4. ViT feature extractor

https://pytorch.org/vision/main/models/generated/torchvision.models.vit_b_16.html#torchvision.models.ViT_B_16_Weights

In [21]:
# Instantiate ViT-B/16 (no `weights` argument is passed here) just to inspect
# the structure of its classification head before replacing it.
vit = torchvision.models.vit_b_16()
vit.heads

Sequential(
  (head): Linear(in_features=768, out_features=1000, bias=True)
)

### 4.1 Creating a ViT feature extractor

In [34]:
def create_vit_model(num_classes:int=6,
                     seed:int=42):
    """Create a frozen ViT-B/16 feature extractor with a new classification head.

    The pretrained model is loaded with its default weights, every existing
    parameter is frozen, and `heads` is replaced by a single Linear layer with
    `num_classes` outputs. Only the new head is left trainable.

    Relies on the notebook-level `device` variable for placement.

    Args:
        num_classes: Number of output units of the new head.
        seed: Seed passed to `torch.manual_seed` right before the head is
            created, so its initial weights are reproducible.

    Returns:
        A `(model, transforms)` tuple: the model on `device` and the
        preprocessing transforms that ship with the pretrained weights.
    """
    weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    # Use a distinct local name so the `transforms` module imported from
    # torchvision at the top of the notebook is not shadowed in here.
    model_transforms = weights.transforms()
    model = torchvision.models.vit_b_16(weights=weights).to(device)

    # Freeze the pretrained base layers.
    for param in model.parameters():
        param.requires_grad = False

    # Read the embedding width from the stock head (`heads.head` is a Linear
    # layer) instead of repeating the magic number 768.
    in_features = model.heads.head.in_features

    torch.manual_seed(seed)
    # Move the new head to `device` as well, matching create_effnetb2_model;
    # otherwise the returned model has its backbone on `device` and its head
    # on the CPU.
    model.heads = nn.Sequential(
        nn.Linear(in_features=in_features, out_features=num_classes)
    ).to(device)

    return model, model_transforms

In [35]:
# Build the ViT feature extractor with the factory defaults (num_classes=6,
# seed=42); this replaces the `vit` instance that was only used for inspection.
vit, vit_transforms = create_vit_model()

In [36]:
from torchinfo import summary

# Print the ViT feature extractor summary: per-layer input/output shapes,
# parameter counts and which layers are trainable.
summary(vit,
        input_size=(1, 3, 224, 224),  # (batch_size, color_channels, height, width)
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
VisionTransformer (VisionTransformer)                        [1, 3, 224, 224]     [1, 6]               768                  Partial
├─Conv2d (conv_proj)                                         [1, 3, 224, 224]     [1, 768, 14, 14]     (590,592)            False
├─Encoder (encoder)                                          [1, 197, 768]        [1, 197, 768]        151,296              False
│    └─Dropout (dropout)                                     [1, 197, 768]        [1, 197, 768]        --                   --
│    └─Sequential (layers)                                   [1, 197, 768]        [1, 197, 768]        --                   False
│    │    └─EncoderBlock (encoder_layer_0)                   [1, 197, 768]        [1, 197, 768]        (7,087,872)          False
│    │    └─EncoderBlock (encoder_layer_1)                   [1, 197, 768]        [1, 1

### 4.2 Creating DataLoaders for ViT

In [37]:
from scripts import data_setup

# Build the train/test DataLoaders using the preprocessing that belongs to the
# ViT weights; the helper also returns the class names.
(train_dataloader_vit,
 test_dataloader_vit,
 class_names) = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=vit_transforms,
    batch_size=32,
)

In [38]:
# Number of batches in each DataLoader, plus the class names returned by the helper.
len(train_dataloader_vit), len(test_dataloader_vit), class_names

(64, 16, ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash'])

### 4.3 Training ViT feature extractor

In [39]:
from scripts import engine

# Reuse the LEARNING_RATE / EPOCHS constants defined for the EffNetB2 run
# (1e-3 and 10) rather than repeating the literals, so both feature extractors
# are always trained under the same settings.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=vit.parameters(),
                             lr=LEARNING_RATE)

# Seed right before training so runs are repeatable.
set_seeds()

vit_results = engine.train(
    model=vit,
    train_dataloader=train_dataloader_vit,
    test_dataloader=test_dataloader_vit,
    epochs=EPOCHS,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.8425 | train_acc: 0.7214 | test_loss: 0.5532 | test_acc: 0.8460
Epoch: 2 | train_loss: 0.4359 | train_acc: 0.8625 | test_loss: 0.4552 | test_acc: 0.8541
Epoch: 3 | train_loss: 0.3405 | train_acc: 0.8979 | test_loss: 0.4146 | test_acc: 0.8717
Epoch: 4 | train_loss: 0.2846 | train_acc: 0.9165 | test_loss: 0.3918 | test_acc: 0.8733
Epoch: 5 | train_loss: 0.2435 | train_acc: 0.9277 | test_loss: 0.3728 | test_acc: 0.8756
Epoch: 6 | train_loss: 0.2152 | train_acc: 0.9409 | test_loss: 0.3671 | test_acc: 0.8714
Epoch: 7 | train_loss: 0.1888 | train_acc: 0.9512 | test_loss: 0.3539 | test_acc: 0.8761
Epoch: 8 | train_loss: 0.1747 | train_acc: 0.9577 | test_loss: 0.3535 | test_acc: 0.8775
Epoch: 9 | train_loss: 0.1563 | train_acc: 0.9624 | test_loss: 0.3498 | test_acc: 0.8778
Epoch: 10 | train_loss: 0.1388 | train_acc: 0.9707 | test_loss: 0.3428 | test_acc: 0.8820
