In [None]:
%pip install torch torchvision pytorch-lightning wandb matplotlib numpy scikit-learn --quiet

In [None]:
import os
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import numpy as np
from torchmetrics import Accuracy
from pathlib import Path
from tqdm.auto import tqdm
from torch.utils.tensorboard import SummaryWriter
from helper_functions import train_model, eval_model, save_model

In [None]:
# Root folder of the image dataset; the `train` and `test` subfolders are read from it below.
data_path = '100-bird-species'

# Data Loading


### Data Augmentation

In [None]:
class CustomDataset(Dataset):
    """Dataset wrapper around a torchvision ``ImageFolder``.

    Attributes:
        data_path: directory handed to ``ImageFolder`` as its ``root``.
        transform: transform handed to ``ImageFolder`` (may be ``None``).
        dataset: the wrapped ``ImageFolder`` instance.
        classes: the ``classes`` attribute of the wrapped ``ImageFolder``.
    """

    def __init__(self, data_path, transform=None):
        """Build the underlying ``ImageFolder`` for ``data_path``.

        Args:
            data_path: root directory passed to ``ImageFolder``.
            transform: optional transform passed to ``ImageFolder``.
        """
        self.data_path = data_path
        self.transform = transform

        image_folder = datasets.ImageFolder(root=data_path, transform=transform)
        self.dataset = image_folder
        self.classes = image_folder.classes

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        wrapped = self.dataset
        return len(wrapped)

    def __getitem__(self, idx):
        """Return item ``idx`` exactly as the wrapped dataset yields it."""
        item = self.dataset[idx]
        return item

In [None]:
# Training pipeline: resize to 224x224, apply a random horizontal flip as
# augmentation, then convert the image to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Evaluation pipeline: same resize/tensor conversion but WITHOUT the random
# flip, so test metrics are deterministic and measured on unaugmented images.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_dataset = CustomDataset(f'{data_path}/train', transform)
test_dataset = CustomDataset(f'{data_path}/test', test_transform)

In [None]:
# Batch both datasets in groups of 32: training samples are reshuffled on each
# pass, test samples keep their original order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Class labels exposed by the training dataset wrapper (it forwards the
# wrapped ImageFolder's `classes`); the bare expression below displays them
# as the cell output.
class_names = train_dataset.classes
class_names

# Model Building

In [None]:
### HYPERPARAMETERS
# --- image geometry ---
HEIGHT, WIDTH = 224, 224
CHANNELS = 3
# one output class per entry in class_names
CLASSES = len(class_names)

# --- training ---
SEED = 42
# Device preference order: Apple MPS first, then CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

In [None]:
# Display the device string selected above ("mps", "cuda" or "cpu").
DEVICE

## MobileNet

### MobileNet Class

In [None]:
import torch
import torch.nn as nn

class DepthwiseSeparableConv(nn.Module):
    """3x3 depthwise conv -> 1x1 pointwise conv -> BatchNorm -> ReLU.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the pointwise convolution.
        stride: stride of the depthwise convolution (default 1).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # groups == in_channels gives each input channel its own 3x3 filter.
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=in_channels,
            bias=False,
        )
        # 1x1 convolution that mixes channels and sets the output width.
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply depthwise, pointwise, batch-norm and ReLU in that order."""
        features = self.pointwise(self.depthwise(x))
        return self.relu(self.bn(features))

class MobileNet(nn.Module):
    """MobileNet-style image classifier.

    Layout: a strided 3x3 stem convolution (with BatchNorm and ReLU), thirteen
    depthwise-separable blocks, global average pooling, and a linear head.

    Args:
        num_classes: size of the output logits vector (default 1000).
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        stem = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        ]

        # (in_channels, out_channels, stride) for each depthwise-separable block.
        block_plan = [
            (32, 64, 1),
            (64, 128, 2),
            (128, 128, 1),
            (128, 256, 2),
            (256, 256, 1),
            (256, 512, 2),
        ]
        # 5 Depthwise Separable Conv layers with stride 1
        block_plan += [(512, 512, 1)] * 5
        block_plan += [
            (512, 1024, 2),
            (1024, 1024, 1),
        ]
        blocks = [
            DepthwiseSeparableConv(c_in, c_out, stride=step)
            for c_in, c_out, step in block_plan
        ]

        self.model = nn.Sequential(*stem, *blocks, nn.AdaptiveAvgPool2d((1, 1)))
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images."""
        pooled = self.model(x)
        flat = pooled.view(pooled.size(0), -1)  # collapse everything but the batch axis
        return self.fc(flat)

In [None]:
# Instantiate MobileNet with one output per class.
torch.manual_seed(SEED)  # seed first so the initial weights are reproducible
mobilenet = MobileNet(CLASSES)
mobilenet.state_dict()

### Training Setup

In [None]:
# Training setup for MobileNet: cross-entropy loss, Adam over the model's
# parameters, and a multiclass accuracy metric placed on DEVICE.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=mobilenet.parameters(), lr=1e-2)
accuracy_fn = (
    Accuracy(task="multiclass", num_classes=CLASSES)
    .to(DEVICE)
)

In [None]:
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer for the MobileNet run; a dedicated directory is used
# instead of the default "runs" so this experiment's logs stay separate.
writer = SummaryWriter(log_dir='runs/mobilenet_experiment_1')

In [None]:
# Pull the first batch from the training loader; the loader shuffles, so
# this is a random batch of images and their labels.
images, labels = next(iter(train_loader))

In [None]:
# helper function to show a single image tensor with matplotlib
def matplotlib_imshow(img, one_channel=False, unnormalize=False):
    """Display one channels-first image tensor with ``plt.imshow``.

    Args:
        img: image tensor whose first axis is the channel axis (that axis is
            averaged when ``one_channel`` is set, otherwise moved last).
        one_channel: if True, average over the channel axis and draw the
            result with the "Greys" colormap.
        unnormalize: if True, apply ``img / 2 + 0.5`` before plotting, i.e.
            undo a ``Normalize(mean=0.5, std=0.5)`` step. Defaults to False
            because the transform pipeline in this notebook ends at
            ``ToTensor`` and never normalizes, so rescaling would wash out
            the image.
    """
    if one_channel:
        img = img.mean(dim=0)
    if unnormalize:
        img = img / 2 + 0.5     # unnormalize
    # detach + cpu so tensors that track gradients or live on an accelerator
    # can still be converted to a NumPy array.
    npimg = img.detach().cpu().numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        # matplotlib expects channels last: (C, H, W) -> (H, W, C)
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [None]:
# Trace the model on the sample batch so TensorBoard can render its graph.
# The batch is moved to the device holding the model's parameters, so this
# cell still works when re-run after the model has been moved to DEVICE.
writer.add_graph(mobilenet, images.to(next(mobilenet.parameters()).device))
# Flush instead of close: the same writer is handed to train_model later on.
writer.flush()

### Training

In [None]:
# Number of passes over the training set. Defined in this cell so the loop
# runs on a fresh kernel (Restart & Run All) instead of raising NameError.
EPOCHS = 10

# Per-epoch results, keyed by the 1-based epoch number.
history = {}
torch.manual_seed(SEED)
mobilenet = mobilenet.to(DEVICE)
for epoch in tqdm(range(1, EPOCHS + 1), desc="Epoch", leave=True):
    # One training pass; the TensorBoard writer is passed to the helper.
    train_result = train_model(model=mobilenet,
                               data_loader=train_loader,
                               loss_fn=criterion,
                               optimizer=optimizer,
                               accuracy_fn=accuracy_fn,
                               device=DEVICE,
                               epoch=epoch,
                               writer=writer)

    # Evaluate on the test loader after every epoch.
    eval_result = eval_model(model=mobilenet,
                             data_loader=test_loader,
                             loss_fn=criterion,
                             accuracy_fn=accuracy_fn,
                             device=DEVICE,
                             epoch=epoch)

    history[epoch] = {'train': train_result, 'eval': eval_result}
    print()

## EfficientNetV2

### Class and Instance

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

class MBConv(nn.Module):
    """Inverted-bottleneck block: 1x1 expand -> 3x3 depthwise -> 1x1 project.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels of the output feature map.
        expand_ratio: multiplier giving the hidden width
            (``in_channels * expand_ratio``); a ratio of 1 skips the
            expansion stage.
        stride: stride of the depthwise convolution.

    The input is added back to the output only when the stride is 1 and the
    input and output channel counts match.
    """

    def __init__(self, in_channels, out_channels, expand_ratio, stride):
        super().__init__()
        hidden_dim = in_channels * expand_ratio
        self.use_residual = in_channels == out_channels and stride == 1

        # Expansion stage is a no-op when there is nothing to expand.
        if expand_ratio == 1:
            self.expand = nn.Identity()
        else:
            self.expand = nn.Sequential(
                nn.Conv2d(in_channels, hidden_dim, kernel_size=1, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
            )

        # One 3x3 filter per hidden channel (groups == hidden_dim).
        self.depthwise = nn.Sequential(
            nn.Conv2d(
                hidden_dim,
                hidden_dim,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=hidden_dim,
                bias=False,
            ),
            nn.BatchNorm2d(hidden_dim),
            nn.SiLU(),
        )

        # Linear projection back to out_channels (no activation afterwards).
        self.project = nn.Sequential(
            nn.Conv2d(hidden_dim, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        """Run expand, depthwise and project; add the input when residual."""
        out = self.project(self.depthwise(self.expand(x)))
        if not self.use_residual:
            return out
        out += x
        return out

class EfficientNetV2(nn.Module):
    """Compact EfficientNet-style classifier built from MBConv blocks.

    Layout: strided 3x3 stem -> seven MBConv blocks -> 1x1 conv to 1280
    channels -> global average pooling -> linear head.

    Args:
        num_classes: size of the output logits vector (default 1000).
    """

    def __init__(self, num_classes=1000):
        super().__init__()

        # Stem: 3 -> 32 channels at half the input resolution.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.SiLU(),
        )

        # (in_channels, out_channels, expand_ratio, stride) per MBConv block.
        stage_specs = (
            (32, 16, 1, 1),
            (16, 32, 4, 2),
            (32, 32, 4, 1),
            (32, 64, 4, 2),
            (64, 64, 4, 1),
            (64, 128, 6, 2),
            (128, 128, 6, 1),
        )
        self.mbconv_layers = nn.Sequential(
            *[MBConv(*spec) for spec in stage_specs]
        )

        # Head: widen to 1280 channels before pooling.
        self.conv2 = nn.Sequential(
            nn.Conv2d(128, 1280, kernel_size=1, bias=False),
            nn.BatchNorm2d(1280),
            nn.SiLU(),
        )

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(1280, num_classes)

    def forward(self, x):
        """Return class logits for a batch of 3-channel images."""
        for stage in (self.conv1, self.mbconv_layers, self.conv2, self.avgpool):
            x = stage(x)
        flat = x.view(x.size(0), -1)  # collapse everything but the batch axis
        return self.fc(flat)

In [None]:
# Instantiate EfficientNetV2 with one output per class.
torch.manual_seed(SEED)  # seed first so the initial weights are reproducible
effnet = EfficientNetV2(CLASSES)
effnet.state_dict()

### Training Setup

In [None]:
# Training setup for EfficientNetV2: cross-entropy loss, Adam over the model's
# parameters, a step schedule (learning rate x0.1 every 10 scheduler steps),
# and a multiclass accuracy metric placed on DEVICE.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=effnet.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
accuracy_fn = (
    Accuracy(task="multiclass", num_classes=CLASSES)
    .to(DEVICE)
)

In [None]:
# TensorBoard writer for the EfficientNet run, logging to its own directory.
writer = SummaryWriter(log_dir='runs/effnet_experiment_1')

### Training

In [42]:
# Number of passes over the training set.
EPOCHS = 10

# Per-epoch results, keyed by the 1-based epoch number.
history = {}
torch.manual_seed(SEED)
effnet = effnet.to(DEVICE)
for epoch in tqdm(range(1, EPOCHS + 1), desc="Epoch", leave=True):
    # One training pass; the TensorBoard writer is passed to the helper.
    train_result = train_model(model=effnet,
                               data_loader=train_loader,
                               loss_fn=criterion,
                               optimizer=optimizer,
                               accuracy_fn=accuracy_fn,
                               device=DEVICE,
                               epoch=epoch,
                               writer=writer)

    # Evaluate on the test loader after every epoch.
    eval_result = eval_model(model=effnet,
                             data_loader=test_loader,
                             loss_fn=criterion,
                             accuracy_fn=accuracy_fn,
                             device=DEVICE,
                             epoch=epoch)

    # The StepLR schedule only takes effect when it is stepped; the scheduler
    # is not passed to train_model, so advance it here once per epoch.
    scheduler.step()

    history[epoch] = {'train': train_result, 'eval': eval_result}
    print()

KeyboardInterrupt: 

In [None]:
# Save the model
# Hands the EfficientNet model and the target file name to the project's
# `save_model` helper.
# NOTE(review): whether the helper stores only the state_dict or the whole
# module is defined in helper_functions — confirm before loading it back.
save_model(effnet, 'effnetv1.pth')