# Classification of cars using CNN and transfer learning

## Introduction

In this project, I will tackle a classification problem using the [Stanford Cars dataset](https://ai.stanford.edu/~jkrause/cars/car_dataset.html). This dataset contains 16,185 images of 196 classes of cars. The data is split into 8,144 training images and 8,041 testing images, with each class divided roughly 50-50 between the two sets. Classes are typically at the level of Make, Model, Year, e.g. 2012 Tesla Model S or 2012 BMW M3 coupe. **The objective of this project is to train a deep learning model that takes an image of a car as input and outputs its class**.

Due to the limited number of images, it is difficult to train a deep learning model from scratch on this dataset. Therefore, I will leverage the power of **transfer learning**, a common approach in deep learning (especially in computer vision): I will take a pretrained EfficientNet model and fine-tune it on our own dataset.

## Import

In [15]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

import timm

import time
import os
import tqdm
import PIL.Image as Image
from IPython.display import display
import logging

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
# Optional: uncomment to also print the GPU model name (only valid on CUDA).
# print(torch.cuda.get_device_name(device))

cpu


In [13]:
# Folder whose `train` and `test` subdirectories hold the images.
DATASET_DIR = "./stanford_car_dataset/car_data/car_data/"

# Number of car classes; used as the output size of the new classifier layer.
NUM_CAR_CLASSES = 196

# Log file path. Leave empty to send log records to the console instead.
LOG_PATH = ""

## Dataset

The dataset is stored in the following structure:

└── car_data

    ├── test ── <car_class> ── images
    
    └── train ── <car_class> ── images
    
In other words, each of the train/test directories contains 196 subdirectories, each named after a car class (its unique identifier). Each subdirectory stores the images belonging to the corresponding car class.

Given the structure of this dataset, the convenient utility class `torchvision.datasets.ImageFolder`, which is a subclass of `torch.utils.data.Dataset`, will prove quite handy.

In [2]:
# ! tree -d ./stanford_car_dataset/car_data/

In [14]:
def get_dataloaders(dataset_dir, batch_size=32, num_workers=2):
    """Create the training and test ``DataLoader`` objects.

    Images are read with ``torchvision.datasets.ImageFolder`` from the
    ``train`` and ``test`` subdirectories of ``dataset_dir``.

    Args:
        dataset_dir: Directory containing the ``train`` and ``test`` folders.
            A trailing path separator is optional.
        batch_size: Number of images per batch, used for both loaders.
        num_workers: Number of worker processes used by each loader.

    Returns:
        A ``(train_dl, test_dl)`` tuple. The training loader shuffles and
        augments its images; the test loader does neither.
    """
    # To help prevent overfitting, the training images get some simple
    # augmentation (horizontal flip and rotation).
    # For more image augmentation options, the albumentations package works
    # really well with pytorch:
    # https://github.com/albumentations-team/albumentations
    # Another helpful package is torchIO, which is good for 3d volume datasets:
    # https://github.com/fepegar/torchio

    # Resize expects (height, width); every image is resized to this square.
    image_size = (224, 224)
    # Per-channel mean / std passed to Normalize (the standard ImageNet values).
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    train_tfms = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])
    # note: no data augmentation for test data
    test_tfms = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    # os.path.join works whether or not dataset_dir ends with a separator.
    train_dset = torchvision.datasets.ImageFolder(
        root=os.path.join(dataset_dir, "train"), transform=train_tfms)
    train_dl = torch.utils.data.DataLoader(
        train_dset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    test_dset = torchvision.datasets.ImageFolder(
        root=os.path.join(dataset_dir, "test"), transform=test_tfms)
    test_dl = torch.utils.data.DataLoader(
        test_dset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_dl, test_dl

## Training function

In [6]:
def train_model(model, train_dl, loss_fn, optimizer, scheduler, n_epochs=5):
    """Train ``model`` on ``train_dl`` and evaluate it after every epoch.

    Each epoch makes one pass over ``train_dl``, logs the mean training loss
    and the training accuracy, calls ``eval_model(model)`` to get the test
    accuracy, and steps ``scheduler`` with that test accuracy.

    Args:
        model: Network to train; it must already be on ``device``.
        train_dl: Loader yielding ``(inputs, labels)`` batches. Its
            ``dataset`` attribute must support ``len()``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss
            that is averaged over the batch (the running loss is weighted by
            batch size on that assumption).
        optimizer: Optimizer that updates the parameters of ``model``.
        scheduler: Learning-rate scheduler whose ``step`` accepts a metric.
        n_epochs: Number of passes over the training data.

    Returns:
        ``(model, losses, accuracies, test_accuracies)``, where the three
        lists hold one value per epoch and accuracies are percentages.
    """
    losses = []
    accuracies = []
    test_accuracies = []

    # set the model to train mode initially
    model.train()
    for epoch in tqdm.tqdm(range(1, n_epochs + 1)):
        since = time.time()
        running_loss = 0.0
        running_correct = 0

        for inputs, labels in train_dl:
            # Move the batch to the training device.
            # (For a half-precision model, also cast the inputs with .half().)
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()

            # Softmax is monotonic, so the argmax of the raw logits already
            # gives the predicted class; no softmax is needed here.
            predicted = outputs.argmax(dim=-1)

            # Weight the batch-mean loss by the batch size so the epoch loss
            # is a per-sample mean even when the last batch is smaller.
            running_loss += loss.item() * inputs.shape[0]
            running_correct += (labels == predicted).sum().item()

        epoch_duration = time.time() - since
        epoch_loss = running_loss / len(train_dl.dataset)
        epoch_acc = running_correct / len(train_dl.dataset) * 100.0
        msg = f"Train Epoch: {epoch}\tduration: {epoch_duration}s\ttrain_loss: {epoch_loss:.4f}\taccuracy: {epoch_acc:.4f}%"
        logging.info(msg)

        losses.append(epoch_loss)
        accuracies.append(epoch_acc)

        # switch the model to eval mode to evaluate on test data
        model.eval()
        test_acc = eval_model(model)
        test_accuracies.append(test_acc)

        # re-set the model to train mode after validating
        model.train()
        scheduler.step(test_acc)

    print('Finished Training')
    return model, losses, accuracies, test_accuracies

## Testing function

In [7]:
def eval_model(model, dataloader=None):
    """Compute and log the classification accuracy of ``model``.

    Args:
        model: Network to evaluate; it must already be on ``device``.
        dataloader: Loader yielding ``(images, labels)`` batches. When omitted,
            the module-level ``test_dl`` is used, so existing
            ``eval_model(model)`` calls keep working.

    Returns:
        Accuracy as a percentage in ``[0, 100]``; ``0.0`` if the loader
        yields no samples.
    """
    if dataloader is None:
        # Fall back to the global loader the original implementation relied on.
        dataloader = test_dl

    correct = 0
    total = 0

    # Evaluate in eval mode regardless of how the caller left the model, and
    # put the previous mode back afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in dataloader:
                # (For a half-precision model, also cast the images with .half().)
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                predicted = outputs.argmax(dim=1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    # Guard against an empty loader instead of dividing by zero.
    test_acc = 100.0 * correct / total if total else 0.0

    msg = f'Test accuracy: {test_acc:.4f}%'
    logging.info(msg)

    return test_acc

## Main function

In [None]:
def main():
    """Fine-tune a pretrained EfficientNet-B3 on the car dataset and plot the metrics.

    Builds the data loaders, replaces the classifier head of the pretrained
    model with a fresh ``NUM_CAR_CLASSES``-way linear layer, configures
    logging, trains for 20 epochs, and plots the per-epoch training loss,
    training accuracy, and test accuracy.

    Returns:
        ``(model_ft, training_losses, training_accs, test_accs)`` exactly as
        returned by ``train_model``.
    """
    # eval_model() looks up ``test_dl`` as a module-level name. Binding it only
    # as a local here would leave that lookup undefined (NameError) unless it
    # is assigned somewhere else at module level, so publish it as a global.
    global test_dl
    train_dl, test_dl = get_dataloaders(DATASET_DIR)

    # I use the wonderful timm package to load the pretrained efficientnet model
    # https://github.com/rwightman/pytorch-image-models
    model_ft = timm.create_model('efficientnet_b3', pretrained=True)

    # replace the last fc layer with an untrained one (requires grad by default)
    num_ftrs = model_ft.classifier.in_features
    model_ft.classifier = nn.Linear(num_ftrs, NUM_CAR_CLASSES)
    model_ft.to(device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model_ft.parameters(), lr=0.001)

    # Lower the learning rate when the test accuracy stops improving.
    # NOTE(review): with the scheduler's default relative threshold mode,
    # threshold=0.9 in 'max' mode means a new value must exceed 1.9x the best
    # one to count as an improvement -- confirm this is intended.
    lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       mode='max',
                                                       patience=3,
                                                       threshold=0.9,
                                                       min_lr=1e-6,
                                                       verbose=True,
                                                       )

    # Use this format (%Y-%m-%dT%H:%M:%SZ) to record timestamp of the metrics.
    # An empty LOG_PATH becomes filename=None, which makes basicConfig log to
    # the console (its default stream is stderr); otherwise log to that file.
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%SZ",
        level=logging.INFO,
        filename=LOG_PATH or None)

    model_ft, training_losses, training_accs, test_accs = train_model(model_ft,
                                                                      train_dl,
                                                                      loss_fn,
                                                                      optimizer,
                                                                      lrscheduler,
                                                                      n_epochs=20
                                                                      )

    # plot the stats (the fourth subplot is intentionally left empty)
    f, axarr = plt.subplots(2, 2, figsize=(12, 8))
    axarr[0, 0].plot(training_losses)
    axarr[0, 0].set_title("Training loss")
    axarr[0, 1].plot(training_accs)
    axarr[0, 1].set_title("Training acc")
    axarr[1, 0].plot(test_accs)
    axarr[1, 0].set_title("Test acc")

    # Hand the results back so callers can reuse the model and the metrics.
    return model_ft, training_losses, training_accs, test_accs

## Plot metrics

In [None]:
# Plot the per-epoch training loss, training accuracy and test accuracy.
# NOTE(review): training_losses, training_accs and test_accs are only assigned
# inside main() in the code shown here; they must also exist at module level
# for this cell to run -- confirm where they come from.

f, axarr = plt.subplots(2, 2, figsize=(12, 8))

# (row, column) of the subplot, the series to draw, and the subplot title.
for position, series, title in (
    ((0, 0), training_losses, "Training loss"),
    ((0, 1), training_accs, "Training acc"),
    ((1, 0), test_accs, "Test acc"),
):
    axarr[position].plot(series)
    axarr[position].set_title(title)