In [None]:
# Mount Google Drive at /content/drive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the dataset archive from the mounted Google Drive into the current working directory.
# The relative path assumes the notebook runs from the directory that contains the `drive` mount point.
!cp ./drive/My\ Drive/stanford_car_dataset.zip ./

In [None]:
%%time
# unzip dataset in colab
!mkdir ./stanford_car_dataset
!unzip -q stanford_car_dataset.zip -d ./stanford_car_dataset

CPU times: user 231 ms, sys: 44.3 ms, total: 275 ms
Wall time: 49.1 s


In [None]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

import time
import os
import tqdm
import PIL.Image as Image
from IPython.display import display

# Use the first GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# torch.cuda.get_device_name() only accepts CUDA devices and raises on a CPU-only
# runtime, so only query the GPU name when a GPU is actually in use.
if device.type == "cuda":
    print(torch.cuda.get_device_name(device))

In [None]:
dataset_dir = "stanford_car_dataset/car_data/car_data/"

# Data transformations; try different transformations / data augmentation here.
# Note: no data augmentation is applied to the test data.

# width, height = 224, 224  # for models other than inception_v3
width, height = 299, 299  # for inception_v3

# Normalization shared by the training and test pipelines so the two cannot drift apart.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# torchvision's Resize takes its target size as (height, width).
train_tfms = transforms.Compose([
    transforms.Resize((height, width)),
    # transforms.RandomHorizontalFlip(),
    # transforms.RandomRotation(15),
    transforms.ToTensor(),
    normalize,
])
test_tfms = transforms.Compose([
    transforms.Resize((height, width)),
    transforms.ToTensor(),
    normalize,
])

# Create the datasets and their loaders. Only the training data is shuffled.
batch_size = 32

dataset = torchvision.datasets.ImageFolder(root=dataset_dir + "train", transform=train_tfms)
trainloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2)

dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir + "test", transform=test_tfms)
testloader = torch.utils.data.DataLoader(dataset2, batch_size=batch_size, shuffle=False, num_workers=2)


In [None]:
def train_model(model, criterion, optimizer, scheduler, n_epochs=5):
    """Train ``model`` on the module-level ``trainloader`` and evaluate it after every epoch.

    Relies on the notebook globals ``trainloader``, ``device`` and ``eval_model``.

    Args:
        model: network to train; it is updated in place.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates ``model``'s parameters.
        scheduler: learning-rate scheduler; stepped once per epoch with the test
            accuracy returned by ``eval_model``.
        n_epochs: number of passes over ``trainloader``.

    Returns:
        Tuple ``(model, losses, accuracies, test_accuracies)`` where the three lists
        hold one value per epoch: mean per-batch training loss, training accuracy in
        percent, and test accuracy in percent.
    """
    losses = []
    accuracies = []
    test_accuracies = []

    # set the model to train mode initially
    model.train()
    for epoch in tqdm.tqdm(range(n_epochs)):
        since = time.time()
        running_loss = 0.0
        running_correct = 0
        running_total = 0
        n_batches = 0
        for inputs, labels in trainloader:
            # move the batch to the target device
            #inputs = inputs.to(device).half() # uncomment for half precision model
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate statistics; detach so the prediction is not tracked by autograd
            _, predicted = torch.max(outputs.detach(), 1)
            running_loss += loss.item()
            running_correct += (labels == predicted).sum().item()
            running_total += labels.size(0)
            n_batches += 1

        epoch_duration = time.time() - since
        epoch_loss = running_loss / max(n_batches, 1)
        # Divide by the number of samples actually seen instead of assuming every
        # batch holds exactly 32 samples (the last batch is usually smaller).
        epoch_acc = 100.0 * running_correct / max(running_total, 1)
        print("Epoch %s, duration: %d s, loss: %.4f, acc: %.4f" % (epoch+1, epoch_duration, epoch_loss, epoch_acc))

        losses.append(epoch_loss)
        accuracies.append(epoch_acc)

        # switch the model to eval mode to evaluate on test data
        model.eval()
        test_acc = eval_model(model)
        test_accuracies.append(test_acc)

        # re-set the model to train mode after validating
        model.train()
        scheduler.step(test_acc)
    print('Finished Training')
    return model, losses, accuracies, test_accuracies

In [None]:
def eval_model(model):
    """Return the top-1 accuracy (in percent) of ``model`` on the module-level ``testloader``.

    Relies on the notebook globals ``testloader`` and ``device``. The model is put
    into eval mode for the duration of the evaluation and its previous
    train/eval mode is restored afterwards.

    Args:
        model: network to evaluate.

    Returns:
        Accuracy in percent as a float; ``0.0`` when ``testloader`` yields no samples.
    """
    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                #images = images.to(device).half() # uncomment for half precision model
                images = images.to(device)
                labels = labels.to(device)

                # evaluate the model that was passed in, not the global ``model_ft``
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # leave the model in whatever mode the caller had it in
        model.train(was_training)

    test_acc = 100.0 * correct / total if total else 0.0
    print('Accuracy of the network on the test images: %d %%' % (
        test_acc))
    return test_acc

In [None]:
NUM_CAR_CLASSES = 196
model_ft = models.inception_v3(pretrained=True)
# Turn off the auxiliary-classifier output: the training loop feeds model(inputs)
# straight into the loss and therefore expects a single logits tensor.
model_ft.aux_logits = False

# Optional: freeze the pretrained parameters so that only the new classifier head is trained.
# (The attribute is `requires_grad`; a misspelled name would silently do nothing.)
# for param in model_ft.parameters():
#     param.requires_grad = False

# Replace the last fc layer with an untrained one (requires grad by default),
# sized for the number of car classes.

# for inception_v3
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, NUM_CAR_CLASSES)
model_ft = model_ft.to(device)

# Uncomment this block for a half precision model
# model_ft = model_ft.half()
# for layer in model_ft.modules():
#     if isinstance(layer, nn.BatchNorm2d):
#         layer.float()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

# Probably not the best metric to track, but the scheduler is stepped with the test
# accuracy after every epoch (see scheduler.step(test_acc) in train_model). If the
# accuracy has not improved by at least 0.9 percentage points over the best value for
# more than `patience` epochs, the learning rate is reduced (default factor: 0.1x).
# threshold_mode='abs' is required for that meaning: with the default 'rel' mode a
# threshold of 0.9 would demand a 90% relative improvement, which practically never
# happens and makes the learning rate decay on a fixed schedule instead.
# However, in this model it did not benefit me.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', patience=3, threshold=0.9, threshold_mode='abs')

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


HBox(children=(FloatProgress(value=0.0, max=108949747.0), HTML(value='')))




In [None]:
# Fine-tune the inception_v3 model for 20 epochs and keep the per-epoch statistics.
model_ft, training_losses, training_accs, test_accs = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=lrscheduler,
    n_epochs=20,
)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch 1, duration: 118 s, loss: 4.0262, acc: 12.6471


  5%|▌         | 1/20 [03:25<1:05:09, 205.77s/it]

Accuracy of the network on the test images: 34 %
Epoch 2, duration: 119 s, loss: 1.7556, acc: 51.9240


 10%|█         | 2/20 [06:53<1:01:52, 206.24s/it]

Accuracy of the network on the test images: 61 %
Epoch 3, duration: 119 s, loss: 0.8683, acc: 75.0000


 15%|█▌        | 3/20 [10:20<58:30, 206.53s/it]  

Accuracy of the network on the test images: 70 %
Epoch 4, duration: 119 s, loss: 0.4664, acc: 86.7279


 20%|██        | 4/20 [13:48<55:11, 206.99s/it]

Accuracy of the network on the test images: 76 %
Epoch 5, duration: 119 s, loss: 0.2588, acc: 92.7328


 25%|██▌       | 5/20 [17:15<51:43, 206.91s/it]

Accuracy of the network on the test images: 80 %
Epoch 6, duration: 119 s, loss: 0.1462, acc: 96.1275


 30%|███       | 6/20 [20:42<48:19, 207.13s/it]

Accuracy of the network on the test images: 82 %
Epoch 7, duration: 119 s, loss: 0.0953, acc: 97.6471


 35%|███▌      | 7/20 [24:09<44:49, 206.89s/it]

Accuracy of the network on the test images: 83 %
Epoch 8, duration: 120 s, loss: 0.0468, acc: 98.9828


 40%|████      | 8/20 [27:35<41:20, 206.70s/it]

Accuracy of the network on the test images: 87 %
Epoch 9, duration: 119 s, loss: 0.0295, acc: 99.4363


 45%|████▌     | 9/20 [31:01<37:52, 206.58s/it]

Accuracy of the network on the test images: 87 %
Epoch 10, duration: 120 s, loss: 0.0246, acc: 99.4975


 50%|█████     | 10/20 [34:28<34:26, 206.62s/it]

Accuracy of the network on the test images: 87 %
Epoch 11, duration: 119 s, loss: 0.0218, acc: 99.5466


 55%|█████▌    | 11/20 [37:54<30:58, 206.47s/it]

Accuracy of the network on the test images: 88 %
Epoch 12, duration: 119 s, loss: 0.0192, acc: 99.5956


 60%|██████    | 12/20 [41:20<27:30, 206.36s/it]

Accuracy of the network on the test images: 87 %
Epoch 13, duration: 119 s, loss: 0.0197, acc: 99.5221


 65%|██████▌   | 13/20 [44:47<24:04, 206.42s/it]

Accuracy of the network on the test images: 87 %
Epoch 14, duration: 119 s, loss: 0.0208, acc: 99.5588


 70%|███████   | 14/20 [48:13<20:38, 206.39s/it]

Accuracy of the network on the test images: 88 %
Epoch 15, duration: 119 s, loss: 0.0183, acc: 99.6201


 75%|███████▌  | 15/20 [51:39<17:10, 206.17s/it]

Accuracy of the network on the test images: 87 %
Epoch 16, duration: 119 s, loss: 0.0185, acc: 99.5466


 80%|████████  | 16/20 [55:07<13:47, 206.83s/it]

Accuracy of the network on the test images: 87 %
Epoch 17, duration: 119 s, loss: 0.0191, acc: 99.6201


 85%|████████▌ | 17/20 [58:33<10:19, 206.50s/it]

Accuracy of the network on the test images: 87 %
Epoch 18, duration: 119 s, loss: 0.0187, acc: 99.6569


 90%|█████████ | 18/20 [1:01:58<06:52, 206.17s/it]

Accuracy of the network on the test images: 87 %
Epoch 19, duration: 119 s, loss: 0.0180, acc: 99.6569


 95%|█████████▌| 19/20 [1:05:23<03:25, 205.86s/it]

Accuracy of the network on the test images: 88 %
Epoch 20, duration: 119 s, loss: 0.0186, acc: 99.5588


100%|██████████| 20/20 [1:08:49<00:00, 206.49s/it]

Accuracy of the network on the test images: 87 %
Finished Training





In [None]:
# Plot the per-epoch statistics collected during training.

f, axarr = plt.subplots(2, 2, figsize=(12, 8))
panels = [
    (axarr[0, 0], training_losses, "Training loss", "loss"),
    (axarr[0, 1], training_accs, "Training acc", "accuracy (%)"),
    (axarr[1, 0], test_accs, "Test acc", "accuracy (%)"),
]
for ax, values, title, ylabel in panels:
    ax.plot(values)
    ax.set_title(title)
    ax.set_xlabel("epoch")
    ax.set_ylabel(ylabel)

# Only three series are plotted, so hide the unused fourth panel.
axarr[1, 1].axis("off")
f.tight_layout()
plt.show()

In [None]:
# tie the class indices to their names

def find_classes(dir):
    """Map the class sub-directories of ``dir`` to consecutive integer indices.

    Only directories are treated as classes; stray files in ``dir`` are skipped so
    they cannot shift the indices away from the folder-per-class layout.

    Args:
        dir: path of a directory that contains one sub-directory per class.

    Returns:
        Tuple ``(classes, class_to_idx)``: the alphabetically sorted class names and
        a dict mapping each name to its position in that list.
    """
    with os.scandir(dir) as entries:
        classes = sorted(entry.name for entry in entries if entry.is_dir())
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx
# Build the sorted class-name list and the name -> index mapping from the training folder.
classes, c_to_idx = find_classes(dataset_dir+"train")

In [None]:
# Test the model on a single image from the test set.

# switch the model to evaluation mode to make dropout and batch norm work in eval mode
model_ft.eval()

# Preprocess the image exactly like the test data (same resize and normalization as
# test_tfms); a different input size or normalization than the one the model was
# trained and evaluated with would degrade the prediction.
loader = test_tfms
# convert to RGB so grayscale/RGBA files also yield the 3 channels the transforms expect
image = Image.open(dataset_dir+"test/Mercedes-Benz C-Class Sedan 2012/01977.jpg").convert("RGB")
image = loader(image).float()
image = image.unsqueeze(0)  # add the batch dimension
image = image.to(device)    # follow the configured device instead of assuming CUDA

# inference only: no gradients are needed
with torch.no_grad():
    output = model_ft(image)

# softmax turns the raw logits into class probabilities, so `conf` is a value in [0, 1]
conf, predicted = torch.max(torch.softmax(output, dim=1), 1)

In [None]:
# Show the classified image, then print the predicted class name with its score.
sample_path = dataset_dir + "test/Mercedes-Benz C-Class Sedan 2012/01977.jpg"
display(Image.open(sample_path))
predicted_name = classes[predicted.item()]
print(predicted_name, "confidence: ", conf.item())