In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision.models as models
from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from pathlib import Path


from torch.utils.data import *

import time
import copy
import os

from collections import OrderedDict

In [None]:
!pip install kaggle

In [None]:
from google.colab import files

# Upload kaggle.json (the Kaggle API token) from the local machine.
# The returned {filename: bytes} mapping is assigned to a variable on purpose:
# leaving the call as the cell's last expression would echo the file contents
# (i.e. the API key) into the saved notebook output.
uploaded = files.upload()
print("Uploaded:", list(uploaded))

In [None]:
# Put the uploaded API token where the Kaggle CLI looks for it.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset archive into the current working directory.
!kaggle datasets download -d jutrera/stanford-car-dataset-by-classes-folder

In [None]:
from zipfile import ZipFile

# Unpack the downloaded archive into the current working directory.
file_name = "stanford-car-dataset-by-classes-folder.zip"
# The handle is deliberately NOT called `zip`: a notebook-level `zip` variable
# would shadow the builtin for every later cell that calls zip(...).
with ZipFile(file_name, 'r') as archive:
    archive.extractall()
    print('done')

In [None]:
# Preview the class names. names.csv has no header row (one class name per
# line), so the column name is supplied explicitly; otherwise the first class
# would be consumed as the header.
df = pd.read_csv('names.csv', names=["label"])
df.head()

In [None]:

# Per-split preprocessing pipelines.
# The mean/std passed to Normalize are the ImageNet channel statistics that
# torchvision's pretrained models were trained with.
data_transforms = {
    # Training: random augmentation, final size 224x224.
    'train': transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Validation: deterministic, same geometry as 'test'.
    # (Previously CenterCrop(224) followed by CenterCrop(299): the second crop
    # is larger than its input, so it zero-padded the image back out to 299,
    # and there was no Resize in front of the crop.)
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Test: deterministic resize + centre crop to 224x224.
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [None]:


# Build the train/validation split and their loaders from the training folder.
train_dir = 'car_data/car_data/train'
test_dir = 'car_data/car_data/test'
label_dir = 'names.csv'

batch_size = 32
split_seed = 42  # fixed so the train/valid split is identical on every run

# Two views over the same image folder: ImageFolder sorts classes and files,
# so index i refers to the same image in both. `dataset` applies the random
# training augmentations; `eval_dataset` applies the deterministic
# Resize(256) + CenterCrop(224) pipeline that is also used for test-time
# predictions, so validation accuracy is not measured on augmented images.
dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
eval_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['test'])

valid_size = int(0.1 * len(dataset))
train_size = len(dataset) - valid_size
dataset_sizes = {'train': train_size, 'valid': valid_size}

# One seeded permutation of the indices, cut into disjoint train/valid parts.
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()
train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[:train_size])
valid_dataset = torch.utils.data.Subset(eval_dataset, shuffled_indices[train_size:])

dataloaders = {
    'train': torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'valid': torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False),
}

print("Total Number of Samples: ",len(dataset))
print("Number of Samples in Train: ",len(train_dataset))
print("Number of Samples in Valid: ",len(valid_dataset))
print("Number of Classes: ",len(dataset.classes))

print(dataset.classes[0])

In [None]:
# Show the class names next to the dataset summary printed above.
# names.csv has no header row, so the column name is given explicitly;
# otherwise the first class would be consumed as the header.
df = pd.read_csv(label_dir, names=["label"])
df.head()

In [None]:

def imshow(image, ax=None, title=None, normalize=True):
    """Draw a channels-first image tensor on a matplotlib axis.

    Args:
        image: tensor laid out as (channels, height, width).
        ax: axis to draw on; a new figure/axis is created when omitted.
        title: accepted for API compatibility; not used.
        normalize: when True, undo the mean/std normalisation and clip the
            result to [0, 1] before drawing.

    Returns:
        The axis the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()

    # matplotlib wants (height, width, channels).
    pixels = np.transpose(image.numpy(), (1, 2, 0))

    if normalize:
        channel_mean = np.array([0.485, 0.456, 0.406])
        channel_std = np.array([0.229, 0.224, 0.225])
        pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

    ax.imshow(pixels)

    # Strip the frame, tick marks and tick labels so only the picture remains.
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.tick_params(axis='both', length=0)
    ax.set_xticklabels('')
    ax.set_yticklabels('')
    return ax


# Sanity check: report the split sizes and draw the first five images of one
# training batch (after augmentation, with normalisation undone).
print(" Sizes of Datasets: ", len(valid_dataset), len(train_dataset))

images, labels = next(iter(dataloaders['train']))

fig, axes = plt.subplots(figsize=(16,5), ncols=5)
for column, ax in enumerate(axes):
    imshow(images[column], ax=ax, normalize=True)

In [None]:
# Pick the compute device and load the pretrained backbone.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One of: 'densenet', 'vgg', 'resnet', 'inception'.
# `num_in_features` is the input width of the layer that gets replaced by the
# new classifier head for the chosen architecture.
model_name = 'resnet'
if model_name == 'densenet':
    model = models.densenet161(pretrained=True)
    num_in_features = 2208
    print(model)

elif model_name == 'vgg':
    model = models.vgg19(pretrained=True)
    num_in_features = 25088
    print(model.classifier)

elif model_name == 'resnet':
    model = models.resnet152(pretrained=True)
    num_in_features = 2048
    print(model.fc)

elif model_name == 'inception':
    # NOTE(review): torchvision documents inception_v3 as expecting 299x299
    # inputs, while the transforms in this notebook produce 224x224 - confirm
    # before switching to this backbone.
    model = models.inception_v3(pretrained=True)
    model.aux_logits = False  # use only the main classifier output
    num_in_features = 2048
    print(model.fc)

else:
    # Fail here with a clear message instead of continuing with `model` and
    # `num_in_features` undefined and hitting a NameError in a later cell.
    raise ValueError(
        "Unknown model {!r}, please choose 'densenet', 'vgg', 'resnet' or "
        "'inception'".format(model_name)
    )

In [None]:

# Freeze the pretrained backbone so that only the classifier head attached
# afterwards is trained. The attribute is `requires_grad`; the previous
# spelling `require_grad` just created an unused attribute and froze nothing.
for param in model.parameters():
    param.requires_grad = False


def build_classifier(num_in_features, hidden_layers, num_out_features):
    """Build the fully connected head that replaces a backbone's classifier.

    Args:
        num_in_features: width of the features fed into the head.
        hidden_layers: sequence of hidden layer widths, or None / empty for a
            single linear layer.
        num_out_features: number of output classes.

    Returns:
        nn.Sequential. Without hidden layers it contains one Linear named
        'fc0'. Otherwise it contains 'fc{i}' / 'relu{i}' / 'drop{i}' triples
        (dropout 0.6 after the first layer, 0.5 after the others) followed by
        a Linear named 'output'.
    """
    classifier = nn.Sequential()

    if not hidden_layers:
        # Output width follows num_out_features (was hard-coded to 196).
        classifier.add_module('fc0', nn.Linear(num_in_features, num_out_features))
        return classifier

    classifier.add_module('fc0', nn.Linear(num_in_features, hidden_layers[0]))
    classifier.add_module('relu0', nn.ReLU())
    classifier.add_module('drop0', nn.Dropout(.6))

    # Chain each hidden width to the next one.
    for i in range(1, len(hidden_layers)):
        classifier.add_module('fc' + str(i), nn.Linear(hidden_layers[i - 1], hidden_layers[i]))
        classifier.add_module('relu' + str(i), nn.ReLU())
        classifier.add_module('drop' + str(i), nn.Dropout(.5))

    classifier.add_module('output', nn.Linear(hidden_layers[-1], num_out_features))
    return classifier

In [None]:

# Attach a fresh classifier head and set up loss, optimizer and LR schedule.
hidden_layers = None #[1050,500]
classifier = build_classifier(num_in_features, hidden_layers, 196)
print(classifier)

criterion = nn.CrossEntropyLoss()

if model_name == 'densenet':
    model.classifier = classifier
    optimizer = optim.Adadelta(model.parameters())
    sched = optim.lr_scheduler.StepLR(optimizer, step_size=4)
elif model_name == 'vgg':
    model.classifier = classifier
    optimizer = optim.Adam(model.classifier.parameters(), lr=0.0001)
    sched = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
elif model_name == 'resnet':
    model.fc = classifier
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # mode='max': the monitored metric is an accuracy (higher is better).
    # `threshold` is the minimum *relative* improvement that counts as
    # progress; the previous value of 0.9 required accuracy to grow by 90%
    # per step, so nearly every epoch would register as a plateau.
    sched = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, threshold=1e-4)
elif model_name == 'inception':
    model.fc = classifier
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    # No LR schedule for this backbone; defined so the training call, which
    # always passes `sched`, does not hit a NameError.
    sched = None
else:
    # Without this, `optimizer`/`sched` stay undefined and the failure only
    # shows up as a NameError in the training cell.
    raise ValueError("No training setup defined for model {!r}".format(model_name))

In [None]:
# Training
def train_model(model, criterion, optimizer, sched, num_epochs=5,device='cuda',
                checkpoint_path="/content/drive/My Drive/resnetCars.pt"):
    """Train `model` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'valid' phase over the
    notebook-level `dataloaders` dict; per-phase averages are computed with
    the sample counts in the notebook-level `dataset_sizes` dict.

    Args:
        model: network to train; it must already be on `device`.
        criterion: loss callable invoked as criterion(outputs, labels).
        optimizer: optimizer whose step() is called after every training batch.
        sched: learning-rate scheduler, or None. It is stepped once per epoch
            after the validation phase; a ReduceLROnPlateau scheduler is given
            the validation accuracy, any other scheduler is stepped without
            arguments.
        num_epochs: number of epochs to run.
        device: device the input batches and labels are moved to.
        checkpoint_path: file the state_dict is written to every time the
            validation accuracy improves; None disables checkpointing.

    Returns:
        (model, train_results, valid_results). The two result lists hold one
        [epoch_loss, epoch_acc] pair per epoch.
    """
    start = time.time()
    train_results = []
    valid_results = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            # Switches dropout / batch-norm between training and eval behaviour.
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss is a per-batch mean; re-weight by batch size so the
                # epoch figure is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            if is_training:
                train_results.append([epoch_loss, epoch_acc])
            else:
                valid_results.append([epoch_loss, epoch_acc])

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'valid':
                # Advance the LR schedule once per epoch (it was accepted as
                # an argument before but never stepped).
                if sched is not None:
                    if isinstance(sched, torch.optim.lr_scheduler.ReduceLROnPlateau):
                        sched.step(epoch_acc)
                    else:
                        sched.step()

                # Keep (and optionally persist) the best weights seen so far.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    if checkpoint_path is not None:
                        torch.save(best_model_wts, checkpoint_path)

        print()

    time_elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Return the model in its best validated state, not its last-epoch state.
    model.load_state_dict(best_model_wts)

    return model,train_results,valid_results


In [None]:
# Mount Google Drive at /content/drive; the checkpoint and prediction paths
# used in this notebook point below that directory.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Run training. `device` is passed explicitly: train_model defaults to 'cuda',
# which would send the batches to the GPU even when the model was placed on
# the CPU by the device-selection cell.
epochs = 60
model.to(device)
model, train_results, valid_results = train_model(
    model, criterion, optimizer, sched, epochs, device=device)

In [None]:
# Write the model's current weights to Google Drive.
model_save_name = 'classifier.pt'
path = "/content/drive/My Drive/" + model_save_name
final_state = model.state_dict()
torch.save(final_state, path)

In [None]:
# Reload the saved weights from Google Drive.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
model_save_name = 'classifier.pt'
path = F"/content/drive/My Drive/{model_save_name}"
model.load_state_dict(torch.load(path, map_location=device))

In [None]:
# Load the checkpoint written during training and move the model to the
# active device. map_location lets a GPU-written checkpoint load on CPU too.
model.load_state_dict(torch.load('/content/drive/My Drive/resnetCars.pt', map_location=device))
model.to(device)

In [None]:


# Predict a class id for every image in the test folder.
label_df = pd.read_csv('names.csv', names=["label"])

test_dir = 'car_data/car_data/test'

print("Predictions on Test Set:")
model.eval()

# A separate name is used so the training `dataset` object is not overwritten.
test_dataset = datasets.ImageFolder(test_dir, transform=data_transforms['test'])
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64,
                                         shuffle=False, num_workers=2)

# File stems in dataset order. The loader is not shuffled, so batches arrive
# in exactly this order.
image_names = [Path(image_path).stem for image_path, _ in test_dataset.imgs]

file_names = []
predicted_car = []
predicted_class = []

with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, pred = torch.max(outputs, 1)

        # Offset into image_names by the number of images already processed;
        # indexing with the position inside the batch would repeat the first
        # 64 names for every batch.
        offset = len(file_names)
        file_names.extend(image_names[offset:offset + len(inputs)])
        # +1 shifts the 0-based ImageFolder class index to a 1-based id
        # (presumably the id convention of the target file - confirm).
        predicted_car.extend((pred + 1).cpu().tolist())

# One predicted id per test image, aligned with `image_names`.
results = list(predicted_car)

In [None]:
# Assemble the predictions table and write it to Google Drive.
print("Predictions on Test Set:")
# `predicted_car` holds one id per test image in the same order as
# `image_names`, so both columns have equal length.
a = {'Id': image_names, 'Predicted': predicted_car}
df = pd.DataFrame(a)
# None means "do not truncate cell contents"; the old sentinel -1 is no
# longer accepted by current pandas.
pd.set_option('display.max_colwidth', None)

# df = df.sort_values(by=['Id'])
df.to_csv('/content/drive/My Drive/predictions.csv')
df