# Cars' Year Make Model Prediction using CNN
I have divided the complete modeling activity into 3 parts and created one notebook for each part.


1.   **Pre-processing**
2.   **Training (Transfer Learning) and Validation**
3.   **Prediction**



# Step 1. Pre-processing of image dataset
The steps involved in preparing the image dataset are:


1.   Download the dataset and extract it into the Google Colab workspace
2.   Split the initial training set into training and validation sets and organize the images into one folder per class label
3.   Put the test dataset in the test folder



In [22]:
!pip install console_progressbar

Collecting console_progressbar
  Downloading https://files.pythonhosted.org/packages/70/dd/5103450098d904eb276c188fe500e1d757cb82ffdc02c02aa4de8faaccea/console_progressbar-1.1.1.tar.gz
Building wheels for collected packages: console-progressbar
  Building wheel for console-progressbar (setup.py) ... done
  Stored in directory: /root/.cache/pip/wheels/76/75/2c/d26751f4f3d2cd72d58d10313113691e87995630c89aab231b
Successfully built console-progressbar
Installing collected packages: console-progressbar
Successfully installed console-progressbar-1.1.1


In [0]:
from google.colab import drive
import tarfile
import scipy.io
import numpy as np
import os
import cv2 as cv
import shutil
import random
from console_progressbar import ProgressBar

In [0]:
# Root working directory; every dataset and output folder is built from it.
path = '/content'

In [0]:
# Mount Google Drive under the working directory.
drive.mount('{}/gdrive'.format(path))

Downloading and unzipping the dataset from the given website.

In [0]:
!wget http://imagenet.stanford.edu/internal/car196/cars_train.tgz
!wget http://imagenet.stanford.edu/internal/car196/cars_test.tgz
!wget https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz -P
!unzip -qq /content/cars_train.tgz
!unzip -qq /content/cars_test.tgz
!unzip -qq /content/car_devkit.tgz

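Next, we define helper functions that read each car's bounding box from the devkit annotations, crop the car out of the full image with a margin of 16 pixels, and resize the crop to 224×224. The original training dataset is split randomly into train and valid sets in an 80:20 ratio. The folder structure for train, valid and test will look like the tree below (in the actual output each class folder is named by its zero-padded class id, e.g. `0001`):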

```
train
        /Class 1
                /00003.jpg
                /00234.jpg
                .
                .
                .
        /Class 2
                /00043.jpg
                /00004.jpg
                .
                .
                .
       /Class 196
                /03003.jpg
                /00034.jpg
                .
                .
                .
valid
        /Class 1
                /00003.jpg
                /00234.jpg
                .
                .
                .
        /Class 2
                /00043.jpg
                /00004.jpg
                .
                .
                .
       /Class 196
                /03003.jpg
                /00034.jpg
                .
                .
                .
test
          /00012.jpg
          /00239.jpg
          .
          .
          .
```


                

In [0]:
def ensure_folder(folder):
    """Create ``folder`` (and any missing parents) if it does not exist yet.

    Args:
        folder: path of the directory to create.

    Calling this on an already existing directory is a no-op.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)


def save_train_data(fnames, labels, bboxes):
    """Crop, resize and save the training images into train/valid class folders.

    A random 80% of the samples is written under ``path + '/train'`` and the
    remaining 20% under ``path + '/valid'``. Inside either folder each image
    goes into a sub-folder named after its label.

    Args:
        fnames: image file names, relative to ``path + '/cars_train'``.
        labels: label strings, one per image (used as folder names).
        bboxes: ``(x1, y1, x2, y2)`` bounding boxes, one per image.

    Raises:
        IOError: if a source image cannot be read by OpenCV.

    Note:
        Reads the module-level ``path``, ``img_width`` and ``img_height``.
    """
    src_folder = path+'/cars_train'
    num_samples = len(fnames)

    train_split = 0.8
    num_train = int(round(num_samples * train_split))
    # A set gives O(1) membership tests inside the loop below.
    train_indexes = set(random.sample(range(num_samples), num_train))

    pb = ProgressBar(total=100, prefix='Save train data', suffix='', decimals=3, length=50, fill='=')

    # margins of 16 pixels
    margin = 16

    for i in range(num_samples):
        fname = fnames[i]
        label = labels[i]
        # Cast to plain Python ints: the annotation values may be small
        # unsigned NumPy scalars, and arithmetic on those can wrap around
        # instead of going negative / growing past the dtype's range.
        x1, y1, x2, y2 = (int(v) for v in bboxes[i])

        src_path = os.path.join(src_folder, fname)
        src_image = cv.imread(src_path)
        if src_image is None:
            # cv.imread signals failure by returning None rather than raising.
            raise IOError('Could not read image: {}'.format(src_path))
        height, width = src_image.shape[:2]

        # Grow the box by the margin, clamped to the image borders.
        x1 = max(0, x1 - margin)
        y1 = max(0, y1 - margin)
        x2 = min(x2 + margin, width)
        y2 = min(y2 + margin, height)
        pb.print_progress_bar((i + 1) * 100 / num_samples)

        if i in train_indexes:
            dst_folder = path+'/train'
        else:
            dst_folder = path+'/valid'

        # One sub-folder per class label.
        dst_path = os.path.join(dst_folder, label)
        os.makedirs(dst_path, exist_ok=True)
        dst_path = os.path.join(dst_path, fname)

        crop_image = src_image[y1:y2, x1:x2]
        # OpenCV expects dsize as (width, height).
        dst_img = cv.resize(src=crop_image, dsize=(img_width, img_height))
        cv.imwrite(dst_path, dst_img)


def save_test_data(fnames, bboxes):
    """Crop, resize and save the test images into the flat test folder.

    Each image is read from ``path + '/cars_test'``, cropped to its bounding
    box plus a 16-pixel margin, resized, and written to ``path + '/test'``
    under the same file name.

    Args:
        fnames: image file names, relative to ``path + '/cars_test'``.
        bboxes: ``(x1, y1, x2, y2)`` bounding boxes, one per image.

    Raises:
        IOError: if a source image cannot be read by OpenCV.

    Note:
        Reads the module-level ``path``, ``img_width`` and ``img_height``.
    """
    src_folder = path+'/cars_test'
    dst_folder = path+'/test'
    num_samples = len(fnames)

    # Make sure the destination exists so the image writes below have a
    # directory to land in.
    os.makedirs(dst_folder, exist_ok=True)

    pb = ProgressBar(total=100, prefix='Save test data', suffix='', decimals=3, length=50, fill='=')

    # margins of 16 pixels
    margin = 16

    for i in range(num_samples):
        fname = fnames[i]
        # Cast to plain Python ints: the annotation values may be small
        # unsigned NumPy scalars, and arithmetic on those can wrap around
        # instead of going negative / growing past the dtype's range.
        x1, y1, x2, y2 = (int(v) for v in bboxes[i])

        src_path = os.path.join(src_folder, fname)
        src_image = cv.imread(src_path)
        if src_image is None:
            # cv.imread signals failure by returning None rather than raising.
            raise IOError('Could not read image: {}'.format(src_path))
        height, width = src_image.shape[:2]

        # Grow the box by the margin, clamped to the image borders.
        x1 = max(0, x1 - margin)
        y1 = max(0, y1 - margin)
        x2 = min(x2 + margin, width)
        y2 = min(y2 + margin, height)
        pb.print_progress_bar((i + 1) * 100 / num_samples)

        dst_path = os.path.join(dst_folder, fname)
        crop_image = src_image[y1:y2, x1:x2]
        # OpenCV expects dsize as (width, height).
        dst_img = cv.resize(src=crop_image, dsize=(img_width, img_height))
        cv.imwrite(dst_path, dst_img)


def process_train_data():
    """Load the training annotations and pass them to ``save_train_data``.

    Reads ``path + '/devkit/cars_train_annos'``, collects the bounding box,
    class id and file name from each annotation record, prints the distinct
    class ids and how many there are, then saves the images.
    """
    print("Processing train data...")
    annos_mat = scipy.io.loadmat(path+'/devkit/cars_train_annos')
    records = np.transpose(annos_mat['annotations'])

    fnames, class_ids, bboxes, labels = [], [], [], []

    for record in records:
        fields = record[0]
        # Fields 0-3 are the box corners (x1, y1, x2, y2), 4 the class id,
        # 5 the file name.
        box = tuple(fields[k][0][0] for k in range(4))
        class_id = fields[4][0][0]
        fname = fields[5][0]

        # Labels are the class id zero-padded to four digits.
        labels.append('%04d' % (class_id,))
        bboxes.append(box)
        class_ids.append(class_id)
        fnames.append(fname)

    unique_ids = np.unique(class_ids)
    print(unique_ids)
    print('The number of different cars is %d' % unique_ids.shape[0])

    save_train_data(fnames, labels, bboxes)


def process_test_data():
    """Load the test annotations and pass them to ``save_test_data``.

    Reads ``path + '/devkit/cars_test_annos'`` and collects the bounding box
    and file name from each annotation record before saving the images.
    """
    print("Processing test data...")
    annos_mat = scipy.io.loadmat(path+'/devkit/cars_test_annos')
    records = np.transpose(annos_mat['annotations'])

    fnames, bboxes = [], []

    for record in records:
        fields = record[0]
        # Fields 0-3 are the box corners (x1, y1, x2, y2), 4 the file name.
        box = tuple(fields[k][0][0] for k in range(4))
        bboxes.append(box)
        fnames.append(fields[4][0])

    save_test_data(fnames, bboxes)


In [0]:
# Target size (in pixels) for every cropped car image; read by
# save_train_data() and save_test_data().
img_width, img_height = 224, 224

# Seed Python's RNG so the random 80:20 train/valid split is the same on
# every run of the notebook.
SEED = 42
random.seed(SEED)

# Load the class-name table from the devkit metadata and show a sample entry.
cars_meta = scipy.io.loadmat(path+'/devkit/cars_meta')
class_names = cars_meta['class_names']  # shape=(1, 196)
class_names = np.transpose(class_names)
print('class_names.shape: ' + str(class_names.shape))
print('Sample class_name: [{}]'.format(class_names[8][0][0]))

# Output folders for the processed images.
ensure_folder(path+'/train')
ensure_folder(path+'/valid')
ensure_folder(path+'/test')

process_train_data()
process_test_data()

# clean up: the extracted originals are no longer needed once the crops are saved
shutil.rmtree(path+'/cars_train')
shutil.rmtree(path+'/cars_test')
# The devkit folder is left in place on purpose; remove it manually if desired.