In [122]:
from pathlib import Path
import requests
from tqdm.auto import tqdm
import tarfile
import matplotlib.pyplot as plt
import numpy as np
import scipy
import pandas as pd

# Download and Untar Data

In [2]:
WD_PATH = Path(".")
DATA_PATH = WD_PATH / "Data"

In [3]:
if not DATA_PATH.exists():
    DATA_PATH.mkdir()

In [228]:
train_url = "http://imagenet.stanford.edu/internal/car196/cars_train.tgz"
test_url = "http://imagenet.stanford.edu/internal/car196/cars_test.tgz"
devkit_url = "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz"
test_annos_withlabels_url = "http://imagenet.stanford.edu/internal/car196/cars_test_annos_withlabels.mat"

In [8]:
train_url.split('/')[-1]

'cars_train.tgz'

In [18]:
def dl(url, dest_dir):
    assert isinstance(dest_dir, Path), "dest_dir must be a Path object"
    filename = url.split('/')[-1]
    file_path = dest_dir / filename
    if not file_path.exists():
        with open(f'{dest_dir}/{filename}', 'wb') as f:
            response = requests.get(url, stream=True)
            total = int(response.headers.get('content-length'))
            with tqdm(total=total, unit='B', unit_scale=True, desc=filename) as pbar:
                for data in response.iter_content(chunk_size=1024*1024):
                    f.write(data)
                    pbar.update(1024*1024)
    else:
        return file_path
    return file_path

In [19]:
train_tar_path = dl(train_url, DATA_PATH)

In [20]:
test_tar_path = dl(test_url, DATA_PATH)

In [45]:
devkit_tar_path = dl(devkit_url, DATA_PATH)

HBox(children=(IntProgress(value=0, description='car_devkit.tgz', max=330960, style=ProgressStyle(description_…

HBox(children=(IntProgress(value=0, description='cars_test_annos_withlabels.mat', max=185758, style=ProgressSt…

PosixPath('Data/cars_test_annos_withlabels.mat')

In [53]:
def untar_tgz(tgz_path, dest_dir):
    assert isinstance(tgz_path, Path), "tgz_path must be a Path object"
    assert tgz_path.exists(), "tgz_path does not exists"
    assert tgz_path.suffix == ".tgz", "tgz_path is not a .tgz file"
    if tgz_path.stem == "car_devkit":
        final_path = dest_dir / "devkit"
    else:
        final_path = dest_dir / tgz_path.stem
    if not final_path.exists():
        tar = tarfile.open(tgz_path, 'r:gz')
        tar.extractall(dest_dir)
    return final_path

In [54]:
train_path = untar_tgz(train_tar_path, DATA_PATH)

In [55]:
test_path = untar_tgz(test_tar_path, DATA_PATH)

In [56]:
devkit_path = untar_tgz(devkit_tar_path, DATA_PATH)

In [230]:
dl(test_annos_withlabels_url, devkit_path)

HBox(children=(IntProgress(value=0, description='cars_test_annos_withlabels.mat', max=185758, style=ProgressSt…

PosixPath('Data/devkit/cars_test_annos_withlabels.mat')

# Explore

In [231]:
!ls {str(devkit_path.resolve())}

cars_meta.mat			cars_train_annos.mat  train_perfect_preds.txt
cars_test_annos.mat		eval_train.m
cars_test_annos_withlabels.mat	README.txt


In [115]:
cars_meta = scipy.io.loadmat(devkit_path / "cars_meta.mat")

In [116]:
cars_meta['class_names']

array([[array(['AM General Hummer SUV 2000'], dtype='<U26'),
        array(['Acura RL Sedan 2012'], dtype='<U19'),
        array(['Acura TL Sedan 2012'], dtype='<U19'),
        array(['Acura TL Type-S 2008'], dtype='<U20'),
        array(['Acura TSX Sedan 2012'], dtype='<U20'),
        array(['Acura Integra Type R 2001'], dtype='<U25'),
        array(['Acura ZDX Hatchback 2012'], dtype='<U24'),
        array(['Aston Martin V8 Vantage Convertible 2012'], dtype='<U40'),
        array(['Aston Martin V8 Vantage Coupe 2012'], dtype='<U34'),
        array(['Aston Martin Virage Convertible 2012'], dtype='<U36'),
        array(['Aston Martin Virage Coupe 2012'], dtype='<U30'),
        array(['Audi RS 4 Convertible 2008'], dtype='<U26'),
        array(['Audi A5 Coupe 2012'], dtype='<U18'),
        array(['Audi TTS Coupe 2012'], dtype='<U19'),
        array(['Audi R8 Coupe 2012'], dtype='<U18'),
        array(['Audi V8 Sedan 1994'], dtype='<U18'),
        array(['Audi 100 Sedan 1994'], dtype='<U

In [222]:
cars_train_annos = scipy.io.loadmat(devkit_path / "cars_test_annos.mat")

In [223]:
cars_train_annos = cars_train_annos['annotations'].squeeze()

In [224]:
cars_train_annos.shape

(8041,)

In [225]:
df = pd.DataFrame(cars_train_annos)
 

In [226]:
df.head()

Unnamed: 0,bbox_x1,bbox_y1,bbox_x2,bbox_y2,fname
0,[[30]],[[52]],[[246]],[[147]],[00001.jpg]
1,[[100]],[[19]],[[576]],[[203]],[00002.jpg]
2,[[51]],[[105]],[[968]],[[659]],[00003.jpg]
3,[[67]],[[84]],[[581]],[[407]],[00004.jpg]
4,[[140]],[[151]],[[593]],[[339]],[00005.jpg]


In [209]:
df = df.applymap(np.squeeze)

In [210]:
df.head()

Unnamed: 0,bbox_x1,bbox_y1,bbox_x2,bbox_y2,class,fname
0,39,116,569,375,14,00001.jpg
1,36,116,868,587,3,00002.jpg
2,85,109,601,381,91,00003.jpg
3,621,393,1484,1096,134,00004.jpg
4,14,36,133,99,106,00005.jpg


In [211]:
df_labels = df[['fname', 'class']]

In [214]:
df_labels.to_csv('./Data/train_labels.csv')

In [235]:
def get_labels_df(annos_matfile):
    assert annos_matfile in ['cars_train_annos.mat', 'cars_test_annos_withlabels.mat'], \
           "Please select 'cars_train_annos.mat' or 'cars_test_annos_withlabels.mat'"
           
    cars_annos = scipy.io.loadmat(devkit_path / annos_matfile)
    cars_annos = cars_annos['annotations'].squeeze()
    df = pd.DataFrame(cars_annos)
    df = df.applymap(np.squeeze)
    return df[['fname', 'class']]

In [236]:
train_labels = get_labels_df('cars_train_annos.mat')
test_labels = get_labels_df('cars_test_annos_withlabels.mat')

In [None]:
train_imgs = list(train_path.glob("*"))
def show_img(imgs_list):
    idx = np.random.randint(len(train_imgs))
    img = plt.imread(imgs_list[idx])
    plt.imshow(img)