#load CSV and marco

In [None]:
import os
import numpy as np
import keras
from keras.models import Sequential
from keras import layers, models
from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D
from sklearn.metrics import accuracy_score
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image as img
import pandas as pd
from keras.callbacks import ModelCheckpoint

In [None]:
# run on colab
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
!ls
%cd "drive/My Drive/Colab Notebooks/HW1"
!ls

Mounted at /content/drive
drive  sample_data
/content/drive/My Drive/Colab Notebooks/HW1
best_model.h5  datas  HW1_H.ipynb


In [None]:
labels = pd.read_csv("datas/training_labels.csv")
print(labels.columns)
# add labels to dict
# labels to index
label_dic = dict()
# index to labels
cnt_dic = dict()
cnt = 0
for i in range(labels.shape[0]):
    if (label_dic.get(labels['label'][i]) is None):
        label_dic[labels['label'][i]] = cnt
        cnt_dic[cnt] = labels['label'][i]
        cnt += 1
print(label_dic)
print(cnt_dic)

Index(['id', 'label'], dtype='object')
{'Ford F-150 Regular Cab 2007': 0, 'BMW X6 SUV 2012': 1, 'BMW 1 Series Coupe 2012': 2, 'Fisker Karma Sedan 2012': 3, 'Dodge Ram Pickup 3500 Crew Cab 2010': 4, 'Dodge Dakota Crew Cab 2010': 5, 'Chevrolet Monte Carlo Coupe 2007': 6, 'Aston Martin Virage Coupe 2012': 7, 'MINI Cooper Roadster Convertible 2012': 8, 'Audi S5 Coupe 2012': 9, 'Chevrolet Express Van 2007': 10, 'Dodge Dakota Club Cab 2007': 11, 'Dodge Ram Pickup 3500 Quad Cab 2009': 12, 'Nissan Leaf Hatchback 2012': 13, 'Volvo 240 Sedan 1993': 14, 'Chevrolet Sonic Sedan 2012': 15, 'Ford Freestar Minivan 2007': 16, 'Geo Metro Convertible 1993': 17, 'Jeep Liberty SUV 2012': 18, 'Ford Mustang Convertible 2007': 19, 'Lamborghini Diablo Coupe 2001': 20, 'Land Rover Range Rover SUV 2012': 21, 'Ford Focus Sedan 2007': 22, 'Hyundai Veracruz SUV 2012': 23, 'Audi TT RS Coupe 2012': 24, 'Aston Martin V8 Vantage Convertible 2012': 25, 'Ford Ranger SuperCab 2011': 26, 'Lamborghini Gallardo LP 570-4 Supe

In [None]:
num_classes = 196

# CNN Architecture

In [25]:
def identity_block(model, filters):
    filters1, filters2, filters3 = filters
    id_m = layers.Conv2D(filters1, (1, 1),
                         kernel_initializer='he_uniform')(model)
    id_m = layers.BatchNormalization(axis=3)(id_m)
    id_m = layers.Activation('relu')(id_m)
    id_m = layers.Conv2D(filters2, (3, 3),
                         padding='same',
                         kernel_initializer='he_uniform')(id_m)
    id_m = layers.BatchNormalization(axis=3)(id_m)
    id_m = layers.Activation('relu')(id_m)
    id_m = layers.Conv2D(filters3, (1, 1),
                         kernel_initializer='he_uniform')(model)
    id_m = layers.BatchNormalization(axis=3)(id_m)
    id_m = layers.Activation('relu')(id_m)
    # connect two path
    id_m = layers.add([id_m, model])
    id_m = layers.Activation('relu')(id_m)
    return id_m

In [26]:
def conv_block(model, filters):
    filters1, filters2, filters3 = filters
    cb = layers.Conv2D(filters1, (1, 1), strides=(2, 2),
                       kernel_initializer='he_uniform')(model)
    cb = layers.BatchNormalization(axis=3)(cb)
    cb = layers.Activation('relu')(cb)
    cb = layers.Conv2D(filters2, (3, 3), padding='same',
                       kernel_initializer='he_uniform')(cb)
    cb = layers.BatchNormalization(axis=3)(cb)
    cb = layers.Activation('relu')(cb)
    cb = layers.Conv2D(filters3, (1, 1), strides=(2, 2),
                       kernel_initializer='he_uniform')(model)
    cb = layers.BatchNormalization(axis=3)(cb)
    cb = layers.Activation('relu')(cb)
    # shortcut
    shortcut = layers.Conv2D(filters3, (1, 1), strides=(2, 2),
                             kernel_initializer='he_uniform')(model)
    shortcut = layers.BatchNormalization(axis=3)(shortcut)
    # connect two path
    x = layers.add([cb, shortcut])
    x = layers.Activation('relu')(cb)
    return cb

In [28]:
# transfer learning
transfer = keras.applications.Xception(weights='imagenet',
                                       input_shape=(128, 128, 3),
                                       include_top=False)
transfer.trainable = True

input_layer = layers.Input(shape=(128, 128, 3))
model = transfer(input_layer, training=True)
model = layers.GlobalAveragePooling2D()(model)

model = layers.Dense(num_classes,
                     activation='softmax',)(model)

m = models.Model(input_layer, model)
# m.summary()
opt = keras.optimizers.SGD()
m.compile(loss='categorical_crossentropy',
          optimizer=opt, metrics=['accuracy'])

# Training

# load training data

In [11]:
try:
    train_img = np.load("datas/train_data.npy")
    train_label = np.load("datas/train_labels.npy")
except:
    print("Please run LoadDate.ipynb first")

In [12]:
train_label = keras.utils.to_categorical(train_label, num_classes)

In [13]:
edge1 = int(train_img.shape[0]/10)
edge2 = int(2*train_img.shape[0]/10)
main_train = np.concatenate((train_img[0:edge1], train_img[edge2:]))
main_train_label = np.concatenate((train_label[0:edge1], train_label[edge2:]))
val_train = train_img[edge1:edge2]
val_train_label = train_label[edge1:edge2]

print(main_train.shape[0])
print(main_train_label.shape[0])
print(val_train.shape[0])
print(val_train_label.shape[0])

10066
10066
1119
1119


#Data Augmentation

In [16]:
datagen = ImageDataGenerator(
    zca_whitening=False,
    rotation_range=50,
    width_shift_range=0.6,
    height_shift_range=0.6,
    shear_range=0.6,
    zoom_range=0.6,
    horizontal_flip=True,
    fill_mode='nearest')

# Save model

In [14]:
checkpoint = ModelCheckpoint("best_model.h5", monitor='loss', verbose=1,
                             save_best_only=True, save_weights_only=True,
                             mode='auto', save_freq='epoch')

# Training the model

In [None]:
batch_size = 64
epochs = 150

try:
    m.load_weights("best_model.h5")
except:
    print("no save model")
datagen.fit(train_img)
m.fit(datagen.flow(main_train, main_train_label, batch_size=batch_size),
      epochs=epochs,
      validation_data=(val_train, val_train_label),
      shuffle=True,
      callbacks=[checkpoint])

no save model
Epoch 1/150

# Second Training

In [None]:
datagen.fit(train_img)
batch_size = 64
epochs = 100

try:
    m.load_weights("best_model.h5")
except:
    print("no save model")

m.fit(datagen.flow(train_img, train_label, batch_size=batch_size),
      epochs=epochs,
      shuffle=True,
      callbacks=[checkpoint])

#Testing

# load testing data

In [None]:
try:
    test_img = np.load("datas/test.npy")
    test_id = np.load("datas/test_ids.npy")
except:
    print("Please run LoadDate.ipynb first")

# predict

In [21]:
test_img = test_img.astype('float32')
try:
    m.load_weights("best_model.h5")
except:
    print("no pretrained model")
pred = m.predict(test_img)
pred = np.argmax(pred, axis=1)
pred_label = np.array([], dtype="object")

for i in pred:
    pl = cnt_dic[i]
    pred_label = np.concatenate((pred_label, [pl]))
print(pred_label.shape)
print(pred_label)

(5000,)
['Dodge Durango SUV 2012' 'Volvo C30 Hatchback 2012'
 'Dodge Caravan Minivan 1997' ... 'Audi S4 Sedan 2007'
 'Volvo 240 Sedan 1993' 'Mazda Tribute SUV 2011']


#Save CSV

In [22]:
# id = test_id, label = pred_label
dict = {'id': test_id, "label": pred_label}
df = pd.DataFrame(dict)
df.to_csv('pred.csv', index=False)