# Dude, Where's My House?
## Part 4: Training Models
## Authors: Aman Hafez, Kavan Pandya, Yan Nusinovich
Notebook drafted by Yan Nusinovich

## References

- https://www.analyticsvidhya.com/blog/2020/02/learn-image-classification-cnn-convolutional-neural-networks-3-datasets/
- https://medium.com/@vijayabhaskar96/tutorial-image-classification-with-keras-flow-from-directory-and-generators-95f75ebe5720
- https://machinelearningmastery.com/how-to-load-large-datasets-from-directories-for-deep-learning-with-keras/

## Code

In [41]:
import os
import pickle

import numpy as np
import pandas as pd
from keras.applications import VGG16
from keras.callbacks import EarlyStopping
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, InputLayer, BatchNormalization, Dropout
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical

## Model 1

In [2]:
# Stream images straight from the class-labelled sub-folders on disk.
# No augmentation or rescaling is configured on the generator itself.
imagegen = ImageDataGenerator()

# Training split: one-hot ("categorical") labels, shuffled, 64 images per batch.
train = imagegen.flow_from_directory(
    "data/train/",
    target_size=(256, 256),
    batch_size=64,
    shuffle=True,
    class_mode="categorical",
)

# Validation split: loaded with exactly the same settings as the training split.
val = imagegen.flow_from_directory(
    "data/validation/",
    target_size=(256, 256),
    batch_size=64,
    shuffle=True,
    class_mode="categorical",
)


Found 820 images belonging to 3 classes.
Found 423 images belonging to 3 classes.


In [11]:
# Model 1: a small CNN trained from scratch on 256x256 RGB images.
# TODO (carried over from the original notes): add batch size and ES
# (presumably early stopping).
model = Sequential([
    InputLayer(input_shape=(256, 256, 3)),

    # Three convolution + max-pooling stages; batch normalisation follows
    # the second and third pooling layers.
    Conv2D(filters=25, kernel_size=(5, 5), strides=(1, 1), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='same'),
    Conv2D(filters=50, kernel_size=(5, 5), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='same'),
    BatchNormalization(),
    Conv2D(filters=70, kernel_size=(3, 3), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='valid'),
    BatchNormalization(),

    # Classifier head: two 100-unit dense layers, dropout, then a 3-way softmax.
    Flatten(),
    Dense(units=100, activation='relu'),
    Dense(units=100, activation='relu'),
    Dropout(0.25),
    Dense(units=3, activation='softmax'),
])

model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for a fixed 30 epochs, evaluating on the validation generator each epoch.
model.fit_generator(
    train,
    epochs=30,
    validation_data=val,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x65a74bfd0>

## Model 2

In [None]:
# Model 2 runs these generators through `pretrained_model.predict(...)` and then
# pairs the resulting features with `train.labels` / `val.labels`.
# `.labels` is always in directory (filename) order, while a shuffled generator
# yields its batches in a random order, so shuffling here would attach the wrong
# label to every feature map. Keep shuffle=False for feature extraction;
# `model2.fit` shuffles the extracted arrays itself during training.
imagegen = ImageDataGenerator()
train = imagegen.flow_from_directory("data/train/", class_mode="categorical", shuffle=False, batch_size=64, target_size=(256, 256))
val = imagegen.flow_from_directory("data/validation/", class_mode="categorical", shuffle=False, batch_size=64, target_size=(256, 256))


In [3]:
# Load VGG16 with ImageNet weights but without its fully connected top
# (include_top=False).
pretrained_model = VGG16(weights='imagenet', include_top=False)

# Print the layer-by-layer architecture for reference.
pretrained_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [4]:
# Run both splits through the VGG16 base once and keep the outputs; the base is
# only used for prediction here, never trained.
# NOTE: the rows come back in the order the generator yields its batches, so they
# only line up with `train.labels` / `val.labels` when the generators were
# created with shuffle=False.
vgg_features_train, vgg_features_val = [
    pretrained_model.predict(split) for split in (train, val)
]

In [5]:
# One-hot encode the integer class labels stored on each generator so they can
# be used as targets for the categorical cross-entropy loss.
train_target, val_target = (
    to_categorical(train.labels),
    to_categorical(val.labels),
)

In [12]:
# Model 2: a small head trained on pre-computed VGG16 features rather than on
# raw pixels. The declared input shape (8, 8, 512) is that of the feature maps
# passed to `model2.fit`.
model2 = Sequential([
    Conv2D(filters=25, kernel_size=(5, 5), strides=(1, 1), padding='same',
           activation='relu', input_shape=(8, 8, 512)),
    MaxPool2D(pool_size=(2, 2), padding='same'),
    Conv2D(filters=50, kernel_size=(5, 5), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='same'),

    # Dense classifier: dropout is applied before batch normalisation here.
    Flatten(),
    Dense(units=100, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
    Dense(units=3, activation='softmax'),
])

model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer shapes and parameter counts.
model2.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 8, 8, 25)          320025    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 4, 4, 25)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 2, 2, 50)          31300     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 1, 1, 50)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 50)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 100)               5100      
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)              

In [13]:
# Train the head on the extracted VGG16 features (in-memory arrays, not generators).
model2.fit(
    x=vgg_features_train,
    y=train_target,
    batch_size=128,
    epochs=50,
    validation_data=(vgg_features_val, val_target),
)


Train on 820 samples, validate on 423 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x64a149ad0>

## Model 3

In [23]:
# Fresh generators for Model 3, with a smaller batch size (32) than Models 1 and 2.
# `imagegen` is reused further down to load the test images.
imagegen = ImageDataGenerator()

# Training split: one-hot labels, shuffled.
train = imagegen.flow_from_directory(
    "data/train/",
    target_size=(256, 256),
    batch_size=32,
    shuffle=True,
    class_mode="categorical",
)

# Validation split: same loader settings as the training split.
val = imagegen.flow_from_directory(
    "data/validation/",
    target_size=(256, 256),
    batch_size=32,
    shuffle=True,
    class_mode="categorical",
)


Found 730 images belonging to 3 classes.
Found 365 images belonging to 3 classes.


In [24]:
# Model 3: the Model 1 architecture with an extra dropout layer between the two
# dense layers, trained with early stopping.
model3 = Sequential([
    InputLayer(input_shape=(256, 256, 3)),

    # Three convolution + max-pooling stages; batch normalisation follows
    # the second and third pooling layers.
    Conv2D(filters=25, kernel_size=(5, 5), strides=(1, 1), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='same'),
    Conv2D(filters=50, kernel_size=(5, 5), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='same'),
    BatchNormalization(),
    Conv2D(filters=70, kernel_size=(3, 3), strides=(2, 2), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2, 2), padding='valid'),
    BatchNormalization(),

    # Classifier head: dropout after each 100-unit dense layer, then a 3-way softmax.
    Flatten(),
    Dense(units=100, activation='relu'),
    Dropout(0.25),
    Dense(units=100, activation='relu'),
    Dropout(0.25),
    Dense(units=3, activation='softmax'),
])

model3.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for 10 consecutive epochs and
# roll the model back to the weights from its best epoch.
early_stop = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0,
    patience=10,
    restore_best_weights=True,
)

# Up to 30 epochs; the callback may end training earlier.
model3.fit_generator(
    train,
    epochs=30,
    validation_data=val,
    callbacks=[early_stop],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


<keras.callbacks.callbacks.History at 0x653247a10>

**We kept Model 3 because it gave us the best results.**

## Running Model 3 on Test Data

In [25]:
# Load the test images without labels (class_mode=None). shuffle=False keeps the
# predictions in the same order as `test.filenames`, which the results table
# built later relies on.
test = imagegen.flow_from_directory(
    "data/test/",
    target_size=(256, 256),
    batch_size=1,
    shuffle=False,
    class_mode=None,
)

# Rewind the iterator so prediction starts from the first image.
test.reset()
pred = model3.predict_generator(test, verbose=1)

Found 8 images belonging to 1 classes.


In [26]:
# For each test image, pick the index of the class with the highest predicted score.
predicted_class_indices = pred.argmax(axis=1)


In [36]:
# Display the class-name -> integer-index mapping assigned by the training generator.
train.class_indices

{'Damaged': 0, 'Destroyed': 1, 'Good': 2}

In [27]:
# Invert the generator's {class name: index} mapping into {index: class name},
# then translate each predicted index back into its class name.
labels = {index: class_name for class_name, index in train.class_indices.items()}
predictions = [labels[index] for index in predicted_class_indices]

In [35]:
# Build a table with one row per test image (file name plus predicted class)
# and write it to disk without the pandas index column.
filenames = test.filenames
results = pd.DataFrame(
    data={
        "Filename": filenames,
        "Predictions": predictions,
    }
)
results.to_csv("test_results.csv", index=False)

## Save Model 3 and Train Indices

Reference for saving the training generator's class-index dictionary to a file with pickle:<br>
https://stackoverflow.com/questions/19201290/how-to-save-a-dictionary-to-a-file/32216025

In [42]:
def save_obj(obj, name, directory='model'):
    """Pickle ``obj`` to ``<directory>/<name>.pkl``.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    name : str
        File name without the ``.pkl`` extension.
    directory : str, optional
        Folder to write into. Defaults to ``'model'``, the location this helper
        has always written to. The folder is created when it does not exist, so
        the call no longer fails with ``FileNotFoundError`` on a fresh checkout.
    """
    # Create the target folder (and any missing parents); no-op if it already exists.
    os.makedirs(directory, exist_ok=True)
    with open(os.path.join(directory, name + '.pkl'), 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

# Persist the class-name -> index mapping of the training generator so predicted
# indices can be decoded later without rebuilding the generator.
save_obj(
    obj=train.class_indices,
    name="train_class_indices",
)

In [33]:
# Saving to an HDF5 file does not create missing parent folders, so make sure the
# target directory exists first; otherwise the save fails on a fresh checkout.
os.makedirs("./saved_model", exist_ok=True)
model3.save("./saved_model/final_model.h5")