# Train CNN-Model
## Extra images

This notebook trains our model on the pictures in the `sorted_extra_images` folder and its `train` / `val` sub-folders.

In [None]:
#load the modules
import keras
from keras import models, layers
from tensorflow.keras import regularizers
from keras.activations import relu, softmax
from tensorflow.keras.applications import VGG19
from keras.models import Sequential, load_model, Model
from tensorflow.keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Activation, Dropout, Dense, Flatten, concatenate
from matplotlib import pyplot as plt
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import pandas as pd
import numpy as np
from PIL import Image
from tensorflow.keras.metrics import TopKCategoricalAccuracy

from sklearn.preprocessing import OneHotEncoder

# Register PIL's Image module in sys.modules under the top-level name 'Image',
# so that a later `import Image` resolves to PIL.Image.
# NOTE(review): presumably a workaround for code that still uses the old
# pre-Pillow import style — confirm it is still required.
sys.modules['Image'] = Image

In [None]:
# Training images are streamed from disk by a Keras ImageDataGenerator, which
# rescales pixel values by 1/255 and augments each image with random rotation
# (rotation_range=40) and random zoom (zoom_range=0.2).
# More transformations are listed at https://keras.io/preprocessing/image/ ;
# width/height shift, shear and fill_mode are currently not applied.
train_data = ImageDataGenerator(rescale=1. / 255, rotation_range=40, zoom_range=0.2)

# Read the training pictures from the directory tree, resize them to 224x224
# and serve them in shuffled batches of 100.
train_generator = train_data.flow_from_directory(
    directory=r"../sorted_extra_images/train",
    batch_size=100,
    target_size=(224, 224),
    shuffle=True,
)

In [None]:
# Validation data generator: only rescales pixel values by 1/255, no augmentation.
val_data = ImageDataGenerator(rescale=1. / 255)

# Read the validation pictures from the directory tree, resize them to 224x224
# and serve them in shuffled batches of 100.
val_generator = val_data.flow_from_directory(
    directory=r"../sorted_extra_images/val",
    batch_size=100,
    target_size=(224, 224),
    shuffle=True,
)

## Convolutional neural network

### VGG19

In [None]:
# Load VGG19 with ImageNet weights and without its original classifier
# (include_top=False), for 224x224 RGB inputs.
vgg19 = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# The triple-quoted block below is a plain string literal, not executed code:
# the layer-freezing loop is disabled, so no VGG19 layer is marked non-trainable here.
'''
#the transfer learned model should be not trainable
for layer in vgg19.layers:
    layer.trainable = False
'''

# Classification head on top of the VGG19 output:
# Flatten -> Dense(1024, relu) -> Dropout(0.2) -> Dense(1024, relu).
# The layers are created in this exact order so auto-generated layer names are unchanged.
x = vgg19.output
for head_layer in (
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.2),
    Dense(1024, activation='relu'),
):
    x = head_layer(x)

# Softmax output layer with one unit per class (2231 classes).
predictors = Dense(2231, activation='softmax')(x)

# Assemble the full model: VGG19 input -> class probabilities.
vgg19model = Model(inputs=vgg19.input, outputs=predictors)

In [None]:
# File that receives the model whenever it improves.
filepath = "../models/extra_images_vgg19_trainable2.h5"

# Save only when the validation loss decreased compared to the best value so far.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop training after 3 consecutive epochs without improvement (patience=3).
early_stopping_monitor = EarlyStopping(patience=3)

callbacks_list = [checkpoint, early_stopping_monitor]

In [None]:
# Compile the model:
# - loss: categorical cross-entropy, matching the softmax output layer
# - optimizer: Adam with learning rate 5e-5 (passed as `learning_rate`; the old
#   `lr` keyword is deprecated and rejected by newer Keras versions)
# - metrics: top-5 categorical accuracy, passed as a list as `compile` documents
vgg19model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=5e-5),
    metrics=[tf.keras.metrics.TopKCategoricalAccuracy(k=5)],
)

In [None]:
# Load a previously saved model from disk; this loaded model is the one trained below.
# NOTE(review): `vgg19model` built and compiled above is not used for training —
# presumably this resumes from an earlier checkpoint; confirm.
model = load_model("../models/extra_images_vgg19_trainable.h5")

In [None]:
# Train the model.
# Step counts come from the generators themselves (images found // batch size)
# instead of hard-coded image totals, so they stay correct when the image
# folders or the generator batch size change. max(1, ...) guards against a
# zero step count when a folder holds fewer images than one batch.
batch_size = train_generator.batch_size
model_history = model.fit(
    train_generator,
    steps_per_epoch=max(1, train_generator.samples // batch_size),
    epochs=25,
    validation_data=val_generator,
    validation_steps=max(1, val_generator.samples // val_generator.batch_size),
    callbacks=callbacks_list,
)

### A second iteration with smaller learning rate 

In [None]:
# File that receives the model of the second training run whenever it improves.
filepath = "../models/Inception_model_lre-6.h5"

# Save only when the validation loss decreased compared to the best value so far.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop training after 3 consecutive epochs without improvement (patience=3).
early_stopping_monitor = EarlyStopping(patience=3)

callbacks_list = [checkpoint, early_stopping_monitor]

In [None]:
# Load the model to fine-tune.
# NOTE(review): the path points to an "Inception" model although this notebook
# builds VGG19 — presumably left over from another notebook; confirm the file.
model = load_model("../models/Inception_model.h5")

# Re-compile with plain SGD and a much smaller learning rate (1e-6).
# `learning_rate` replaces the deprecated `lr` keyword, and metrics are passed
# as a list as `compile` documents.
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=1e-6),
    metrics=[tf.keras.metrics.TopKCategoricalAccuracy(k=5)],
)

# Train the model.
# The generators decide the real batch size, so step counts are derived from
# them (images found // batch size) instead of hard-coded totals that must be
# adapted by hand. `fit` accepts generators directly; `fit_generator` is
# deprecated. max(1, ...) guards against a zero step count on tiny folders.
batch_size = train_generator.batch_size
model_history_2 = model.fit(
    train_generator,
    steps_per_epoch=max(1, train_generator.samples // batch_size),
    epochs=20,
    validation_data=val_generator,
    validation_steps=max(1, val_generator.samples // val_generator.batch_size),
    callbacks=callbacks_list,
)

In [None]:
# Figure laid out as a 1 x 2 grid of panels.
rows, columns = 1, 2
fig = plt.figure(figsize=(15, 5))

# Panel 1: loss curves.
# Each training run's per-epoch values live in its History object's `.history`
# dict; the lists of both runs are concatenated with "+" so the two training
# steps appear as one continuous curve.
loss_ax = fig.add_subplot(rows, columns, 1)
loss_ax.plot(model_history.history['loss'] + model_history_2.history['loss'])
loss_ax.plot(model_history.history['val_loss'] + model_history_2.history['val_loss'])
loss_ax.set_title('loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'val'], loc='upper left')