# This short notebook shows sample code for my master's application to MILA. For this sample, I decided to showcase the training of a DenseNet model.

## Thank you for taking the time to read this. Feel free to reach out if you have any questions or comments regarding this notebook.

#### Vincent Dandenault - vincent.dandenault@polymtl.ca 

## Import

In [None]:
# Notebook magic: set the Jupyter autosave interval to 30 seconds.
%autosave 30

In [28]:
%%capture 
# Install third-party packages used by the import cell; %%capture hides pip's output.
!pip install keras_tqdm
!pip install utils2
!pip install keras

In [40]:
%%capture 
#Tools
import importlib
import utils2
from utils2 import *
import os 
import cv2
import json
from multiprocessing import Process
from multiprocessing import Queue

#tf
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import datasets, layers, models

#Keras 
import keras as keras
from keras import regularizers
from keras.layers import concatenate
from keras.models import Model
from keras_tqdm import TQDMNotebookCallback
from keras.models import load_model
from keras.layers.merge import add, concatenate, Multiply
from keras import regularizers
from keras.utils import to_categorical

# Libs
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

#sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

#SciPy
from scipy.ndimage import gaussian_filter
from scipy.ndimage import map_coordinates

## Import Data 

In [80]:
# Prefix prepended to every file this notebook writes or reads back
# (model checkpoint, history JSON, submission CSV). Empty means a relative path.
PATH = ''

In [81]:
def load_data():
    """Load CIFAR-10 through ``datasets.cifar10`` and hand back both splits.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))`` as
        produced by ``datasets.cifar10.load_data()``.
    """
    train_split, test_split = datasets.cifar10.load_data()
    images_tr, labels_tr = train_split
    images_te, labels_te = test_split
    return (images_tr, labels_tr), (images_te, labels_te)

In [82]:
 # Module-level copy of the raw CIFAR-10 splits; the Dataset class calls load_data() itself.
 (x_train, y_train), (x_test, y_test) = load_data()

In [105]:
# Sanity check: show the shape of the raw training-label array.
print(y_train.shape)

(50000, 1)


In [114]:
class Dataset:
    """CIFAR-10 wrapper that scales, one-hot encodes and splits the data.

    Attributes set by ``__init__``:
        X_train, Y_train: training images / one-hot labels (80% of the
            original training set, plus rotated copies when augmentation
            is enabled).
        X_valid, Y_valid: validation images / one-hot labels (the other 20%).
        X_test, Y_test: test images / one-hot labels.
        y_test: alias of ``Y_test`` kept for backward compatibility.
        oneHotTransformer: the ``OneHotEncoder`` fitted on the training labels.
    """

    def __init__(self, name, preprocess=False, amplitude=[5], stdDev=[2], rotate_augment=False, angles=[60]):
        """Load the data via ``load_data()`` and prepare every split.

        Args:
            name: label for this dataset (used in output file names).
            preprocess: stored as ``isPreprocessed``; selects the branch
                taken by ``convertData``.
            amplitude: stored on the instance; not read by this class.
            stdDev: stored on the instance; not read by this class.
            rotate_augment: when true, append one rotated copy of the
                training split per entry of ``angles``.
            angles: rotation angles, passed to ``cv2.getRotationMatrix2D``.
        """
        self.name = name
        self.isPreprocessed = preprocess
        # Copy list arguments so the shared default lists are never aliased
        # onto an instance (a later in-place edit would leak across objects).
        self.amplitude = list(amplitude) if isinstance(amplitude, list) else amplitude
        self.stdDev = list(stdDev) if isinstance(stdDev, list) else stdDev
        self.angles = list(angles) if isinstance(angles, list) else angles
        self.IMAGE_SIZE_CROP = 32
        self.IMAGE_SIZE = 100

        (x_train, y_train), (x_test, y_test) = load_data()

        # Scale pixel values by 1/255 and keep BOTH splits: the test images
        # were previously computed and then dropped.
        self.X_train = x_train / 255.
        self.X_test = x_test / 255.

        # NOTE(review): newer scikit-learn releases rename ``sparse`` to
        # ``sparse_output`` - confirm against the installed version.
        self.oneHotTransformer = OneHotEncoder(handle_unknown='ignore', sparse=False)

        self.Y_train = self.oneHotTransformer.fit_transform(y_train.reshape(-1, 1))
        # Encode test labels with the encoder fitted on the training labels
        # (transform, not fit_transform) so both share one class ordering.
        self.Y_test = self.oneHotTransformer.transform(y_test.reshape(-1, 1))
        self.y_test = self.Y_test

        self.splitData()

        if rotate_augment:
            self._augmentByRotation()

    def _augmentByRotation(self):
        """Append one rotated copy of the training split per configured angle."""
        print("Augmentation of the dataset by rotation")
        X_train_original = self.X_train
        Y_train_original = self.Y_train
        q = Queue()
        workers = []
        for angle in (self.angles or []):
            p = Process(target=self.applyRotationToDataset, args=(X_train_original, angle, q,))
            workers.append(p)
            p.start()
            print('*', end='')
            print('\n preparing for join')

        # Drain the queue BEFORE joining: a child that has put data on a
        # queue does not terminate until that data has been consumed.
        rotated_sets = [q.get() for _ in workers]

        for p in workers:
            p.join()
            print('*', end='')

        if rotated_sets:
            self.X_train = np.concatenate([X_train_original] + rotated_sets)
            # Rotation leaves the labels unchanged, so repeat them once per copy.
            self.Y_train = np.concatenate([Y_train_original] * (len(rotated_sets) + 1))

    def convertData(self, data):
        """Reshape every entry of ``data`` into a single-channel square image.

        When ``isPreprocessed`` is false, element ``[1]`` of each entry is
        reshaped to ``(IMAGE_SIZE, IMAGE_SIZE, 1)``; otherwise the entry
        itself is reshaped to ``(IMAGE_SIZE_CROP, IMAGE_SIZE_CROP, 1)``.

        NOTE(review): nothing in this class calls this method, and the
        single-channel shapes do not match RGB images - confirm it is still
        needed.
        """
        if not self.isPreprocessed:
            shape = (self.IMAGE_SIZE, self.IMAGE_SIZE, 1)
            images = [np.reshape(dataLine[1], shape) for dataLine in data]
        else:
            shape = (self.IMAGE_SIZE_CROP, self.IMAGE_SIZE_CROP, 1)
            images = [np.reshape(dataLine, shape) for dataLine in data]
        return np.asarray(images)

    def visualizeData(self, indexBegin, indexEnd, dataset='X_train'):
        """Show images ``indexBegin`` .. ``indexEnd - 1`` of the chosen split.

        Args:
            indexBegin: first index to display.
            indexEnd: one past the last index to display.
            dataset: one of ``'X_train'``, ``'X_valid'``, ``'X_test'``,
                ``'X_submission'``.

        Raises:
            ValueError: if ``dataset`` is not one of the names above.
        """
        known = ('X_train', 'X_valid', 'X_test', 'X_submission')
        if dataset not in known:
            raise ValueError("dataset must be one of %s, got %r" % (known, dataset))
        data = getattr(self, dataset)

        side = self.IMAGE_SIZE_CROP
        for index in range(indexBegin, indexEnd):
            image = np.reshape(data[index], (side, side, -1))
            if image.shape[-1] == 1:
                # Single-channel image: keep the original grey-scale rendering.
                plt.imshow(image[..., 0], cmap='Greys')
            else:
                plt.imshow(image)
            plt.show()

    def splitData(self):
        """Split the training data 80/20 into train and validation sets."""
        self.X_train, self.X_valid, self.Y_train, self.Y_valid = train_test_split(
            self.X_train, self.Y_train, test_size=0.20, random_state=42)

    def applyRotationToDataset(self, dataset, angle, q):
        """Rotate every image of ``dataset`` by ``angle`` and put the array on ``q``.

        Images keep their channel count; an input without a channel axis is
        returned with a trailing axis of size 1.
        """
        side = self.IMAGE_SIZE_CROP
        rotated_dataset = []
        for img in dataset:
            img = np.reshape(img, (side, side, -1))
            rotated = self.rotate_image(img, angle)
            # warpAffine may hand back a 2-D array for single-channel input;
            # reshape so every output has the same shape as its input.
            rotated_dataset.append(np.reshape(rotated, img.shape))
        q.put(np.asarray(rotated_dataset))

    def rotate_image(self, image, angle):
        """Return ``image`` rotated by ``angle`` about its centre, same size."""
        # shape[1::-1] is (width, height), the order OpenCV expects.
        center = tuple(np.array(image.shape[1::-1]) / 2)
        matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        result = cv2.warpAffine(image, matrix, image.shape[1::-1], flags=cv2.INTER_LINEAR)
        return result


In [115]:
# Build the dataset without rotation augmentation; amplitude/stdDev/angles are unused here.
dataset = Dataset('Data', False, amplitude = None, stdDev = None, rotate_augment = False, angles = None)

In [118]:
# Sanity check: show one one-hot encoded training label.
print(dataset.Y_train[1])

[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]


## Model 

In [119]:
class TrainableModel:
    """Bundle a Keras model with a dataset plus train/evaluate/plot helpers.

    Attributes:
        history: object returned by the last ``model.fit`` call, or ``None``
            before any training.
        test_accuracy: accuracy from the last ``evaluate`` call, or ``None``.
    """

    def __init__(self, dataset, model, name="", patience=15):
        """Store the collaborators, create the callbacks and compile the model.

        Args:
            dataset: object exposing the ``X_*`` / ``Y_*`` arrays read by the
                methods below.
            model: the Keras model to train.
            name: label used in the history file name.
            patience: ``patience`` passed to ``EarlyStopping``.
        """
        self.name = name
        self.dataset = dataset
        self.model = model
        self.es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=patience)
        self.mc = ModelCheckpoint(PATH + 'best_model.h5', monitor='val_loss', mode='min', save_best_only=True)
        self.optimizer = 'adam'
        # Initialise here so evaluate()/submission() can be called in any
        # order without raising AttributeError.
        self.history = None
        self.test_accuracy = None
        self.compileModel()

    def evaluate(self):
        """Evaluate on the test split and dump the training history to JSON.

        Sets ``test_accuracy`` from the second value returned by
        ``model.evaluate`` (the model is compiled with the single metric
        ``'accuracy'``). The history file is written only if ``fit`` has run.
        """
        results = self.model.evaluate(self.dataset.X_test, self.dataset.Y_test, batch_size=128)
        self.test_accuracy = results[1]
        print(results)
        if self.history is None:
            # Nothing to save yet; avoid leaving an empty file behind.
            return
        with open(PATH + 'History_' + str(self.name) + '_' + str(self.dataset.name) + '_' + str(self.test_accuracy) + '.json', 'w') as f:
            json.dump(self.history.history, f)

    def compileModel(self):
        """(Re)compile the model with the current ``self.optimizer``."""
        self.model.compile(optimizer=self.optimizer, loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy'])

    def softmaxVectorToOneHot(self, vector):
        """Return a one-hot vector, as long as ``vector``, marking its argmax."""
        vector = np.asarray(vector)
        # Size the output from the input: a fixed length of 31 did not match
        # the 10-class encoder used for inverse_transform.
        oneHot = np.zeros(vector.shape[0])
        oneHot[np.argmax(vector, axis=0)] = 1
        return oneHot

    def submission(self):
        """Predict on ``dataset.X_submission`` and write an Id/Category CSV.

        The file name embeds ``test_accuracy``, which stays ``None`` until
        ``evaluate`` has been called.
        """
        Y_predicted = self.model.predict(self.dataset.X_submission)
        encoder = self.dataset.oneHotTransformer
        labels = [
            encoder.inverse_transform(self.softmaxVectorToOneHot(line).reshape(1, -1))[0][0]
            for line in Y_predicted
        ]
        df = pd.DataFrame()
        df['Category'] = labels
        df.insert(0, 'Id', range(0, len(df)))
        df.to_csv(PATH + 'Submission-' + str(self.test_accuracy) + '.csv', index=False)

    def fit(self, num_epochs=300, batchSize=32):
        """Train on the training split, validating on the validation split.

        Early stopping and best-model checkpointing are attached as callbacks;
        the result of ``model.fit`` is stored in ``self.history``.
        """
        X = self.dataset.X_train
        Y = self.dataset.Y_train
        X_valid = self.dataset.X_valid
        Y_valid = self.dataset.Y_valid
        self.history = self.model.fit(X, Y, epochs=num_epochs, batch_size=batchSize, validation_data=(X_valid, Y_valid), callbacks=[self.es, self.mc])

    def loadBestModel(self):
        """Replace ``self.model`` with the checkpoint saved by ``ModelCheckpoint``."""
        self.model = load_model(PATH + 'best_model.h5')

    def fitOverAllData(self, num_epochs=5):
        """Train on ``dataset.X_ALL`` / ``dataset.Y_ALL`` without validation.

        NOTE(review): the dataset object must provide ``X_ALL`` and ``Y_ALL``;
        confirm the dataset class in use defines them.
        """
        X = self.dataset.X_ALL
        Y = self.dataset.Y_ALL
        self.history = self.model.fit(X, Y, epochs=num_epochs)

    def graphFittingLoss(self):
        """Plot training and validation loss per epoch."""
        plt.plot(self.history.history['loss'])
        # Second curve is the validation loss (legend text kept as-is).
        plt.plot(self.history.history['val_loss'])
        plt.title('Model loss over the training steps')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.show()

    def graphFittingAccuracy(self):
        """Plot training and validation accuracy per epoch."""
        plt.plot(self.history.history['accuracy'])
        # Second curve is the validation accuracy (legend text kept as-is).
        plt.plot(self.history.history['val_accuracy'])
        plt.title('Model accuracy over the training steps')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        # Render explicitly, matching graphFittingLoss.
        plt.show()

## Dense Net 

In [120]:
# Guard flag: the training cell at the end of the notebook runs only when this is truthy.
RUN = True

In [121]:
#This implementation of the DenseNet architecture is based on the fastai course
#https://github.com/fastai/courses/blob/master/deeplearning2/densenet-keras.ipynb

def convolution(x, numberFilters, filterSize, weigthDecay, p):
    """Apply a same-padded, L2-regularised Conv2D to ``x``, then optional dropout.

    Args:
        x: input tensor.
        numberFilters: number of convolution filters.
        filterSize: side length of the square kernel.
        weigthDecay: L2 factor for the kernel regulariser.
        p: dropout rate; dropout is skipped when this is falsy (None or 0).
    """
    conv_layer = layers.Conv2D(
        numberFilters,
        (filterSize, filterSize),
        kernel_initializer='uniform',
        padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(weigthDecay),
    )
    features = conv_layer(x)
    if not p:
        return features
    return layers.Dropout(p)(features)

def convolutionBlock(x, numberFilters, bottleneck=False, p=None, weigthDecay=0):
    """BatchNorm -> ReLU -> 3x3 convolution, with an optional 1x1 bottleneck.

    With ``bottleneck`` set, a 1x1 convolution producing ``4 * numberFilters``
    channels (followed by BatchNorm and ReLU) runs before the 3x3 convolution.
    """
    def _batchNormRelu(tensor):
        # Pre-activation pair shared by both stages of the block.
        return layers.Activation('relu')(layers.BatchNormalization(axis=-1)(tensor))

    hidden = _batchNormRelu(x)
    if bottleneck:
        squeezed = convolution(hidden, numberFilters * 4, 1, weigthDecay, p)
        hidden = _batchNormRelu(squeezed)
    return convolution(hidden, numberFilters, 3, weigthDecay, p)

def denselyConnectedBlock(x, numberOfLayers, growthRate, bottleneck=False, p=None, weigthDecay=0):
    """Stack convolution blocks, concatenating each output onto its input.

    Every iteration adds ``growthRate`` channels to ``x`` along the last axis.
    With ``bottleneck`` set, the number of iterations is halved.

    Args:
        x: input tensor (channels on the last axis).
        numberOfLayers: number of convolution blocks before bottleneck halving.
        growthRate: channels produced by each convolution block.
        bottleneck: forwarded to ``convolutionBlock``.
        p: dropout rate forwarded to ``convolutionBlock``.
        weigthDecay: L2 factor forwarded to ``convolutionBlock``.
    """
    if bottleneck:
        numberOfLayers //= 2
    for _ in range(numberOfLayers):
        b = convolutionBlock(x, growthRate, bottleneck=bottleneck, p=p, weigthDecay=weigthDecay)
        # Use the tf.keras layer, like every other layer in this model, rather
        # than the standalone-keras `concatenate` function.
        x = layers.Concatenate(axis=-1)([x, b])
    return x

def TransBlock(x, compression=1.0, p=None, weigthDecay=0):
    """Transition block: BatchNorm -> ReLU -> 1x1 convolution -> 2x2 average pool.

    The 1x1 convolution outputs ``int(channels * compression)`` filters, where
    ``channels`` is the size of the last axis of ``x``.
    """
    channels = x.get_shape().as_list()[-1]
    numberFilters = int(channels * compression)
    normalised = layers.BatchNormalization(axis=-1)(x)
    activated = layers.Activation('relu')(normalised)
    reduced = convolution(activated, numberFilters, 1, weigthDecay, p)
    pool = layers.AveragePooling2D((2, 2), strides=(2, 2))
    return pool(reduced)

def Dense_Net_INIT(input, numberOfLabels, depth=40, blockCount=3, growthRate=12, filterCount=16, bottleneck=False, compression=1.0, p=None, weigthDecay=0):
    """Build the DenseNet graph on ``input`` and return the softmax output tensor.

    Args:
        input: Keras input tensor.
        numberOfLabels: size of the final softmax layer.
        depth: nominal depth; each dense block gets ``(depth - 4) // blockCount``
            layers.
        blockCount: number of dense blocks.
        growthRate: channels added by each layer inside a dense block.
        filterCount: filters of the initial 3x3 convolution.
        bottleneck: use 1x1 bottleneck convolutions inside the dense blocks.
        compression: channel multiplier applied by each transition block.
        p: dropout rate (None or 0 disables dropout).
        weigthDecay: L2 regularisation factor.
    """
    numberOfLayers = [(depth - 4) // blockCount] * blockCount

    # The initial convolution never uses dropout (p is forced to 0).
    x = convolution(input, filterCount, 3, weigthDecay, 0)
    lastBlock = len(numberOfLayers) - 1
    for i, block in enumerate(numberOfLayers):
        x = denselyConnectedBlock(x, block, growthRate, bottleneck=bottleneck, p=p, weigthDecay=weigthDecay)
        # A transition block follows every dense block except the last one.
        if i != lastBlock:
            x = TransBlock(x, compression=compression, p=p, weigthDecay=weigthDecay)

    x = layers.Activation('relu')(layers.BatchNormalization(axis=-1)(x))
    x = layers.GlobalAveragePooling2D()(x)
    # Take the regulariser from tf.keras, as `convolution` does, instead of
    # the standalone-keras `regularizers` module.
    return layers.Dense(numberOfLabels, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(weigthDecay))(x)

In [122]:
# 32x32 RGB input; 10 output classes; depth=100 gives (100 - 4) / 3 = 32 layers per dense block.
# NOTE: `input` shadows the Python builtin of the same name for the rest of the notebook.
input = layers.Input(shape=(32, 32, 3))
output = Dense_Net_INIT(input, 10, depth = 100, growthRate=36)
model = models.Model(input, output)

In [None]:
# Train, evaluate and export predictions; the whole cell is skipped unless RUN is truthy.
if RUN:
    denseNet = TrainableModel(dataset, model, name = "DenseNet")
    # The constructor compiles with 'adam'; swap in Nesterov SGD and recompile.
    # Positional args are presumably learning rate 0.1 and momentum 0.9 - confirm against the SGD signature.
    denseNet.optimizer = tf.keras.optimizers.SGD(0.1, 0.9, nesterov=True)
    denseNet.compileModel()
    denseNet.fit(num_epochs = 100)
    denseNet.evaluate()
    # NOTE(review): submission() reads dataset.X_submission, which the Dataset class does not appear to set - verify.
    denseNet.submission()

Epoch 1/100
  45/1250 [>.............................] - ETA: 10:42:16 - loss: 7.9719 - accuracy: 0.1319