In [2]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
import os
import tensorflow as tf
import keras

In [3]:
# X will be our dataset
# edit the path below so that it points to the right dataset file
# Read the whole table; it holds the features plus the 'target' label column.
X = pd.read_csv('../input/laminar22500-c-n/lami.csv')

# Label vector: the raw values of the 'target' column.
y = X['target'].values

# Feature matrix: every column except 'target', as a plain NumPy array.
X = np.array(X.drop(columns=['target']))

# dummy_y is the one-hot encoding of the categorical variable y
dummy_y = np_utils.to_categorical(y)

In [None]:
# Crossvalidation on the units of the first layer with an architecture with only one hidden layer

# define baseline model with only one hidden layer. This reflects The Universal Approximation Theorem,
# which states that a neural network with 1 hidden layer can approximate any continuous function 
# for inputs within a specific range. So, this is the starting point for the model. The last
# dense will have a number of units equal to the number of classes, while the only hidden layer
# will have a number of units to be decided with crossvalidation.
# In this cell and in the following tuning cells we always use k-fold cross-validation with K = 5 and
# we always evaluate the validation accuracy.
# Notice that for each model we report the mean, over the K splits, of the best validation accuracy.
# Notice also that we use early stopping to stop the training when the validation accuracy
# has not improved for a certain number (patience) of epochs. Finally, it restores the weights of
# the best epoch.

# results:
# units  val_accuracy
# 100    0.6348888874053955
# 500    0.6449333310127259
# 1000   0.6485777735710144

# Stop a fit once val_accuracy has gone 8 epochs without improving, then
# put back the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with a single hidden ReLU layer.

    y: number of units of the hidden Dense layer.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
units = [100, 500, 1000]

# HistoriesOfStories[j] collects the per-fold History objects of units[j].
HistoriesOfStories = []

for i in units:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(units, lista):
    print(candidate, " ", score)


In [None]:
# Crossvalidation on the units of the first and second layers with an architecture with two hidden layers

# results:
# units  val_accuracy
# 100    0.6789333343505859
# 500    0.6940444350242615 
# 1000   0.6965333342552185
# 5000   0.6961777687072754

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with two hidden ReLU layers of `y` units each.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(2):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
units = [100, 500, 1000]

# HistoriesOfStories[j] collects the per-fold History objects of units[j].
HistoriesOfStories = []

for i in units:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(units, lista):
    print(candidate, " ", score)
    

In [None]:
# Crossvalidation on the probability of the dropout with an architecture with two hidden dense layers

# here we are trying to evaluate a model with 2 hidden layers and a dropout before the output layer. 
# we can notice from the results that the dropout doesn't help

# results:
# prob  val_accuracy
# 0.0   0.6957777857780456
# 0.25   0.6911110997200012
# 0.5   0.6764444351196289

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a compiled classifier with two hidden layers and dropout before the output.

    y: number of units of each hidden Dense (ReLU) layer.
    x: rate passed to the Dropout layer placed right before the output layer.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    model.add(Dense(y, activation="relu"))
    model.add(Dense(y, activation="relu"))
    model.add(Dropout(x))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
# First three points of a 5-point grid on [0, 1]: 0.0, 0.25 and 0.5.
prob = np.linspace(0,1,5).tolist()[0:3]

# HistoriesOfStories[j] collects the per-fold History objects of prob[j].
HistoriesOfStories = []

for i in prob:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(500,i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(prob, lista):
    print(candidate, " ", score)
    

In [None]:
# Crossvalidation on the probability of the dropout with an architecture with two hidden dense layers

# here we are trying to evaluate a model with 2 hidden layers and a dropout between the two hidden layers.
# we can notice from the results that the dropout doesn't help

# results:
# prob  val_accuracy
# 0.0   0.6952444434165954
# 0.25   0.6835999965667725
# 0.5   0.6672000050544739

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a compiled classifier with dropout between its two hidden layers.

    y: number of units of each hidden Dense (ReLU) layer.
    x: rate passed to the Dropout layer placed between the two hidden layers.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    model.add(Dense(y, activation="relu"))
    model.add(Dropout(x))
    model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
# First three points of a 5-point grid on [0, 1]: 0.0, 0.25 and 0.5.
prob = np.linspace(0,1,5).tolist()[0:3]

# HistoriesOfStories[j] collects the per-fold History objects of prob[j].
HistoriesOfStories = []

for i in prob:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(500,i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(prob, lista):
    print(candidate, " ", score)
    
    

In [None]:
# Crossvalidation on the units of the three hidden dense layers

# we can see that it's performing better than the previous architectures

# results:
# units  val_accuracy
# 100   0.7027111053466797
# 500   0.7086666703224183
# 1000   0.7111555576324463

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with three hidden ReLU layers of `y` units each.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(3):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
units = [100, 500, 1000]

# HistoriesOfStories[j] collects the per-fold History objects of units[j].
HistoriesOfStories = []

for i in units:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(units, lista):
    print(candidate, " ", score)
    

In [None]:
# Crossvalidation on the units of the four hidden dense layers

# we can see that it's performing better than the previous architectures and units = 500 has
# the best performance in val_accuracy

# results:
# units  val_accuracy
# 100   0.7044444441795349
# 500   0.7208444476127625
# 1000   0.7123555541038513

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with four hidden ReLU layers of `y` units each.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(4):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
units = [100, 500, 1000]

# HistoriesOfStories[j] collects the per-fold History objects of units[j].
HistoriesOfStories = []

for i in units:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(units, lista):
    print(candidate, " ", score)

In [None]:
# Crossvalidation on the units of the five hidden dense layers

# Notice that in this case the results are worse than those of the architecture with 4 hidden dense layers.
# So, we can stop at around four or five hidden dense layers. In the next cell we will check whether
# it is possible to increase the val_accuracy of the 5-dense-layer architecture by adding a dropout layer.

# results:
# units  val_accuracy
# 100   0.7143555521965027
# 500   0.7064888834953308
# 1000   0.7032888889312744


# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with five hidden ReLU layers of `y` units each.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(5):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
units = [100, 500, 1000]

# HistoriesOfStories[j] collects the per-fold History objects of units[j].
HistoriesOfStories = []

for i in units:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(units, lista):
    print(candidate, " ", score)


In [None]:
# Crossvalidation on the probability of the dropout layer in an architecture with 5 hidden dense layers

# Notice that in this case we will use units = 100, because it showed the best results in the previous crossvalidation
# on the architecture with 5 hidden dense layers. But it doesn't help.

# results:
# prob  val_accuracy
# 0.2   0.7085333347320557
# 0.4   0.6967111110687256
# 0.6   0.6939555525779724


# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a compiled classifier with five hidden layers and one dropout layer.

    y: number of units of each hidden Dense (ReLU) layer.
    x: rate passed to the Dropout layer placed between the fourth and the
       fifth hidden layer.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(4):
        model.add(Dense(y, activation="relu"))
    model.add(Dropout(x))
    model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
prob = [0.2, 0.4, 0.6]

# HistoriesOfStories[j] collects the per-fold History objects of prob[j].
HistoriesOfStories = []

for i in prob:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(100,i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(prob, lista):
    print(candidate, " ", score)

In [None]:
# Crossvalidation on the probability of the dropout layer in an architecture with 4 hidden dense layers

# Notice that in this case we will use units = 500, because it showed the best results in the previous crossvalidation
# on the architecture with 4 hidden dense layers. But it doesn't help. So, we will not use any dropout layers. 

# results:
# prob  val_accuracy
# 0.2   0.7083555698394776
# 0.4   0.7010222196578979
# 0.6   0.6789777755737305

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a compiled classifier with four hidden layers and one dropout layer.

    y: number of units of each hidden Dense (ReLU) layer.
    x: rate passed to the Dropout layer placed between the third and the
       fourth hidden layer.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    for _ in range(3):
        model.add(Dense(y, activation="relu"))
    model.add(Dropout(x))
    model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
prob = [0.2, 0.4, 0.6]

# HistoriesOfStories[j] collects the per-fold History objects of prob[j].
HistoriesOfStories = []

for i in prob:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(500,i)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(prob, lista):
    print(candidate, " ", score)
    

In [None]:
# Crossvalidation on the batch_size in an architecture with a single hidden dense layer of 500 units
# (this is the model that baseline_model builds in this cell)

# In this case we want to choose the batch_size, starting from the stochastic gradient descent (batch_size = 1) 
# to the mini-batch gradient descent (batch_size = 40). Given this large dataset, a batch size equal to ten seems a good trade-off
# between training speed and val_accuracy.

# results:
# batch_size  val_accuracy
# 1           0.644266664981842
# 5           0.645377779006958
# 10          0.6481777906417847
# 20          0.6420444369316101
# 40          0.6357777714729309


# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y):
    """Return a compiled classifier with one hidden ReLU layer of `y` units.

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
batch_size = [1, 5, 10, 20, 40]

# HistoriesOfStories[j] collects the per-fold History objects of batch_size[j].
HistoriesOfStories = []

for i in batch_size:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        model = baseline_model(500)

        # Only the batch size changes between candidates; the held-out fold is
        # the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=i,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(batch_size, lista):
    print(candidate, " ", score)


In [None]:
# Crossvalidation on the learning rate 

# Up to now the best model has: batch size = 10, 4 layers each of 500 units and no dropout layers
# Now, we are going to tune one of the most important parameters: the learning rate. The default
# learning rate is 0.001, but we are going to build a grid of learning rates that goes from
# 10^-1 to 10^-6. Notice that the optimizer used is Adam.
# In this case the best learning rate is 0.0001.

# results:
# learning_rate  val_accuracy
# 0.1            0.20764444172382354
# 0.01           0.6019999980926514
# 0.001          0.7085777640342712
# 0.0001         0.7168444395065308
# 1e-05          0.6318666696548462
# 1e-06          0.5545777916908264


# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a four-hidden-layer classifier compiled with a caller-supplied optimizer.

    y: number of units of each hidden Dense (ReLU) layer.
    x: optimizer handed to model.compile().

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    for _ in range(4):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer=x,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
# Powers of ten from 10^-1 down to 10^-6.
lr = [10**float(i) for i in np.arange(-1,-7,-1)]

# HistoriesOfStories[j] collects the per-fold History objects of lr[j].
HistoriesOfStories = []

for i in lr:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        # A new optimizer instance is created for every model.
        opt = tf.keras.optimizers.Adam(learning_rate=i)
        model = baseline_model(500,opt)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(lr, lista):
    print(candidate, " ", score)


In [None]:
# Crossvalidation on the learning rate around the previous best result

# In this case the best learning rate is 0.0005.

# results:
# learning_rate  val_accuracy
# 0.0005   0.7325333237648011
# 5e-05    0.6845333337783813

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a four-hidden-layer classifier compiled with a caller-supplied optimizer.

    y: number of units of each hidden Dense (ReLU) layer.
    x: optimizer handed to model.compile().

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    for _ in range(4):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer=x,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 25
# With a fixed random_state every call to kf.split() yields the same shuffled
# folds, so all candidates are scored on identical splits and the run is repeatable.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]
lr = [0.0005, 0.00005]

# HistoriesOfStories[j] collects the per-fold History objects of lr[j].
HistoriesOfStories = []

for i in lr:
    print("evaluating: ",i)
    Histories = []
    for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
        print("count: ",count)

        X_train, X_test = X[train_index, :], X[val_index, :]
        y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

        # A new optimizer instance is created for every model.
        opt = tf.keras.optimizers.Adam(learning_rate=i)
        model = baseline_model(500,opt)

        # The held-out fold is used as the validation set monitored by early stopping.
        history = model.fit(X_train,
                            y_train,
                            epochs=num_epochs,
                            callbacks=callbacks_list,
                            validation_data=(X_test, y_test),
                            batch_size=10,
                            verbose=1)
        Histories.append(history)

        # Reset Keras' global state before the next model is built.
        tf.keras.backend.clear_session()

    HistoriesOfStories.append(Histories)

# Score of a candidate: best val_accuracy of each fold, averaged over the folds.
lista = [np.mean([np.max(fold.history["val_accuracy"]) for fold in folds])
         for folds in HistoriesOfStories]
for candidate, score in zip(lr, lista):
    print(candidate, " ", score)

In [4]:
# In this cell we are evaluating the model with the best parameters chosen in the previous cells. 
# To recap: 4 hidden dense layers with 500 units each, no dropout layers, batch size equal to 10 and learning rate equal to 0.0005.
# We will choose the final model through a k-fold crossvalidation with K = 3. The model with the highest val_accuracy will be 
# selected as the classifier.

# Early stopping on val_accuracy: give up after 8 epochs without improvement
# and restore the weights of the best epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=8,
    restore_best_weights=True,
)

def baseline_model(y,x):
    """Return a four-hidden-layer classifier compiled with a caller-supplied optimizer.

    y: number of units of each hidden Dense (ReLU) layer.
    x: optimizer handed to model.compile().

    The output layer has 5 softmax units. The model is compiled with the
    categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    for _ in range(4):
        model.add(Dense(y, activation="relu"))
    model.add(Dense(5, activation='softmax'))
    model.compile(optimizer=x,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

num_epochs = 400
# A fixed random_state makes the three shuffled folds repeatable across runs.
kf = KFold(n_splits=3, shuffle=True, random_state=42)
n = X.shape[0]

callbacks_list = [es]

# One History and one trained model per fold, in fold order.
Histories = []
models = []

for count, (train_index, val_index) in enumerate(kf.split(np.zeros(n), y)):
    print("count: ",count)

    X_train, X_test = X[train_index, :], X[val_index, :]
    y_train, y_test = dummy_y[train_index, :], dummy_y[val_index, :]

    opt = tf.keras.optimizers.Adam(learning_rate=0.0005)
    model = baseline_model(500,opt)

    # The held-out fold is used as the validation set monitored by early stopping.
    history = model.fit(X_train,
                        y_train,
                        epochs=num_epochs,
                        callbacks=callbacks_list,
                        validation_data=(X_test, y_test),
                        batch_size=10,
                        verbose=1)

    models.append(model)
    Histories.append(history)

    tf.keras.backend.clear_session()

# Best validation accuracy reached in each fold.
lista = [np.max(fold.history["val_accuracy"]) for fold in Histories]
for score in lista:
    print(score)

# Keep the model of the fold with the highest validation accuracy.
model_max = np.argmax(lista)
final_model = models[model_max]
# Save the selected model (not merely the last one trained) under the file
# name that load_model('model.h5') reads back. The previous call passed '.h5'
# as the second positional argument of save() instead of making it part of the path.
final_model.save('./model.h5')

In [11]:
from numpy import loadtxt
from keras.models import load_model
 
# Load the saved network back from 'model.h5' (path relative to the working directory).
model = load_model('model.h5')

In [12]:
# evaluation of the model
# model2.evaluate(X, dummy_y)

In [38]:
# the following cells were used to generate the plots for the presentations and the values are those saved in the previous cells

import numpy as np
import matplotlib.pyplot as plt
# Mean validation accuracies copied from the result comments of the
# cross-validation cells; position k corresponds to x1[k] units.
x1 = np.array([100, 500, 1000]) #num of units
a1 = np.array([0.6348888874053955, 0.6449333310127259, 0.6485777735710144]) #1 hidden layer
a2 = np.array([0.6789333343505859, 0.6940444350242615, 0.6965333342552185]) #2 hidden layer
a3 = np.array([0.7027111053466797, 0.7086666703224183, 0.7111555576324463]) #3 hidden layer
a4 = np.array([0.7044444441795349, 0.7208444476127625, 0.7123555541038513]) #4 hidden layer
a5 = np.array([0.7143555521965027, 0.7064888834953308, 0.7032888889312744]) #5 hidden layer

# One curve per network depth, drawn in increasing depth order.
curves = (
    (a1, '1 hidden layer'),
    (a2, '2 hidden layer'),
    (a3, '3 hidden layer'),
    (a4, '4 hidden layer'),
    (a5, '5 hidden layer'),
)
for accuracies, curve_label in curves:
    plt.plot(x1, accuracies, marker='o', label=curve_label)

plt.xlabel('number of units')
plt.ylabel('val_accuracy')
plt.legend()
plt.savefig('books_read1.png')
plt.show()

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Learning rates and the mean validation accuracy recorded for each of them.
# Dedicated names are used instead of `x` / `y`: `y` is the label vector that
# the cross-validation cells pass to kf.split(), so rebinding it here would
# break those cells if they are run again afterwards.
lr_values = [1e-06, 1e-05, 5e-05, 0.0001, 0.0005, 0.001, 0.01]
lr_val_accuracy = [0.5545777916908264, 0.6318666696548462, 0.6845333337783813, 0.7168444395065308, 0.7325333237648011, 0.7085777640342712, 0.6019999980926514]
plt.plot(lr_values, lr_val_accuracy, marker='o')
plt.title('How val_accuracy changes with learning rate')
plt.xlabel('learning rate')
plt.ylabel('val_accuracy')
plt.savefig('books_read.png')
plt.show()

In [37]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

figure(figsize=(8, 6))

# Dropout probabilities on the x axis: x1 for the 2-hidden-layer runs,
# x2 for the 4- and 5-hidden-layer runs.
x1 = [0, 0.25, 0.5]
x2 = [0, 0.2, 0.4, 0.6]

# Mean validation accuracies copied from the result comments of the dropout cells.
y1 = [0.6957777857780456, 0.6911110997200012, 0.6764444351196289] # 2 hidden layer w/ dropout before output
y2 = [0.6952444434165954, 0.6835999965667725, 0.6672000050544739] # 2 hidden layer w/ dropout in between the 2 hidden layers
y3 = [0.7208444476127625, 0.7083555698394776, 0.7010222196578979, 0.6789777755737305] # 4 hidden layer w/ dropout
y4 = [0.7143555521965027, 0.7085333347320557, 0.6967111110687256, 0.6939555525779724] # 5 hidden layers w/ dropout

# (x values, y values, legend label) for every curve, in drawing order.
series = (
    (x1, y1, '2 hidden layers w/ dropout before output'),
    (x1, y2, '2 hidden layers w/ dropout in between the 2 hidden layers'),
    (x2, y3, '4 hidden layers w/ dropout'),
    (x2, y4, '5 hidden layers w/ dropout'),
)
for probabilities, accuracies, curve_label in series:
    plt.plot(probabilities, accuracies, marker='o', label=curve_label)

plt.title('How val_accuracy changes with dropout')
plt.xlabel('probability')
plt.ylabel('val_accuracy')
plt.legend(loc=3)
plt.savefig('books_read.png')
plt.show()

In [34]:
import matplotlib.pyplot as plt
%matplotlib inline

# Per-epoch accuracy curves recorded for the fold at index 1 of the final training run.
y1 = Histories[1].history['val_accuracy']
y2 = Histories[1].history['accuracy']

# One x position per recorded epoch. The count is taken from the history
# itself because early stopping makes the number of epochs vary between runs.
x = np.arange(len(y1))

plt.plot(x, y1,  marker = '.', label="test accuracy")
plt.plot(x, y2,  marker = '.', label="training accuracy")

plt.title('plot for the final model: training and test accuracy')
plt.xlabel('epoch_num')
plt.ylabel('accuracy')
plt.legend(loc=4)
plt.savefig('books_read.png')

plt.show()

In [35]:
import matplotlib.pyplot as plt
%matplotlib inline

# Per-epoch loss curves recorded for the fold at index 1 of the final training run.
y1 = Histories[1].history['loss']
y2 = Histories[1].history['val_loss']

# One x position per recorded epoch. The count is taken from the history
# itself because early stopping makes the number of epochs vary between runs.
x = np.arange(len(y1))

plt.plot(x, y1,  marker = '.', label="training loss")
plt.plot(x, y2,  marker = '.', label="test loss")

plt.title('plot for the final model: training and test loss')
plt.xlabel('epoch_num')
plt.ylabel('loss')
plt.legend(loc=3)
plt.savefig('books_read2.png')

plt.show()