In [None]:
import pickle
import os
import pandas as pd
import seaborn as sn
import numpy as np
np.random.seed(42)
from sklearn.model_selection import train_test_split as splt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Reshape, Conv1D, MaxPooling1D, Flatten
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [None]:
# Expose only the GPU with index 2 to CUDA-based libraries in this process.
os.environ.update(CUDA_VISIBLE_DEVICES="2")

In [None]:
# Label table (provides the SCORE column), comma-separated.
dfy = pd.read_csv(
    "MILANO_wSCORE.csv",
)
# Feature table, semicolon-separated.
dfx = pd.read_csv(
    "PLIC-milano-processed.csv",
    sep=";",
)

In [None]:
# Target vector: the SCORE column of the label table, one value per row.
y = dfy['SCORE'].values

# Feature cleaning. The value -1 (as int, float or string) is treated as a
# missing-value marker and converted to NaN.
dfx = (
    dfx
    .replace(-1, np.nan)
    .replace("-1", np.nan)
    .replace(-1.0, np.nan)
)
# Keep only columns that contain at least one non-zero entry.
dfx = dfx.loc[:, (dfx != 0).any(axis=0)]
# Drop every column that has at least one missing value.
dfx = dfx.dropna(how='any', axis=1)
# Keep numeric-like columns only.
dfx = dfx.select_dtypes(exclude=['object', 'datetime64'])
# errors='ignore' keeps this cell re-runnable: on a second execution (or when
# one of these columns was already removed by the filters above) the columns
# are no longer present and a plain drop() would raise KeyError.
dfx = dfx.drop(
    labels=['smoking', 'smoking recod', 'glucose', 'Unnamed: 0', 'Unnamed: 0.1'],
    axis=1,
    errors='ignore',
)

# Features and labels come from two different files and are paired purely by
# row position, so fail early with a clear message if the row counts differ.
if len(dfx) != len(y):
    raise ValueError(
        "Feature table has %d rows but label table has %d rows"
        % (len(dfx), len(y))
    )

X = dfx.values

In [None]:
# Count the feature columns whose largest value is exactly 1.
bina = sum(1 for column in X.T if column.max() == 1)
print(bina)

In [None]:
# Fully connected classifier: 64 -> dropout -> 32 -> 4-way softmax.
model = Sequential()
# The input width is the number of feature columns. Take it from X.shape[1]
# rather than from the shape of an arbitrary row (X[1]), which would fail on
# an array with a single row.
model.add(Dense(64, activation='sigmoid', input_shape=(X.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(32, activation='sigmoid'))
# One output unit per risk class.
model.add(Dense(4, activation='softmax'))

In [None]:
# Display the names of the feature columns that remain in dfx.
dfx.columns

In [None]:
model.summary()

# Scale every feature column by its own maximum.
# NOTE(review): the maxima are computed on the full data set, i.e. before the
# train/test split, so test rows influence the scaling.
X = X/X.max(axis=0)

# One-hot encode the target into four risk classes. SCORE is divided by 100
# first (presumably it is stored as a percentage; confirm against the data).
#   class 0 (Low):        score/100 <  0.01
#   class 1 (Medium):     0.01 <= score/100 < 0.02
#   class 2 (High):       0.02 <= score/100 < 0.05
#   class 3 (Very High):  score/100 >= 0.05
risk_bounds = [0.01, 0.02, 0.05]
class_index = np.digitize(y/100, risk_bounds)
yy = np.zeros((y.shape[0], 4))
yy[np.arange(y.shape[0]), class_index] = 1

In [None]:
# Number of samples in each of the four one-hot classes.
# (The original line ended in a dangling "/" operator, which is a SyntaxError.)
np.sum(yy, axis=0).astype("int")

In [None]:
# Sanity check: print the largest value of the features, then of the labels.
for array in (X, yy):
    print(np.amax(array))

# Hold out 25% of the rows; the fixed random_state makes the split repeatable.
split_parts = splt(X, yy, test_size=0.25, random_state=42)
X_tr, X_ts, y_tr, y_ts = split_parts

# Stop after 100 epochs without improvement and roll back to the best weights.
es = EarlyStopping(
    patience=100,
    restore_best_weights=True,
)

In [None]:
# Multi-class setup: categorical cross-entropy on the one-hot labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 340 epochs; the held-out split is used as validation data
# and drives the early-stopping callback.
history = model.fit(
    X_tr,
    y_tr,
    validation_data=(X_ts, y_ts),
    epochs=340,
    callbacks=[es],
)

In [None]:
def plot_history(history, filename, filename2):
    """Plot loss and accuracy curves from a training history and save them.

    Parameters
    ----------
    history : object
        Any object exposing a ``history`` dict that maps metric names to
        per-epoch value lists (e.g. the value returned by ``model.fit``).
        Keys containing ``'loss'`` / ``'acc'`` are plotted; keys that also
        contain ``'val'`` are treated as validation curves.
    filename : str
        Output path for the loss figure.
    filename2 : str
        Output path for the accuracy figure.

    Returns
    -------
    None
        If no training-loss key is present, a message is printed and nothing
        is plotted or saved.
    """
    metrics = history.history
    loss_list = [k for k in metrics if 'loss' in k and 'val' not in k]
    val_loss_list = [k for k in metrics if 'loss' in k and 'val' in k]
    acc_list = [k for k in metrics if 'acc' in k and 'val' not in k]
    val_acc_list = [k for k in metrics if 'acc' in k and 'val' in k]

    if not loss_list:
        print('Loss is missing in history')
        return

    # The training loss always exists at this point; use it for the epoch axis.
    epochs = range(1, len(metrics[loss_list[0]]) + 1)

    def _draw(ax, keys, colour, caption):
        # One curve per key; the legend entry shows the last recorded value.
        for key in keys:
            values = metrics[key]
            ax.plot(epochs, values, colour,
                    label=caption + ' (' + format(values[-1], '.5f') + ')')

    # Explicit figures/axes instead of the global figure numbers 1 and 2, so
    # repeated calls never draw on top of a figure left over from a previous
    # call.
    fig_loss, ax_loss = plt.subplots()
    _draw(ax_loss, loss_list, 'b', 'Training loss')
    _draw(ax_loss, val_loss_list, 'g', 'Validation loss')
    ax_loss.set_title('Loss')
    ax_loss.set_xlabel('Epochs')
    ax_loss.set_ylabel('Loss')
    ax_loss.legend()
    fig_loss.savefig(filename)

    fig_acc, ax_acc = plt.subplots()
    _draw(ax_acc, acc_list, 'b', 'Training accuracy')
    _draw(ax_acc, val_acc_list, 'g', 'Validation accuracy')
    ax_acc.set_title('Accuracy')
    ax_acc.set_xlabel('Epochs')
    ax_acc.set_ylabel('Accuracy')
    ax_acc.legend()
    fig_acc.savefig(filename2)

    plt.show()

In [None]:
# Write the loss and accuracy curves of the training run to PNG files.
plot_history(
    history,
    filename="FNN_LOSS.png",
    filename2="FNN_ACC.png",
)

In [None]:
# Persist the trained model to an HDF5 file.
model.save(filepath="model_29_15_03.h5")

In [None]:
# Persist the training history. The context manager guarantees that the file
# is flushed and closed (the original passed a bare open() and never closed it).
# NOTE(review): this pickles the whole object returned by model.fit; if only
# the metric curves are needed, `history.history` (a plain dict) is a more
# portable payload. Left unchanged so existing loaders get the same object.
with open("history_29_15_03.pkl", "wb") as history_file:
    pickle.dump(history, history_file)

In [None]:
# Class probabilities predicted for the held-out rows.
y_ts_pred = model.predict(X_ts)

# Confusion matrix: row = predicted class, column = true class.
predicted_class = np.argmax(y_ts_pred, axis=1)
true_class = np.argmax(y_ts, axis=1)
cmatrix = np.zeros((4,4))
for row, col in zip(predicted_class, true_class):
    cmatrix[row, col] += 1

In [None]:
# Display the raw 4x4 confusion matrix (rows: predicted class, columns: true class).
cmatrix

In [None]:
# Class names in one-hot index order, matching the label encoding
# (0 = Low, 1 = Medium, 2 = High, 3 = Very High). The previous tick labels
# ("No", "Low", "Med", "High") were shifted by one category.
class_labels = ["Low", "Medium", "High", "Very High"]
df_cm = pd.DataFrame(cmatrix, index = class_labels, columns = class_labels)
plt.figure(figsize = (10,7))
snheatmap = sn.heatmap(df_cm, annot=True,  fmt='g', cmap="Blues")
# cmatrix is indexed [predicted][true], so rows are predictions and columns
# are ground truth; label the axes so the figure can be read on its own.
snheatmap.set_xlabel("Actual class")
snheatmap.set_ylabel("Predicted class");

In [None]:
# Sum the model outputs over all test rows, giving one total per output class.
np.sum(y_ts_pred, axis=0)

In [None]:
# Write the figure that holds the confusion-matrix heatmap to disk.
confusion_fig = snheatmap.get_figure()
confusion_fig.savefig("FNN_CONFUSION_MATRIX.png")