# Fully-Connected Deep Neural Network for k-fold cross validation

### My updated template for a binary classification, with a confusion matrix

### Metrics given:

Accuracy, Matthews Correlation Coefficient

For each class: Recall, Precision, F1 score, Specificity, Sensitivity

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import keras
from keras import regularizers, losses
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.impute import SimpleImputer as Imputer #class
from keras.utils.vis_utils import plot_model

import keras_metrics
from keras import optimizers
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
#from sklearn.preprocessing import Imputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler # https://jovianlin.io/feature-scaling/
from sklearn.model_selection import KFold

In [2]:
# Getting the data
dataset = pd.read_csv('HC_PA_data.csv')
dataset.head()

Unnamed: 0,ID,Csoport,Nem,SPI.raw_0_0,MFCC.mean_0_0_[E]_1,MFCC.std_0_0_[E]_1,MFCC.range_0_0_[E]_1,HNR.mean_5_5_[E],HNR.std_5_5_[E],HNR.range_5_5_[E],...,IMF_ENTROPY_RATIO.range_0_0_[E-e:-i-2-y],IMF_ENTROPY_RATIO.mean_0_0_[O-A:-o-u],IMF_ENTROPY_RATIO.std_0_0_[O-A:-o-u],IMF_ENTROPY_RATIO.range_0_0_[O-A:-o-u],IMF_ENTROPY_RATIO.mean_0_0_[v-z-Z],IMF_ENTROPY_RATIO.std_0_0_[v-z-Z],IMF_ENTROPY_RATIO.range_0_0_[v-z-Z],IMF_ENTROPY_RATIO.mean_0_0_[b-d-g-dz-dZ-d'],IMF_ENTROPY_RATIO.std_0_0_[b-d-g-dz-dZ-d'],IMF_ENTROPY_RATIO.range_0_0_[b-d-g-dz-dZ-d']
0,PA_002no,PA,no,0.702423,226.727241,15.565275,138.304191,7.702671,1.87398,8.800351,...,2.379776,1.229107,0.297684,1.656664,1.34209,0.59902,2.269212,2.035604,0.885508,3.693024
1,PA_003no,PA,no,0.368337,188.198555,20.79734,127.246116,-3.235416,1.84155,6.213561,...,0.950906,1.054856,0.154813,0.754372,1.022489,0.159252,0.522101,1.098897,0.111197,0.488349
2,PA_004no,PA,no,0.810142,245.252498,13.106253,94.247744,9.765168,1.55311,7.726234,...,2.337904,1.175937,0.285692,1.62628,1.212787,0.271315,0.985533,1.46722,0.332705,1.475147
3,PA_005no,PA,no,1.052086,271.537454,21.658928,129.618586,11.969402,1.212987,5.478905,...,2.311154,1.010327,0.270293,1.332628,1.857653,0.562689,2.14659,1.508461,0.403692,1.557925
4,PA_006no,PA,no,0.739211,249.913724,16.760158,117.845775,2.424641,4.498472,16.214943,...,3.031446,1.277202,0.288863,1.337109,1.138326,0.284632,1.042373,1.441762,0.32992,1.256846


In [3]:
# Shuffle
np.random.seed(42) # random seed is a number (or vector) used to initialize a pseudorandom number generator
dataset = dataset.reindex(np.random.permutation(dataset.index))
dataset.reset_index(inplace=True, drop=True)

# Selecting the training attributes(X) and the label(y)
X = dataset.iloc[:, 3:52].values # X = dataset.iloc[:, np.r_[3:52]].values  lets you choose multiple coloumbs
y = dataset.iloc[:,1].values

# encode target
encode = {"HC" : 0, "PA" : 1}
decode = { 0 : "HC", 1 : "PA"}

y = pd.DataFrame(y).replace(encode)
                     
from keras.utils import np_utils
y_categorical = np_utils.to_categorical(y)

# Replace NaN values in columns with columns mean values 
imputer = Imputer(missing_values=np.nan, strategy = 'mean') # an instance of the class with these properties
imputer = imputer.fit(X)         # we have to choose the columns with missing values
X = imputer.transform(X)           # replace the X values for the columns averages

# Feature Scaling
#scaler = StandardScaler()
scaler = MinMaxScaler() 
scaled_data = scaler.fit_transform(X)

In [4]:
# Define the network

from keras.layers import InputLayer
#print(X.shape[0])

NUM_COLS = X.shape[1]
NUM_ROWS = X.shape[0]
#input_shape=(NUM_ROWS * NUM_COLS,)

def get_sequential_dnn():
    # create model
    classifier = Sequential(name="Sequential_DNN")  # future ANN classifier, now we initialize the different hidden layers 
    # ReLu activation function for the hidden layers
    # Sigmoid for the final layer
    
    # input layer and the first hidden layer
    classifier.add(InputLayer(input_shape=(X.shape[1])))
    classifier.add(Dense(49, activation='relu', name="first_hidden_layer", input_shape=(NUM_ROWS * NUM_COLS,)))
    
    
    # tip: number of nodes in the hidden layers = average (number of nodes in the input layer and the number of nodes in the output layer) 
                                # so output_dim =(49+1)/2 = 25
        
                                # init -> initialize the weights randomly
                                # input_dim = 49
    
    # Add a dropout layer for input layer
    # More to read about dropout here: http://jmlr.org/papers/volume15/srivastava14a/srivastava14a.pdf
    
    classifier.add(Dropout(0.25))
    
    #Then we simply add the input-, hidden- and output-layers. 
    # Between them, we are using dropout to prevent overfitting. 
    # Note that you should always use a dropout rate between 20% and 50%. 
    # At every layer, we use “Dense” which means that the units are fully connected. 
    
    # second hidden layer
    #classifier.add(Dense(output_dim = 25, kernel_regularizer = regularizers.l2(0.01), activity_regularizer = regularizers.l1(0.01), init ='uniform', activation = 'relu', input_dim = 49))         
    classifier.add(Dense(25, activation = 'relu', name="second_hidden_layer"))
    classifier.add(Dropout(0.25))
    #classifier.add(Dropout(0.5))
    
    
    # third hidden layer
    classifier.add(Dense(25, activation = 'relu', name="third_hidden_layer"))
    classifier.add(Dropout(0.25))
    #classifier.add(Dropout(0.5))
    
    # 4th hidden layer
    classifier.add(Dense(25, activation = 'relu', name="forth_hidden_layer"))
    classifier.add(Dropout(0.25))
    #classifier.add(Dropout(0.5))
    
    #classifier.add(Dense(output_dim = 2, activation = 'softmax', name="output_layer"))
    classifier.add(Dense(2, activation='softmax', name="final_layer"))
    return classifier

classifier=get_sequential_dnn()
classifier.summary()
#print(len(classifier.layers))

Model: "Sequential_DNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
first_hidden_layer (Dense)   (None, 49)                2450      
_________________________________________________________________
dropout (Dropout)            (None, 49)                0         
_________________________________________________________________
second_hidden_layer (Dense)  (None, 25)                1250      
_________________________________________________________________
dropout_1 (Dropout)          (None, 25)                0         
_________________________________________________________________
third_hidden_layer (Dense)   (None, 25)                650       
_________________________________________________________________
dropout_2 (Dropout)          (None, 25)                0         
_________________________________________________________________
forth_hidden_layer (Dense)   (None, 25)             

In [5]:
BATCH_SIZE = 4 #  128
EPOCHS = 10

fold_number = 10
kf = KFold(fold_number)
fold = 0
#cm_sum = np.empty([2,2])
pred_list = []
y_list = []
notrounded_list = []
print("Number of folds: {}".format(fold_number))

#kfold = StratifiedKFold(n_splits=fold_number shuffle=True, random_state=42)
for train, test in kf.split(scaled_data):
    
    fold = fold + 1
    print("")
    print("Fold #{}".format(fold))
    print("")
    
    x_train = scaled_data[train]
    y_train = y_categorical[train]
    x_test = scaled_data[test]
    y_test = y_categorical[test]
      
    #classifier.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=[keras_metrics.precision(), keras_metrics.recall(), 'acc'])
    #classifier.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[keras_metrics.precision(), keras_metrics.recall(), 'acc'])
    classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=[keras_metrics.precision(), keras_metrics.recall(), 'acc'])
        
    # Fit the model    
    #classifier.fit(x_train, y_train, validation_data = (x_test, y_test), epochs=150, verbose=0) 

    classifier.fit(x_train, y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=1,
          validation_data = (x_test, y_test))
    

    # Predicting test labels    
    
    pred = classifier.predict(x_test)
    notrounded = pred
    notrounded_list.extend(notrounded)
    
    
    pred = pred.round().astype(int)
    pred = pred.reshape(y_test.shape)
    
    #scores = classifier.evaluate(x_test, y_test)
    
    print('Test:', y_test)
    pred = np.argmax(pred, axis=1)
    y_test = np.argmax(y_test, axis=1)
    
    #cm = confusion_matrix(y_test, pred)
    #cm_sum += cm
    
    #print("y_test")
    #print(y_test)
    #print("pred")
    #print(pred)
    
    #print("cm:")
    #print(cm)
    #print("")
    #print("cum_sum:")
    #print(cm_sum)
    pred = pred.astype('str')
    y_test = y_test.astype('str')
    pred[pred == '0'] = decode[0]
    pred[pred == '1'] = decode[1]
    y_test[y_test == '0'] = decode[0]
    y_test[y_test == '1'] = decode[1]
    
    pred_list.extend(pred)
    y_list.extend(y_test)

    y_actu = pd.Series(y_test, name='True')
    y_pred = pd.Series(pred, name='Pred')
    df_confusion = pd.crosstab(y_pred, y_actu, colnames=['True'], rownames=['Predicted'], margins=True)
    print("")
    print("{}.fold's confusion matrix:".format(fold))
    print("")
    print(df_confusion)
    
    print("")
    print("The classifier's confusion matrix")
    print("")
    y_actu = pd.Series(y_list, name='True')
    y_pred = pd.Series(pred_list, name='Pred')
    df_finalconf = pd.crosstab(y_pred, y_actu, colnames=['True'], rownames=['Predicted'], margins=True)
    print(df_finalconf)
    
    print("")
    print("__________________________________")
    print("")

Number of folds: 10

Fold #1

Epoch 1/10
Instructions for updating:
`inputs` is now automatically inferred
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]

1.fold's confusion matrix:

True       HC  PA  All
Predicted             
HC         14   4   18
PA          7  20   27
All        21  24   45

The classifier's confusion matrix

True       HC  PA  All
Predicted             
HC         14   4   18
PA          7  

Epoch 9/10
Epoch 10/10
Test: [[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]]

3.fold's confusion matrix:

True       HC  PA  All
Predicted             
HC          9   2   11
PA          8  26   34
All        17  28   45

The classifier's confusion matrix

True       HC  PA  All
Predicted             
HC         39  16   55
PA         18  62   80
All        57  78  135

__________________________________


Fold #4

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]]

6.fold's confusion matrix:

True       HC  PA  All
Predicted             
HC         23   0   23
PA          1  21   22
All        24  21   45

The classifier's confusion matrix

True        HC   PA  All
Predicted               
HC          85   19  104
PA          31  135  166
All        116  154  270

__________________________________


Fold #7

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 

Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]

8.fold's confusion matrix:

True       HC  PA  All
Predicted             
HC         16   3   19
PA          3  23   26
All        19  26   45

The classifier's confusion matrix

True        HC   PA  All
Predicted               
HC         115   28  143
PA          36  181  217
All        151  209  360

__________________________________


Fold #9

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test: [[0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1.

Test: [[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]]

10.fold's confusion matrix:

True       HC  PA  All
Predicted             
HC         18   2   20
PA          1  24   25
All        19  26   45

The classifier's confusion matrix

True        HC   PA  All
Predicted               
HC         156   34  190
PA          37  223  260
All        193  257  450

__________________________________



In [6]:
print("")
print("__________________________________")
print("")
print("The classifier's confusion matrix")
print("")
y_actu = pd.Series(y_list, name='True')
y_pred = pd.Series(pred_list, name='Pred')
df_finalconf = pd.crosstab(y_pred, y_actu, colnames=['True'], rownames=['Predicted'], margins=True)
print(df_finalconf)


__________________________________

The classifier's confusion matrix

True        HC   PA  All
Predicted               
HC         156   34  190
PA          37  223  260
All        193  257  450


In [7]:
cm_final = df_finalconf.iloc[0:-1].values

cm_final = cm_final[:,[0,1]]
print(cm_final)

[[156  34]
 [ 37 223]]


In [8]:
#########################################################
# Statistical measures calculated from Confusion Matrix #
#########################################################
import math

# (tp + tn) / (tp + fp + tn + fn)
def get_accuracy(mx):
    [tp, fp], [fn, tn] = mx
    #print([tp, fp], [fn, tn])
    return (tp + tn) / (tp + fp + tn + fn)

# sensitivity, recall, hit rate, or true positive rate (TPR)
# tp / (tp + fn)
def get_recall(mx):
    [tp, fp], [fn, tn] = mx
    return tp / (tp + fn)

# precision or positive predictive value (PPV)
# tp / (tp + fp)
def get_precision(mx):
    [tp, fp], [fn, tn] = mx
    return tp / (tp + fp)

# harmonic mean of precision and sensitivity
# 2*((precision*recall)/(precision+recall))
def get_f1score(mx):
    return 2*((get_precision(mx)*get_recall(mx))/(get_precision(mx)+get_recall(mx)))

# specificity, selectivity or true negative rate (TNR)
# 
def get_specificity(mx):
    [tp, fp], [fn, tn] = mx
    return tp / (tp + fn)

def get_sensitivity(mx):
    [tp, fp], [fn, tn] = mx
    return tn/(tn+fp)

def get_MCC(mx):
    # Matthews Correlation Coefficient (MCC)
    [tp, fp], [fn, tn] = mx
    
    return (tp*tn-fp*fn)/math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))

In [9]:
# Calculating statistical measures from Confusion Matrix
mx = cm_final
print("__________________________________")
print("")
print('Accuracy: %f' % get_accuracy(mx))
print('Matthews Correlation Coefficient: %f' % get_MCC(mx))
print("")

print("For class {}: ".format(df_finalconf.columns[0]) )
print("")
print('Recall: %f' % get_recall(mx))
print('Precision: %f' % get_precision(mx))
print('F1 score: %f' % get_f1score(mx))
print('Specificity: %f' % get_specificity(mx))
print('Sensitivity: %f' % get_sensitivity(mx))

[tn, fp], [fn, tp] = mx
mx_ = [tp, fn],[fp, tn]
print("")
print("For class {}: ".format(df_finalconf.columns[1]) )
print("")
print('Recall: %f' % get_recall(mx_))
print('Precision: %f' % get_precision(mx_))
print('F1 score: %f' % get_f1score(mx_))
print('Specificity: %f' % get_specificity(mx_))
print('Sensitivity: %f' % get_sensitivity(mx_))

__________________________________

Accuracy: 0.842222
Matthews Correlation Coefficient: 0.677368

For class HC: 

Recall: 0.808290
Precision: 0.821053
F1 score: 0.814621
Specificity: 0.808290
Sensitivity: 0.867704

For class PA: 

Recall: 0.867704
Precision: 0.857692
F1 score: 0.862669
Specificity: 0.867704
Sensitivity: 0.808290
