## Read the data frame of features selected via mutual information

In [1]:
import pandas as pd
import numpy as np
import time

from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG; the split below is pinned separately via random_state.
np.random.seed(33)

# Load the feature-selected table. It still carries the "disposition" label
# column, which later cells separate from the features.
df = pd.read_csv("data/featureSelectedAllDataWithY.csv")
# df = pd.read_csv("../data/Cleaned_dat_encoded.csv")
print(df.shape)

## Split the data into training and testing data
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
training_data, testing_data = train_test_split(df, test_size=0.2, random_state=25)

# A bare expression is only rendered when it is the last statement of a cell,
# so the preview lives here instead of in the middle of the cell.
df.head()


(534730, 21)


In [2]:
# Separate the "disposition" label from the remaining feature columns and
# convert both splits to NumPy arrays.
#
# NOTE(review): only the first 100 training rows and the first 10 test rows are
# kept -- presumably to keep experiments fast. Remove the trailing slices to
# use the full split.
X_train = np.array(training_data.drop(columns="disposition"))[:100]
X_test = np.array(testing_data.drop(columns="disposition"))[:10]
y_train = np.array(training_data["disposition"])[:100]
y_test = np.array(testing_data["disposition"])[:10]

## Create a DNN model 

In [3]:
def create_model(optimizer='adam', learning_rate=0.001):
    """Build and compile the binary-classification DNN.

    The network has three 30-unit ReLU hidden layers followed by a single
    sigmoid output unit. It is compiled with binary cross-entropy loss and
    reports accuracy.

    Args:
        optimizer: Keras optimizer identifier (e.g. 'SGD', 'RMSprop', 'Adam')
            or an optimizer instance.
        learning_rate: Learning rate applied to the resolved optimizer. Pass
            None to keep the optimizer's own learning rate.

    Returns:
        The compiled Sequential model.
    """
    # Imported locally so the function does not depend on which cell imported
    # TensorFlow first.
    from tensorflow.keras import optimizers as keras_optimizers

    model = Sequential([
        Dense(30, activation='relu'),
        Dense(30, activation='relu'),
        Dense(30, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    # The learning rate belongs to the optimizer: `compile()` has no `lr`
    # argument, so passing it there never reached the optimizer.
    opt = keras_optimizers.get(optimizer)
    if learning_rate is not None:
        opt.learning_rate = learning_rate

    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

## Grid Search for DNN

In [9]:
# Candidate values explored by the grid search: 5 batch sizes x 5 epoch counts
# x 3 optimizers = 75 combinations.
param_grid = {
    'batch_size': [16, 32, 64, 128, 256],
    'epochs': [20, 50, 80, 110, 140],
    'optimizer': ['SGD', 'RMSprop', 'Adam'],
}

# Keep the individual candidate lists available under their own names as well.
batch_size, epochs, optimizer = (
    param_grid[key] for key in ('batch_size', 'epochs', 'optimizer')
)

In [12]:
import tensorflow as tf
# Public import path for Dense; the private `tensorflow_core` package is not
# available in every TensorFlow release.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

np.random.seed(52)

# Wrap the Keras model builder so scikit-learn can treat it as an estimator;
# the network itself is built and compiled inside create_model.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Search over param_grid with 5-fold cross-validation; n_jobs=-1 requests all
# available cores.
clf = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1)

KeyboardInterrupt: 

In [63]:
# Run the grid search on the training arrays and keep what fit() returns.
best_clf = clf.fit(X=X_train, y=y_train)

In [76]:
# Report the cross-validated score, the held-out score, and the winning
# hyperparameter combination, in that order.
cv_accuracy = best_clf.best_score_
print("Best cross validation accuracy: {:.2f}".format(cv_accuracy))

test_accuracy = best_clf.score(X_test, y_test)
print("Test set score: {:.2f}".format(test_accuracy))

selected_params = best_clf.best_params_
print("Best parameters: {}".format(selected_params))

Best cross validation accuracy: 0.86
Test set score: 0.90
Best parameters: {'batch_size': 32, 'epochs': 110, 'optimizer': 'RMSprop'}


Prediction of the test results

## Obtain the best hyperparameters and fit the final model

In [5]:
import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV
# Public import path for Dense; the private `tensorflow_core` package is not
# available in every TensorFlow release.
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Seed both NumPy and TensorFlow: seeding NumPy alone leaves TensorFlow's own
# random ops (e.g. weight initialisation) unseeded.
np.random.seed(52)
tf.random.set_seed(52)

# batch_size = best_clf.best_params_['batch_size']
# epochs = best_clf.best_params_['epochs']
# optimizer = best_clf.best_params_['optimizer']

# NOTE(review): hard-coded values; the commented lines above take them from the
# grid search result instead.
batch_size = 32
epochs = 3
optimizer = 'Adam'

# Train a fresh model with the chosen settings and persist it to disk.
clf = create_model(optimizer)
history = clf.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
clf.save("dnn.h5")

# Turn the sigmoid outputs into class predictions with a 0.5 threshold.
y_pred = clf.predict(X_test)
y_pred = (y_pred > 0.5)

mylist = []
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
mylist.append(ac)
print(cm)
print(ac)

Train on 100 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
[[3 0]
 [7 0]]
0.3


## Visualize the metric results

In [7]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, matthews_corrcoef
from sklearn.metrics import roc_auc_score, average_precision_score
from prettytable import PrettyTable
# Each metric is held in a one-element list, evaluated on the test split.
# Threshold-based metrics use the boolean y_pred; the ranking metrics (auROC,
# auPRC) use the model's raw predictions.
accuracy_scores = [accuracy_score(y_true=y_test, y_pred=y_pred)]
f1_scores = [f1_score(y_true=y_test, y_pred=y_pred)]
recall_scores = [recall_score(y_true=y_test, y_pred=y_pred)]
precision_scores = [precision_score(y_true=y_test, y_pred=y_pred)]
MCCs = [matthews_corrcoef(y_true=y_test, y_pred=y_pred)]
auROCs = [roc_auc_score(y_true=y_test, y_score=clf.predict(X_test))]
auPRCs = [average_precision_score(y_true=y_test, y_score=clf.predict(X_test))]

# Column headers paired, in display order, with the list that fills each column.
column_names = ['Accuracy', 'auROC', 'auPRC', 'recall', 'precision', 'f1', 'MCC']
metric_columns = [
    accuracy_scores,
    auROCs,
    auPRCs,
    recall_scores,
    precision_scores,
    f1_scores,
    MCCs,
]

table = PrettyTable()
for _name, _values in zip(column_names, metric_columns):
    # Round to four decimals for display only; the stored lists stay unrounded.
    table.add_column(_name, np.round(_values, 4))
print(table)

+----------+--------+--------+--------+-----------+--------+-----+
| Accuracy | auROC  | auPRC  | recall | precision |   f1   | MCC |
+----------+--------+--------+--------+-----------+--------+-----+
|   0.3    | 0.1905 | 0.5333 |  1.0   |    0.3    | 0.4615 | 0.0 |
+----------+--------+--------+--------+-----------+--------+-----+


  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
