In [1]:
import pandas as pd
from sklearn import preprocessing
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Feature matrices stored in the PCA_* files.
PCA_xTrain = pd.read_csv('PCA_xTrain.csv')
PCA_xTest = pd.read_csv('PCA_xTest.csv')

# Full feature matrices and their labels.
xTrain, xTest = pd.read_csv('xTrain.csv'), pd.read_csv('xTest.csv')
yTrain, yTest = pd.read_csv('yTrain.csv'), pd.read_csv('yTest.csv')

# Min-max scale both feature sets. The scaler is always fitted on the
# training split only and then applied to the matching test split.
scaler = preprocessing.MinMaxScaler()
nor_xTrain = scaler.fit_transform(xTrain)
nor_xTest = scaler.transform(xTest)

# The same scaler object is refitted here, so after this cell `scaler`
# holds the statistics of PCA_xTrain, not of xTrain.
nor_PCA_xTrain = scaler.fit_transform(PCA_xTrain)
nor_PCA_xTest = scaler.transform(PCA_xTest)

In [3]:
# Width of the input layer: one node per feature column of xTrain.
input_nodes = len(xTrain.columns)
# Width of the output layer: a single node.
output_nodes = 1

In [4]:
### optimal model
def create_model(layers, dropout, activation, optimizer):
    """Build and compile a feed-forward binary classifier.

    The input width and output width are read from the notebook-level
    variables ``input_nodes`` and ``output_nodes``.

    Parameters
    ----------
    layers : iterable of int
        Number of nodes of each hidden Dense layer, in order.
    dropout : float
        Rate given to the Dropout layer added after every hidden layer.
    activation : str
        Activation used by every hidden Dense layer.
    optimizer : str or optimizer instance
        Forwarded unchanged to ``model.compile``.

    Returns
    -------
    A compiled Sequential model using binary cross-entropy loss and
    reporting accuracy.
    """
    # The notebook imports Sequential and Dense twice (from tensorflow.keras
    # and then from keras); the later import wins at module level. Importing
    # locally guarantees that the model and all of its layers come from the
    # same package.
    from tensorflow.keras import Sequential, Input
    from tensorflow.keras.layers import Dense, Dropout

    model = Sequential()
    model.add(Input((input_nodes,)))

    # One Dense + Dropout pair per requested hidden layer.
    for nodes in layers:
        model.add(Dense(nodes, kernel_initializer='he_uniform', activation=activation))
        model.add(Dropout(dropout))

    # Sigmoid output so the prediction can be read as a probability.
    model.add(Dense(output_nodes, kernel_initializer='glorot_uniform', activation='sigmoid'))
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [5]:
# Wrap the builder in the scikit-learn style classifier. All settings are
# passed as keyword arguments alongside the build function.
model = KerasClassifier(
    build_fn=create_model,
    # network architecture
    layers=(48, 32),
    dropout=0.35,
    activation='relu',
    optimizer='rmsprop',
    # training settings
    batch_size=128,
    epochs=100,
    verbose=0,
)

In [6]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,fbeta_score,roc_auc_score,roc_curve,auc
import numpy as np

# Train on the min-max scaled full feature set; the returned History object
# is the cell's displayed value.
model.fit(x=nor_xTrain, y=yTrain)

<tensorflow.python.keras.callbacks.History at 0x1a381ce390>

In [7]:
# Hard class predictions for both splits (training first, then testing).
train_predict, test_predict = model.predict(nor_xTrain), model.predict(nor_xTest)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [8]:
from sklearn.metrics import plot_roc_curve
# Accuracy and F-beta (beta=0.5) of the hard class predictions on each split.
# Each label is printed on its own line, followed by the value on the next.
# A classification report or confusion matrix can be produced the same way
# from (yTest, test_predict) with the already imported sklearn helpers.
print('training accuracy:', accuracy_score(yTrain, train_predict), sep='\n')
print('testing accuracy:', accuracy_score(yTest, test_predict), sep='\n')
print('training fbeta Score:', fbeta_score(yTrain, train_predict, beta=0.5), sep='\n')
print('testing fbeta Score:', fbeta_score(yTest, test_predict, beta=0.5), sep='\n')

training accuracy:
0.8145833333333333
testing accuracy:
0.8132086001292369
training fbeta Score:
0.8468828356882901
testing fbeta Score:
0.8461198333277039


In [9]:
# Keep only column 1 of the predicted probabilities for each split.
train_predict_proba, test_predict_proba = (
    model.predict_proba(features)[:, 1] for features in (nor_xTrain, nor_xTest)
)

Instructions for updating:
Please use `model.predict()` instead.


In [10]:
# ROC curves from the predicted probabilities; the curve arrays keep their
# names so they remain available after this cell.
fpr_train, tpr_train, thresholds_train = roc_curve(yTrain, train_predict_proba)
fpr_test, tpr_test, thresholds_test = roc_curve(yTest, test_predict_proba)

# Area under each curve, label on one line and value on the next.
print('training roc_auc Score:', auc(fpr_train, tpr_train), sep='\n')
print('testing roc_auc Score:', auc(fpr_test, tpr_test), sep='\n')

training roc_auc Score:
0.6980665705837528
testing roc_auc Score:
0.6971613740654132


In [11]:
from sklearn.dummy import DummyClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score,fbeta_score,roc_auc_score,plot_roc_curve

# Seed shared by SMOTE and the dummy baselines so the reported numbers are
# the same on every Restart & Run All.
BASELINE_SEED = 42


def report_baseline(label, x_train, y_train, x_test, y_test, beta=0.5,
                    random_state=BASELINE_SEED):
    """Fit a stratified DummyClassifier and print its test-set scores.

    Parameters
    ----------
    label : str
        Name of the data variant, shown in parentheses in each printed line.
    x_train, y_train
        Training features and labels the baseline is fitted on.
    x_test, y_test
        Test features to predict on and the labels to score against.
        ``x_test`` must be the test split that matches ``x_train``.
    beta : float
        Beta used for the F-beta score.
    random_state : int
        Seed for the baseline's random predictions.

    Returns
    -------
    tuple
        ``(fitted DummyClassifier, predictions on x_test)``.
    """
    baseline = DummyClassifier(strategy='stratified', random_state=random_state)
    baseline.fit(x_train, y_train)
    baseline_predictions = baseline.predict(x_test)

    print(f"baseline accuracy ({label}): ", accuracy_score(y_test, baseline_predictions))
    print(f"baseline fbeta score ({label}): ", fbeta_score(y_test, baseline_predictions, beta=beta))
    # NOTE: the AUC is computed from hard class labels, not from probabilities.
    print(f"baseline roc_auc score ({label}): ", roc_auc_score(y_test, baseline_predictions))

    return baseline, baseline_predictions


# Standardize each feature variant; the scaler is fitted on the training
# split only and then applied to the matching test split.
standard_scaler = preprocessing.StandardScaler()
standard_xTrain = standard_scaler.fit_transform(xTrain)
standard_xTest = standard_scaler.transform(xTest)
standard_PCA_xTrain = standard_scaler.fit_transform(PCA_xTrain)
standard_PCA_xTest = standard_scaler.transform(PCA_xTest)

# Oversample the training split only; the test split is left untouched.
oversample = SMOTE(random_state=BASELINE_SEED)
xTrain_smote, yTrain_smote = oversample.fit_resample(xTrain, yTrain)

# The scaler is refitted once more, so after this cell `standard_scaler`
# holds the statistics of the oversampled training set.
standard_smote_xTrain = standard_scaler.fit_transform(xTrain_smote)
standard_smote_xTest = standard_scaler.transform(xTest)

# `dummy_model` and `predictions` are reassigned for every variant, so after
# this cell they refer to the SMOTE baseline.
dummy_model, predictions = report_baseline(
    'original', standard_xTrain, yTrain, standard_xTest, yTest)
# The PCA baseline is evaluated on the PCA test features (previously it was
# given the non-PCA test matrix).
dummy_model, predictions = report_baseline(
    'PCA', standard_PCA_xTrain, yTrain, standard_PCA_xTest, yTest)
dummy_model, predictions = report_baseline(
    'SMOTE', standard_smote_xTrain, yTrain_smote, standard_smote_xTest, yTest)

baseline accuracy (original):  0.7006256241555543
baseline fbeta score (original):  0.8149948408946284
baseline roc_auc score (original):  0.5025162526527828
baseline accuracy (PCA):  0.6963813663866534
baseline fbeta score (PCA):  0.8131630910738331
baseline roc_auc score (PCA):  0.499815533777281
baseline accuracy (SMOTE):  0.5006461845738119
baseline fbeta score (SMOTE):  0.7230737937904527
baseline roc_auc score (SMOTE):  0.5003519795850975
