In [None]:
import pandas as pd
from sklearn import preprocessing
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Train/test feature tables stored under the "PCA_" prefix
# (presumably PCA-transformed features, judging by the file names -- TODO confirm).
PCA_xTrain, PCA_xTest = (
    pd.read_csv(csv_path) for csv_path in ('PCA_xTrain.csv', 'PCA_xTest.csv')
)

# Unsplit feature table and label table.
X, y = (pd.read_csv(csv_path) for csv_path in ('X.csv', 'y.csv'))

In [None]:
# Normalize the dataset: load the pre-split train/test tables and min-max scale them.
xTrain = pd.read_csv('xTrain.csv')
xTest = pd.read_csv('xTest.csv')
yTrain = pd.read_csv('yTrain.csv')
yTest = pd.read_csv('yTest.csv')

# Fit the scaler on the training features only, then apply the same
# min/max statistics to the test features so no test information leaks in.
scaler = preprocessing.MinMaxScaler()
scaler.fit(xTrain)
nor_xTrain = scaler.transform(xTrain)
nor_xTest = scaler.transform(xTest)

# Re-fit the same scaler object on the PCA training features (fit() discards
# the statistics learned above) and scale both PCA splits with it.
scaler.fit(PCA_xTrain)
nor_PCA_xTrain = scaler.transform(PCA_xTrain)
# Fix: the PCA test split must be transformed here; the original passed the
# raw `xTest`, which does not match the features this scaler was fitted on.
nor_PCA_xTest = scaler.transform(PCA_xTest)

In [None]:
### oversampling and undersampling
from imblearn.under_sampling import (ClusterCentroids, RandomUnderSampler,
                                     NearMiss,
                                     InstanceHardnessThreshold,
                                     CondensedNearestNeighbour,
                                     EditedNearestNeighbours,
                                     RepeatedEditedNearestNeighbours,
                                     AllKNN,
                                     NeighbourhoodCleaningRule,
                                     OneSidedSelection)
from imblearn.over_sampling import SMOTE
from sklearn import model_selection

# Oversample the training split with SMOTE. The seed is fixed so the synthetic
# samples (and everything trained on them) are reproducible across kernel restarts.
oversample = SMOTE(random_state=42)
xTrain_smote, yTrain_smote = oversample.fit_resample(xTrain, yTrain)

# Fit a fresh min-max scaler on the oversampled training features.
scaler = preprocessing.MinMaxScaler()
scaler.fit(xTrain_smote)
nor_xTrain_smote = scaler.transform(xTrain_smote)
# NOTE: this intentionally replaces the earlier `nor_xTest`, so the test features
# are scaled with the same statistics as `nor_xTrain_smote`.
nor_xTest = scaler.transform(xTest)

In [None]:
# The undersampling results were unsatisfactory, so this experiment is disabled:
# the code below is wrapped in a string literal and is never executed.
'''
undersample = RepeatedEditedNearestNeighbours()
X_renn, y_renn = undersample.fit_resample(X, y)
xTrain_renn, xTest_renn, yTrain_renn, yTest_renn = model_selection.train_test_split(X_renn, y_renn, train_size = 0.7, random_state=42)

scaler = preprocessing.MinMaxScaler()
scaler.fit(xTrain_renn)
nor_xTrain_renn = scaler.transform(xTrain_renn)
nor_xTest_renn = scaler.transform(xTest_renn)
'''

In [None]:
# Hyperparameter search space
# ---------------------------
# Each list holds the candidate values for one tunable setting.

# Hidden-layer architectures: one tuple per candidate network,
# one entry per hidden layer giving its number of units.
layers = [
    # one hidden layer
    (128,), (78,), (56,),
    # two hidden layers
    (178, 89), (98, 45), (48, 32), (128, 56), (79, 39),
    # three hidden layers
    (128, 56, 28), (78, 39, 19),
]

# Candidate dropout rates for the hidden layers.
dropouts = [0.2, 0.25, 0.3, 0.35, 0.4]

# Candidate hidden-layer activation functions.
activations = ['relu', 'sigmoid']

# Candidate optimizers.
optimizers = ['adam', 'rmsprop']

# Candidate mini-batch sizes.
batches = [128, 256]

In [None]:
# Sizes of the two ends of the network.
output_nodes = 1  # the output layer has a single node
input_nodes = xTrain_smote.shape[1]  # input layer: one node per feature column of the oversampled training set

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

def create_model(layers, dropout, activation, optimizer):
    """Build and compile a fully connected binary classifier.

    The input width and the output width are read from the module-level
    ``input_nodes`` and ``output_nodes`` variables.

    Parameters
    ----------
    layers : iterable of int
        Number of units of each hidden layer, in order from input to output.
    dropout : float
        Rate given to the ``Dropout`` layer placed after every hidden layer.
    activation : str
        Activation used by every hidden ``Dense`` layer.
    optimizer : str
        Optimizer passed to ``compile``.

    Returns
    -------
    The compiled ``Sequential`` model (binary cross-entropy loss, accuracy metric).
    """
    net = Sequential()
    net.add(Input((input_nodes,)))

    # One Dense + Dropout pair per requested hidden layer.
    for width in layers:
        hidden = Dense(width, kernel_initializer='he_uniform', activation=activation)
        net.add(hidden)
        net.add(Dropout(dropout))

    # Sigmoid head for the binary target.
    head = Dense(output_nodes, kernel_initializer='glorot_uniform', activation='sigmoid')
    net.add(head)

    net.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [None]:
# Wrap the Keras model builder so scikit-learn can clone, fit and score it.
model = KerasClassifier(build_fn=create_model, verbose=0, epochs=100)

# Search space: the keys match create_model's arguments, plus `batch_size`.
param_grid = dict(layers=layers, dropout=dropouts, activation=activations, optimizer=optimizers, batch_size=batches)

# Fix: RandomizedSearchCV takes its search space as `param_distributions`;
# `param_grid` is GridSearchCV's keyword and raises a TypeError here.
# `random_state` makes the sampled parameter combinations reproducible.
grid = RandomizedSearchCV(estimator=model, param_distributions=param_grid, random_state=42)
grid_result = grid.fit(nor_xTrain_smote, yTrain_smote)

# Report the best configuration, then the cross-validated score of every sampled one.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))