In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from sklearn.metrics import (
    classification_report,
    precision_score,
    recall_score,
    accuracy_score,
    f1_score,
    r2_score,
)
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
from binning import bin

Read the train and test datasets


In [2]:
train_df = pd.read_csv("Training_set.csv")
test_df = pd.read_csv("Testing_set.csv")

Get the training features and label


In [3]:
X_train = train_df.iloc[:, :-1].to_numpy()
y_train = train_df.iloc[:, -1].to_numpy()

Get the testing features and label


In [4]:
X_test = test_df.iloc[:, :-1].to_numpy()
y_test = test_df.iloc[:, -1].to_numpy()

Initialize a 5-fold cross-validation object


In [5]:
kf = KFold(n_splits=5)

## MLP Classifier


Bin the label


In [6]:
y_train_binned = bin(y_train)
y_test_binned = bin(y_test)

Convert the label to categorical


In [7]:
y_train_binned_categorical = to_categorical(y_train_binned)

Iniatialize an MLP Classifier model


In [8]:
mlp_class = Sequential()
mlp_class.add(Dense(256, activation="relu"))
mlp_class.add(Dense(len(np.unique(y_train_binned)), activation="softmax"))
mlp_class.compile(optimizer="adam", loss="categorical_crossentropy")

Define an EarlyStopping object


In [9]:
early_stopping = EarlyStopping(patience=20)

Initialize scores lists


In [10]:
precisions = []
recalls = []
accuracies = []
f1_scores = []

Train the model using 5-fold cross-validation


In [12]:
fold = 1
for train, test in kf.split(X_train, y_train_binned):
    print(f"##### FOLD: {fold} #####")

    # Fit the model
    mlp_class.fit(
        X_train[train],
        y_train_binned_categorical[train],
        epochs=100,
        batch_size=32,
        validation_data=(X_train[test], y_train_binned_categorical[test]),
        callbacks=[early_stopping],
        verbose=0,
    )

    # Predict on the test set
    predictions = np.argmax(mlp_class.predict(X_train[test]), axis=1)

    # Evaluate the model
    precision = precision_score(
        y_true=y_train_binned[test],
        y_pred=predictions,
        zero_division=0,
        average="weighted",
    )
    recall = recall_score(
        y_true=y_train_binned[test],
        y_pred=predictions,
        zero_division=0,
        average="weighted",
    )
    accuracy = accuracy_score(y_true=y_train_binned[test], y_pred=predictions)
    f1 = f1_score(
        y_true=y_train_binned[test],
        y_pred=predictions,
        zero_division=0,
        average="weighted",
    )

    # Store the result
    precisions.append(precision)
    recalls.append(recall)
    accuracies.append(accuracy)
    f1_scores.append(f1)

    # Print the scores for each fold
    print(f"Precision = {precision}")
    print(f"Recall = {recall}")
    print(f"Accuracy = {accuracy}")
    print(f"F1 score = {f1}\n")

    fold += 1

##### FOLD: 1 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 617us/step
Precision = 0.3473103519213587
Recall = 0.4439847478730027
Accuracy = 0.4439847478730027
F1 score = 0.38925906444964253

##### FOLD: 2 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 620us/step
Precision = 0.34894912592698224
Recall = 0.45703798814572716
Accuracy = 0.45703798814572716
F1 score = 0.36496743948782995

##### FOLD: 3 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 633us/step
Precision = 0.4119510277915492
Recall = 0.4593206490019844
Accuracy = 0.4593206490019844
F1 score = 0.3772199455589978

##### FOLD: 4 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 617us/step
Precision = 0.3531514283921204
Recall = 0.46216100540834987
Accuracy = 0.46216100540834987
F1 score = 0.38357574426180013

##### FOLD: 5 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 633us/step
Precision = 0.404

Print the mean scores of the folds


In [13]:
print("Mean Scores:")
print(f"Mean Precision = {np.mean(precisions)}")
print(f"Mean Recall = {np.mean(recalls)}")
print(f"Mean Accuracy = {np.mean(accuracies)}")
print(f"Mean F1 score = {np.mean(f1_scores)}")

Mean Scores:
Mean Precision = 0.3684833020259964
Mean Recall = 0.45603521773462785
Mean Accuracy = 0.45603521773462785
Mean F1 score = 0.3741413126245405


Make predictions on the test set


In [14]:
predictions = np.argmax(mlp_class.predict(X_test), axis=1)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 735us/step


Evaluate the model


In [15]:
print(classification_report(y_true=y_test_binned, y_pred=predictions, zero_division=0))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00     11718
           1       0.46      0.91      0.61     42216
           2       0.42      0.16      0.24     32447
           3       0.00      0.00      0.00      9998

    accuracy                           0.45     96379
   macro avg       0.22      0.27      0.21     96379
weighted avg       0.34      0.45      0.35     96379



## MLP Regressor


Iniatialize an MLP Regressor model


In [16]:
mlp_reg = Sequential()
mlp_reg.add(Dense(256, activation="relu"))
mlp_reg.add(Dense(1))
mlp_reg.compile(optimizer="adam", loss="mse")

Define an EarlyStopping object


In [17]:
early_stopping = EarlyStopping(patience=20)

Initialize scores lists


In [18]:
r2_scores = []

Train the model using 5-fold cross-validation


In [19]:
fold = 1
for train, test in kf.split(X_train, y_train_binned):
    print(f"##### FOLD: {fold} #####")

    # Fit the model
    mlp_reg.fit(
        X_train[train],
        y_train[train],
        epochs=100,
        batch_size=32,
        validation_data=(X_train[test], y_train[test]),
        callbacks=[early_stopping],
        verbose=0,
    )

    # Predict on the test set
    predictions = mlp_reg.predict(X_train[test])

    # Evaluate the model
    r2 = r2_score(y_true=y_train[test], y_pred=predictions)

    # Store the result
    r2_scores.append(r2)

    # Print the scores for each fold
    print(f"R2 score = {r2}")

    fold += 1

##### FOLD: 1 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 630us/step
R2 score = 0.053430649501092
##### FOLD: 2 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 628us/step
R2 score = 0.07431661763468589
##### FOLD: 3 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 611us/step
R2 score = 0.08929963797698914
##### FOLD: 4 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 625us/step
R2 score = 0.08792789918445143
##### FOLD: 5 #####
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 622us/step
R2 score = 0.09455471162797968


Print the mean scores of the folds


In [20]:
print(f"Mean R2 score = {np.mean(r2_scores)}")

Mean R2 score = 0.07990590318503962


Make predictions on the test set


In [21]:
predictions = mlp_reg.predict(X_test)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 593us/step


Evaluate the model


In [22]:
print(r2_score(y_true=y_test, y_pred=predictions))

0.05237978049936287
