In [1]:
random_seed = 1
import random
random.seed(random_seed)

import numpy as np
# set the random seed to make the experiment reproducible

np.random.seed(random_seed)

import tensorflow
# set seed
tensorflow.random.set_seed(random_seed)

In [33]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import tensorflow as tf
assert tf.__version__ >= "2.0"
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import seaborn as sns
from keras.utils import np_utils

# imblean provides tools for us to deal with imbalanced class sizes
from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.combine import SMOTEENN

# example of random oversampling to balance the class distribution
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import RandomOverSampler

from imblearn.over_sampling import ADASYN

import sklearn
assert sklearn.__version__ >= "0.20"

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

In [34]:
X_train_df = pd.read_excel('data/model_inputs/x_train_3perc.xlsx', index_col=0)
X_test_df = pd.read_excel('data/model_inputs/x_test_3perc.xlsx', index_col=0)
y_train_df = pd.read_excel('data/model_inputs/y_train_3perc.xlsx', index_col=0)
y_test_df = pd.read_excel('data/model_inputs/y_test_3perc.xlsx', index_col=0)

In [35]:
pd.DataFrame(y_train_df.reset_index()).groupby('decision').count()

Unnamed: 0_level_0,index
decision,Unnamed: 1_level_1
-1,147
0,1264
1,100


### Oversampling

In [36]:
def smote(X_train, y_train):
    sm = SMOTE(sampling_strategy='not majority')
    X_train_oversampled, y_train_oversampled = sm.fit_resample(X_train, y_train)
    return X_train_oversampled, y_train_oversampled

def random_oversampler(X_train, y_train):
    oversample = RandomOverSampler(sampling_strategy='not majority')
    X_over, y_over = oversample.fit_resample(X_train, y_train)
    return X_over, y_over

def adasyn(X_train, y_train):
    ada = ADASYN(sampling_strategy = 'not majority')
    X_resampled, y_resampled = ADASYN().fit_resample(X_train, y_train)
    return X_resampled, y_resampled

In [37]:
sm = smote(X_train_df, y_train_df)
sm[1].value_counts()

decision
-1          1264
 0          1264
 1          1264
dtype: int64

In [38]:
random_oversampling = random_oversampler(X_train_df, y_train_df)
random_oversampling[1].value_counts()

decision
-1          1264
 0          1264
 1          1264
dtype: int64

In [39]:
ada = adasyn(X_train_df, y_train_df)
ada[1].value_counts()

decision
 0          1264
 1          1258
-1          1235
dtype: int64

### Shuffle + Numpy Array

In [40]:
def shuffle_numpy_encode(X_train, X_test, y_train, y_test):
    # shuffle the dataset! 
    # train_data = X_train.copy(deep=True)
    # train_data['decision'] = y_train['decision']

    # train_data = train_data.sample(frac=1)

    # X_train = train_data.drop(['decision'], axis=1)
    # y_train = pd.DataFrame(train_data['decision'])

    # convert to numpy arrays
    X_train = np.array(X_train).astype(dtype='float64')
    X_test = np.array(X_test).astype(dtype='float64')

    y_train = np.array(y_train['decision']).astype(dtype='uint8')
    y_test = np.array(y_test['decision']).astype(dtype='uint8')

    # one hot encode
    # y_train = np_utils.to_categorical(y_train, num_classes=3)
    # y_test = np_utils.to_categorical(y_test, num_classes=3)

    y_train = np.array(list(map(lambda x: 2 if x==-1 else x, y_train))).astype(dtype='uint8')
    y_test = np.array(list(map(lambda x: 2 if x==-1 else x, y_test))).astype(dtype='uint8')


    return X_train, X_test, y_train, y_test

In [41]:
def plot_curve(hist):
  """Plot a curve of one or more classification metrics vs. epoch."""  
  # list_of_metrics should be one of the names shown in:
  # https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#define_the_model_and_metrics  

  epochs = hist.epoch
  f, ax = plt.subplots(ncols=2, figsize=(20,8))
  ax[0].plot(epochs, hist.history['loss'], label='Training Loss')
  ax[0].plot(epochs, hist.history['val_loss'], label='Validation Loss')
  ax[0].set_xlabel('Epochs')
  ax[0].set_ylabel('Loss')
  ax[0].legend()
  ax[1].plot(epochs, hist.history['accuracy'], label='Training Accuracy')
  ax[1].plot(epochs, hist.history['val_accuracy'], label='Validation Accuracy')
  ax[1].set_xlabel('Epochs')
  ax[1].set_ylabel('Accuracy')
  ax[1].legend()

In [42]:
from tabnanny import verbose


def create_model(my_learning_rate):
  """Create and compile a deep neural net."""
  model = tf.keras.models.Sequential()
  # model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
  model.add(tf.keras.layers.Dense(input_shape=(X_train_df.shape[1],), units=300, activation='relu'))
  model.add(tf.keras.layers.Dense(units=100, activation='relu'))
  model.add(tf.keras.layers.Dense(units=3, activation='softmax'))    
  
  # # All models in this course are sequential.
  # model = tf.keras.models.Sequential()

  # # Define the first hidden layer.   
  # model.add(tf.keras.layers.Dense(input_shape=(X_train_df.shape[1],), units=32, activation='relu')) ### remove flatten code
  
  # # Define a dropout regularization layer. 
  # model.add(tf.keras.layers.Dropout(rate=0.2))

  # # Define the output layer. The units parameter is set to 10 because
  # # the model must choose among 10 possible output values (representing
  # # the digits from 0 to 9, inclusive).
  # #
  # # Don't change this layer.
  # model.add(tf.keras.layers.Dense(units=3, activation='softmax'))    ### change to 3
                           
  # # Construct the layers into a model that TensorFlow can execute.  
  # # Notice that the loss function for multi-class classification
  # # is different than the loss function for binary classification.  
  # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
  #               loss="sparse_categorical_crossentropy", 
  #               metrics=['accuracy', tf.keras.metrics.Recall(), tf.keras.metrics.Precision()])
  
  model.compile(optimizer=tf.keras.optimizers.SGD(), loss="sparse_categorical_crossentropy", metrics=['accuracy'])

  return model    


def train_model(model, train_features, train_label, epochs,
                batch_size=None, validation_split=0.1):
  """Train the model by feeding it data."""

  history = model.fit(x=train_features, y=train_label, batch_size=batch_size,
                      epochs=epochs, shuffle=True, 
                      validation_split=validation_split, verbose=0)
  
  return history   

### Train Model

In [43]:
# The following variables are the hyperparameters.
learning_rate = 0.003
epochs = 50
batch_size = 4000
validation_split = 0.2

# Establish the model's topography.
# my_model = create_model(learning_rate)

In [44]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(X_train_df.shape[1],)))
model.add(tf.keras.layers.Dense(units=300, activation='relu'))
model.add(tf.keras.layers.Dense(units=100, activation='relu'))
model.add(tf.keras.layers.Dense(units=3, activation='softmax'))  
model.compile(optimizer=tf.keras.optimizers.SGD(), loss="sparse_categorical_crossentropy", metrics=['accuracy'])

In [45]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 11)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 300)               3600      
_________________________________________________________________
dense_7 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 303       
Total params: 34,003
Trainable params: 34,003
Non-trainable params: 0
_________________________________________________________________


In [46]:
results = pd.DataFrame(columns=['loss', 'accuracy', 'recall', 'precision'])

SMOTE Results

In [47]:
X_train, X_test, y_train, y_test = shuffle_numpy_encode(sm[0], X_test_df, sm[1], y_test_df)

In [48]:
X_train.shape

(3792, 11)

In [49]:
history = model.fit(x=X_train, y=y_train, epochs=30, shuffle=True)

Epoch 1/30


InvalidArgumentError:  Received a label value of 255 which is outside the valid range of [0, 3).  Label values: 0 0 0 1 0 255 0 255 1 255 0 1 255 0 255 255 255 255 0 0 1 1 0 0 255 255 1 255 0 1 1 255
	 [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-49-d1003b9dece6>:1) ]] [Op:__inference_train_function_1541]

Function call stack:
train_function


In [23]:
# # Train the model on the normalized training set.
# hist = train_model(model, X_train, y_train, epochs, batch_size, validation_split)
# # Plot a graph of the metric vs. epochs.
# plot_curve(hist)
# # Evaluate on Test & Save results
# results.loc['smote'] = list(model.evaluate(X_test, y_test))

InvalidArgumentError:  Received a label value of 255 which is outside the valid range of [0, 3).  Label values: 0 0 0 1 0 0 0 0 255 255 255 0 255 255 255 1 1 1 1 0 0 0 1 0 255 1 255 255 0 0 255 255 255 0 0 0 0 255 0 255 255 255 0 0 0 255 1 255 0 255 1 1 0 0 255 0 0 0 255 1 255 0 0 0 255 0 255 0 0 0 255 1 0 255 255 0 0 0 1 1 0 255 255 0 255 255 255 1 0 0 255 0 1 255 0 0 255 0 0 255 255 0 255 0 0 0 1 1 255 255 255 0 255 0 255 0 0 255 255 1 255 0 1 0 255 255 255 255 1 1 0 1 0 0 0 0 255 255 0 255 255 255 0 255 1 255 255 255 255 0 255 0 255 0 0 255 1 0 0 255 255 255 255 255 255 255 0 1 255 0 255 1 255 255 0 1 255 255 255 255 0 0 0 0 255 0 0 0 255 1 0 0 0 255 1 0 0 255 0 255 1 1 0 255 0 0 0 1 255 255 255 1 0 255 255 255 255 255 255 255 255 255 1 255 255 255 0 0 0 0 255 0 255 0 0 0 255 255 0 0 255 0 255 0 0 255 255 0 255 255 255 0 0 0 0 255 255 255 255 0 0 0 255 255 0 1 0 0 0 1 1 0 0 255 0 0 1 255 255 0 255 255 255 255 0 255 0 255 1 0 0 255 255 255 0 1 0 0 0 255 255 1 0 255 0 0 0 0 255 255 255 0 255 255 0 255 255 255 0 1 0 255 0 0 0 255 0 0 0 0 0 255 255 1 0 1 255 255 255 0 1 255 0 255 255 0 255 0 1 1 0 0 0 0 255 1 0 0 0 0 255 0 255 255 0 0 0 0 0 1 255 255 255 255 255 255 0 1 1 255 0 0 0 0 0 0 0 0 255 255 255 255 255 0 1 0 0 0 255 1 1 255 0 0 0 1 255 0 255 0 255 0 255 0 255 1 0 255 1 255 0 255 1 255 0 255 255 0 1 1 1 255 255 0 1 255 255 1 255 255 1 0 255 1 255 255 1 255 0 1 255 0 0 255 255 0 0 255 0 255 255 255 1 255 0 255 0 0 0 1 0 255 1 0 255 255 255 0 0 255 0 0 0 0 255 255 1 255 0 255 1 255 1 255 0 255 0 0 0 255 1 255 0 0 255 0 255 255 0 255 1 0 255 255 0 1 0 1 255 1 0 0 255 255 0 255 0 0 255 1 255 255 1 0 0 0 255 0 0 255 1 0 1 0 1 1 255 0 255 255 1 0 0 255 0 0 0 255 255 0 1 0 0 1 255 0 0 0 0 255 255 1 255 255 0 0 0 255 0 255 1 255 255 0 255 255 255 255 255 0 1 0 1 255 0 255 0 0 0 0 1 0 0 255 1 255 0 0 255 255 255 255 255 255 255 1 0 255 255 255 0 1 0 0 1 255 0 0 0 0 0 255 0 0 0 255 255 255 255 255 255 0 255 1 0 255 255 255 1 0 255 0 0 0 0 0 1 0 1 255 0 255 255 255 1 255 0 1 255 255 255 255 1 255 255 255 0 0 0 255 255 255 255 255 0 0 0 0 255 255 255 1 255 1 0 255 255 255 255 0 0 0 0 1 1 0 0 255 255 0 0 0 255 255 255 255 1 255 255 255 0 0 0 0 255 255 0 255 255 255 255 1 1 1 0 255 0 255 255 255 0 1 255 255 0 255 1 255 0 255 0 255 255 0 255 255 255 255 0 1 255 255 1 1 0 255 1 255 255 0 1 255 255 255 0 255 255 1 0 255 0 0 0 255 1 255 255 1 0 1 255 1 255 255 0 255 0 0 1 255 255 255 0 0 255 255 255 255 0 0 0 255 1 255 0 0 0 0 255 1 255 0 0 255 1 1 255 1 0 255 0 1 0 255 255 1 0 0 255 1 0 255 1 255 255 0 255 255 255 1 0 1 0 0 1 1 0 0 0 255 0 1 1 255 0 255 1 255 255 255 0 0 0 1 0 0 0 0 0 0 0 0 255 1 255 0 255 255 255 1 255 0 0 255 0 0 0 0 255 255 0 255 0 1 0 255 0 1 255 255 255 255 0 0 0 255 0 255 0 255 255 255 255 255 0 0 255 1 255 255 0 1 0 1 0 255 255 1 1 1 0 0 1 255 0 1 1 0 1 255 255 1 0 1 0 0 0 255 0 0 1 1 255 0 0 1 0 0 0 1 1 0 0 255 0 0 0 1 1 0 255 255 0 0 255 0 0 255 1 255 0 255 0 1 1 1 0 255 1 255 255 0 255 255 0 255 1 0 255 255 1 0 255 0 0 0 0 255 1 0 255 255 255 0 255 255 255 255 1 255 255 0 0 255 0 0 0 0 0 255 255 255 255 0 0 255 255 255 255 255 0 255 255 1 1 255 255 0 1 255 0 0 0 255 255 255 0 255 0 0 255 0 255 0 255 0 255 0 0 255 255 0 1 255 1 255 255 255 0 0 0 255 0 0 255 255 0 255 255 255 1 255 1 255 255 255 255 1 255 255 255 255 255 0 0 0 0 0 255 0 255 1 1 1 0 0 1 255 0 255 255 255 1 0 0 0 0 255 1 0 1 255 1 0 255 0 0 0 1 255 255 1 0 255 1 255 1 1 0 255 0 1 0 255 0 255 0 255 1 255 0 255 255 1 0 255 0 0 255 0 0 255 0 1 255 0 0 0 255 0 1 0 255 255 255 1 0 1 255 1 255 255 0 255 0 255 0 0 255 1 0 255 255 0 255 255 1 0 1 0 0 255 255 255 1 255 255 0 1 0 255 255 255 255 1 1 255 0 0 0 255 255 0 1 0 255 0 255 0 0 255 0 1 1 255 0 0 255 0 0 255 255 1 1 255 0 0 0 0 0 1 0 1 255 255 255 0 255 255 0 255 0 0 0 255 0 1 1 1 0 255 1 255 0 0 0 255 255 1 1 255 255 1 0 255 255 0 0 255 255 0 255 255 255 0 0 255 255 255 255 0 0 255 255 0 255 255 0 255 0 255 0 1 0 255 255 255 0 0 255 255 0 255 1 255 0 0 0 0 255 255 0 0 0 255 0 255 255 0 255 255 0 255 1 0 0 255 255 0 255 255 0 255 255 255 255 0 255 255 255 1 0 255 1 255 0 255 255 255 1 0 1 1 0 255 255 0 255 255 255 255 255 1 0 0 255 0 0 255 255 0 255 0 255 255 255 0 1 0 255 255 255 0 1 255 0 255 255 255 0 0 1 0 0 0 0 0 255 255 255 0 0 0 255 0 0 0 0 1 0 0 255 1 0 255 255 0 1 255 255 255 255 0 255 0 0 0 255 255 0 255 255 0 255 0 255 255 255 0 0 0 0 1 0 0 255 255 255 0 0 255 255 0 0 255 255 255 255 0 0 1 0 0 1 0 0 255 0 0 0 255 0 0 0 255 0 1 0 1 0 255 0 255 0 0 0 0 0 255 255 0 255 1 0 255 0 255 0 1 255 255 1 255 255 255 1 255 255 255 0 0 255 0 0 255 1 255 255 255 255 255 255 0 255 255 0 0 1 0 255 1 255 255 0 255 0 0 0 255 0 1 1 255 255 255 255 0 255 255 0 255 0 0 1 255 1 0 0 255 255 0 1 1 255 255 255 255 1 0 255 0 255 0 255 255 255 0 0 0 0 255 1 255 255 1 0 1 0 0 0 255 0 1 0 255 255 255 255 0 255 0 0 0 1 0 255 0 255 255 0 255 255 0 255 0 1 255 255 255 1 255 0 1 255 1 0 0 1 0 0 255 255 0 255 255 1 255 1 0 1 0 0 255 0 0 255 255 0 255 255 0 255 0 255 255 255 1 255 0 255 0 1 255 255 255 0 255 255 255 255 255 0 1 255 0 255 255 255 255 255 1 1 255 1 1 255 255 0 1 255 0 1 1 0 0 255 0 1 0 0 255 0 255 0 1 1 255 0 255 1 0 1 255 255 0 0 255 0 255 1 0 0 255 0 0 0 0 1 0 0 0 0 255 0 0 0 255 0 0 1 0 0 0 0 0 0 0 255 0 1 0 1 255 0 1 0 255 0 0 0 1 1 255 0 255 0 0 255 0 0 0 255 0 1 0 0 255 0 0 1 1 255 0 0 255 1 255 0 0 0 255 0 255 255 1 255 255 1 255 255 1 255 255 255 255 0 255 0 0 255 0 1 0 0 255 255 255 255 0 1 0 0 0 1 255 0 1 1 255 255 255 255 0 255 0 255 255 0 0 255 0 255 255 0 1 0 255 0 0 255 0 255 255 255 0 255 0 255 0 0 0 0 1 0 0 255 255 0 255 1 255 0 1 1 255 0 255 0 1 255 0 255 0 1 255 255 0 0 255 255 0 255 0 255 1 0 1 255 255 0 255 0 0 255 0 1 255 1 1 0 1 0 255 255 1 255 255 1 255 0 255 255 1 0 255 0 255 0 0 0 0 1 255 255 255 0 0 255 0 255 255 0 0 255 255 0 255 0 0 255 0 255 255 0 255 0 0 1 255 0 0 255 0 1 0 0 255 1 1 255 255 1 1 0 0 0 255 255 1 0 255 1 0 1 0 1 255 0 255 0 255 255 255 255 255 0 255 0 0 255 255 255 0 255 0 255 0 0 1 0 255 1 0 1 255 0 1 0 0 1 0 255 255 0 0 0 1 255 0 0 0 1 255 255 0 255 0 0 255 255 1 1 0 255 255 255 255 0 255 255 0 1 255 255 0 1 0 0 0 0 255 0 255 1 0 255 255 0 255 255 255 0 255 0 0 255 255 255 0 0 0 0 255 255 0 0 0 0 1 255 255 255 255 0 0 1 255 0 0 0 0 1 0 255 255 0 255 0 0 1 255 255 0 255 0 255 0 255 0 255 0 0 0 1 0 1 0 255 0 255 255 255 0 0 255 255 1 255 1 255 0 255 1 0 1 255 0 1 255 0 0 1 0 255 1 0 1 0 1 1 255 0 255 255 255 0 1 0 255 0 255 255 0 255 0 1 255 0 255 0 0 0 0 255 1 255 0 1 255 0 1 0 1 255 0 0 255 255 0 255 0 255 255 1 255 1 255 255 0 0 255 255 1 0 255 255 0 255 255 0 0 255 0 255 255 255 0 255 255 0 255 255 255 255 255 255 0 255 0 255 1 255 255 0 0 255 1 255 1 0 1 0 255 0 255 0 0 255 0 0 255 255 255 255 0 255 0 1 0 255 1 255 0 0 1 0 0 1 1 255 0 255 255 0 1 255 255 0 0 0 1 0 0 255 1 255 255 255 255 0 0 0 255 255 0 0 0 0 0 0 0 0 255 0 255 0 0 255 1 255 0 255 0 0 1 0 255 1 255 0 255 1 255 1 255 1 0 255 255 1 0 0 0 255 255 0 255 1 255 1 0 0 0 255 255 0 0 0 255 255 0 1 0 1 255 0 0 255 0 0 255 0 0 1 255 0 255 255 255 1 1 255 255 255 0 1 0 0 1 0 0 1 0 1 0 0 1 0 255 255 0 0 255 1 255 255 0 0 1 255 0 255 255 0 255 0 255 255 255 0 255 0 255 0 255 255 255 255 255 0 0 0 0 1 255 255 0 0 1 255 255 255 1 255 255 255 0 255 1 0 255 0 255 1 1 0 255 255 255 255 0 0 0 0 0 0 255 0 255 0 255 255 255 0 255 255 255 255 0 255 0 1 0 0 0 255 0 255 255 1 255 0 1 0 1 0 255 0 0 0 0 1 255 255 0 0 0 0 0 255 1 0 255 0 1 255 0 1 255 0 1 255 1 0 1 255 1 0 0 1 255 255 255 0 0 255 255 0 0 0 0 0 0 1 0 1 0 255 255 1 0 1 255 255 255 1 255 0 0 1 255 0 255 0 0 255 0 0 255 0 255 0 0 255 255 0 1 255 255 255 0 255 0 1 255 0 0 1 0 0 0 0 0 0 0 255 1 255 255 255 255 1 0 255 255 255 0 0 0 0 255 0 0 255 1 0 0 0 0 255 255 0 1 255 255 255 0 255 0 255 1 0 1 255 255 1 1 0 255 255 0 1 0 0 255 0 0 0 0 255 1 255 255 0 0 1 0 255 0 1 0 255 255 0 1 0 0 255 0 255 255 0 0 255 1 0 0 0 0 255 0 255 0 255 0 0 1 1 0 255 0 0 255 255 0 0 255 255 255 0 1 0 255 255 255 0 0 255 1 0 255 1 255 1 0 0 0 255 1 1 0 0 0 0 0 1 1 255 0 255 0 1 1 1 1 0 0 255 0 0 255 255 255 0 255 1 255 1 255 0 0 255 0 255 255 255 0 1 0 0 0 255 0 255 1 0 0 255 0 255 255 0 255 0 0 0 255 0 0 0 0 255 0 1 255 255 0 255 255 0 0 0 255 255 255 255 255 0 255 1 255 1 0 255 0 1 0 255 255 255 0 0 0 255 0 255 255 0 255 255 255 0 255 255 0 255 1 0 1 255 255 255 1 255 1 1 0 1 0 0 255 0 0 1 255 255 1 255 255 255 255 255 0 0 0 0 255 255 0 255 1 255 255 0 0 255 255 1 1 255 1 0 255 1 255 0 255 1 1 0 0 0 0 255 255 255 255 0 0 1 1 255 0 0 0 255 0 0 255 1 255 255 255 255 0 255 255 1 0 255 255 1 1 0 255 255 0 0 255 1 255 0 0 1 255 0 255 0 0 1 0 0 255 255 0 255 1 255 0 1 255 255 0 255 1 0 0 1 1 1 0 0 255 1 0 0 0 0 1 255 0 0 255 255 0 0 0 0 1 255 0 1 0 0 255 0 0 0 1 1 1 0 0 0 0 255 1 255 255 0 255 255 0
	 [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-14-5a5862b6c682>:44) ]] [Op:__inference_train_function_488]

Function call stack:
train_function


In [None]:
predict_x=my_model.predict(X_test) 
predictions=np.argmax(predict_x,axis=1)

In [None]:
predict_x

In [None]:
set(predictions)

In [None]:
np.array(y_test_df['decision'].astype('string'))

In [None]:
sklearn.metrics.multilabel_confusion_matrix(
    np.array(y_test_df['decision']),
    classes_x,
    labels=[0,1,-1]
    # num_classes=3,
    # weights=None,
    # dtype=tf.dtypes.int32,
    # name=None
)

Random Oversampling Results

In [None]:
X_train, X_test, y_train, y_test = shuffle_numpy_encode(random_oversampling[0], X_test_df, random_oversampling[1], y_test_df)

In [None]:
# Train the model on the normalized training set.
hist = train_model(my_model, X_train, y_train, epochs, batch_size, validation_split)
# Plot a graph of the metric vs. epochs.
plot_curve(hist)
# Evaluate on Test & Save results
results.loc['random_oversampling'] = list(my_model.evaluate(X_test, y_test))

ADASYN

In [None]:
X_train, X_test, y_train, y_test = shuffle_numpy_encode(ada[0], X_test_df, ada[1], y_test_df)

In [None]:
# Train the model on the normalized training set.
hist = train_model(my_model, X_train, y_train, epochs, batch_size, validation_split)
# Plot a graph of the metric vs. epochs.
plot_curve(hist)
# Evaluate on Test & Save results
results.loc['adasyn'] = list(my_model.evaluate(X_test, y_test))

### Results

In [None]:
# 3 percent
results['f1'] = 2 * (results['precision'] * results['recall']) / (results['precision'] + results['recall'])
results 

In [None]:
# 5 percent
results['f1'] = 2 * (results['precision'] * results['recall']) / (results['precision'] + results['recall'])
results 