In [2]:
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE, BorderlineSMOTE
from imblearn.under_sampling import RandomUnderSampler, TomekLinks
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report, make_scorer, accuracy_score
from sklearn.model_selection import RandomizedSearchCV, KFold, train_test_split
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

import torch
import torchvision
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
import time

%matplotlib inline

# Load Data:

In [285]:
X_train = pd.read_csv('Data/X_train_mlp.csv')
y_train = pd.read_csv('Data/y_train_mlp.csv', header = None)

# Splitting Out 10% to Hyperparameter Search:

In [286]:
# Converting y_train into an array:
y_train = np.array(y_train[0])

In [287]:
# Using train-test split to split out a random, stratified 10% portion of the training data to grid search:
X_gs, X_leave, y_gs, y_leave = train_test_split(X_train, y_train, 
                                                test_size = 0.9,     # Inverse of the size of the data to search
                                                stratify = y_train,  # Keeping same proportion of target classes
                                                random_state = 42)  

In [288]:
# Converting X to np array:
X_gs = X_gs.to_numpy()

# Distribution of Forest Cover Types in Dataset:

In [289]:
y_gs

array([1, 2, 2, ..., 1, 1, 2], dtype=int64)

In [290]:
y_gs.tolist().count(1)

13558

In [291]:
unique, counts = np.unique(y_gs, return_counts=True)

print(np.asarray((unique, counts)).T)

[[    1 13558]
 [    2 18131]
 [    3  2288]
 [    4   176]
 [    5   607]
 [    6  1111]
 [    7  1313]]


# Other Functions:

In [292]:
class CreatePytorchDataset(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
    
    def __len__(self):
        return len(self.X_data)
    
    def __getitem__(self, idx):
        return self.X_data[idx], self.y_data[idx]

In [293]:
def calc_acc(y_pred, y_test):
    y_pred_torch = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_values = torch.max(y_pred_torch, dim = 1)    
    correct_pred = (y_pred_values == y_test).float()
    accuracy = correct_pred.sum() / len(correct_pred)
    accuracy = accuracy * 100
    
    return accuracy

# MLP Model:

In [294]:
class MLP(nn.Module):
    
    def __init__(self, n_features, n_classes, n_neurons, activ, drop):
        super(MLP, self).__init__()
        
        # Define the layers: 
        # Defining the linear tranformations:
        self.fc1 = nn.Linear(n_features, n_neurons)
        self.fc2 = nn.Linear(n_neurons, int(n_neurons/2))
        self.fc3 = nn.Linear(int(n_neurons/2), int(n_neurons/4))
        
        # Output layer
        self.output = nn.Linear(int(n_neurons/4), n_classes)
       
        # Batch normalisation layers 
        # (https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html?highlight=batchnorm#torch.nn.BatchNorm1d)
        self.batchnorm1 = nn.BatchNorm1d(n_neurons)
        self.batchnorm2 = nn.BatchNorm1d(int(n_neurons/2))
        self.batchnorm3 = nn.BatchNorm1d(int(n_neurons/4))
    
        # Defining dropout rate:
        self.dropout = nn.Dropout(drop) 
        
        # Defining activation function
        self.activ = activ
        
        
    # Define forward function
    def forward(self, x):
        hidden = self.fc1(x)
        hidden = self.batchnorm1(hidden)
        hidden = self.activ(hidden)
        
        hidden = self.fc2(hidden)
        hidden = self.batchnorm2(hidden)
        hidden = self.activ(hidden)  
        hidden = self.dropout(hidden)
        
        hidden = self.fc3(hidden)
        hidden = self.batchnorm3(hidden)
        hidden = self.activ(hidden)
        hidden = self.dropout(hidden)
        
        output = self.output(hidden)
        
        return output
    

# Cross Validation Loop:

In [303]:
def cross_val_loop(model, n_epochs, train_loader, val_loader, criterion, optimizer):
    
    # To track lowest validation loss through epochs, set initially to infinity:
    min_val_loss = np.Inf
    max_acc = 0
    
    # List to hold losses through epochs:
    val_loss_list = []
    
    for epoch in range(1, n_epochs+1):
        
        # Setting initial losses/accuracy to zero, to be updated within batch loops
        val_loss_epoch = 0
        val_acc_epoch = 0

        # TRAINING #
        model.train()

        for data, target in train_loader:
            # Set gradients of model parameters to zero:
            optimizer.zero_grad()

            # Making predictions based on the training data batch
            y_pred_batch = model(data)

            # Cross-Entropy Loss:
            train_batch_loss = criterion(y_pred_batch, target.flatten())

            # Classifications correct within batch, to use to calculate overall accuracy:
            train_batch_acc = calc_acc(y_pred_batch, target.flatten())

            # Calculate the sum of gradients using backward:
            train_batch_loss.backward()

            # Update parameters by performing a single optimization:
            optimizer.step()


        # Disabling gradient calculation to speed up process, as backward will not be called during validation:
        with torch.no_grad():
            # VALIDATION #
            model.eval()

            for data, target in val_loader:
                # Use the model to predict target in val set batch:
                y_pred_val_batch =  model(data)

                # Calculate loss and accuracy on val set:
                val_batch_loss = criterion(y_pred_val_batch, target.flatten())
                val_batch_acc = calc_acc(y_pred_val_batch, target.flatten())

                # Update the running loss/accuracy totals:
                val_loss_epoch += train_batch_loss.item()
                val_acc_epoch += train_batch_acc.item()

        # PRINTING STATISTICS #

        # Calculate metrics across epoch:

        avg_val_loss = val_loss_epoch/ len(val_loader)
        avg_val_acc = val_acc_epoch/ len(val_loader)

        # Appending lists outside epoch loop to store results:
        val_loss_list.append(avg_val_loss)
        
        if avg_val_loss <= min_val_loss:
            # Alter min loss to current avg_val_loss:
            min_val_loss = avg_val_loss
            
        if avg_val_acc >= max_acc:
            # Alter min loss to current avg_val_loss:
            max_acc = avg_val_acc
        
        # EARLY STOPPING CRITERIA #
        # If validation loss has increased for 5 consequetive epochs, end loop:
        # Only run if > 10 epochs gone
        if epoch > 10:
            # Checks if the last 5 val_losses are all larger than the preceeding one:
            if ((val_loss_list[-1]>val_loss_list[-2]) and (val_loss_list[-2]>val_loss_list[-3]) and 
                (val_loss_list[-3]>val_loss_list[-4]) and (val_loss_list[-4]>val_loss_list[-5]) and 
                (val_loss_list[-5]>val_loss_list[-6])):
                break 
            

    return min_val_loss, max_acc, epoch

# Parameter Search Using Over and Under Sampling Methods:

In [298]:
# Setting static variables:

N_CLASSES = 7
N_INPUT_FEAT = 54
CRITERION = nn.CrossEntropyLoss()
EVAL_METRIC = make_scorer(accuracy_score)
max_epochs = 100
n_folds = 4
kf = KFold(n_splits = n_folds,shuffle = False)

In [299]:
# Setting Model and Optimizer (parameters found previously):

mlp_model = MLP(n_features=54, n_classes=7, n_neurons=512, activ = nn.LeakyReLU(), drop = 0.1)

optimizer = torch.optim.SGD(mlp_model.parameters(), lr = 0.1, momentum = 0.9)

## Defining Sampling Strategies:

In [300]:
# Sampling strategies to loop through:
tomek = [TomekLinks(sampling_strategy=[1,2]), 'None']

# Undersampling the two majority classes - Types 1 & 2:
unders = [(0.85, 0.85),                   # 15 % undersampling of majority classes
          (0.75, 0.75),                   # 25 %
          (0.60, 0.60),                   # 40 %
          (0.40, 0.40),                   # 60 %
          (0.20, 0.20),                   # 80 %
          (0.55, 0.40)  ]                 # Imbalanced undersampling

# Oversampling the 5 minority classes - Types 3, 4, 5, 6 & 7:
overs = [(1.2, 1.2, 1.2, 1.2, 1.2),       # 20% oversampling
         (1.5, 1.5, 1.5, 1.5, 1.5),       # 50% oversampling
         (2.0, 2.0, 2.0, 2.0, 2.0),       # 100% oversampling
         (4.0, 4.0, 4.0, 4.0, 4.0),       # 300% oversampling
         (6.0, 6.0, 6.0, 6.0, 6.0),       # 500% oversampling
         (2.0, 10.0, 10.0, 5.0, 4.0),     # Imbalanced oversampling
         (0),
         (1.2, 1.2, 1.2, 1.2, 1.2),       # Borderline SMOTE
         (1.5, 1.5, 1.5, 1.5, 1.5),
         (2.0, 2.0, 2.0, 2.0, 2.0),
         (4.0, 4.0, 4.0, 4.0, 4.0),
         (6.0, 6.0, 6.0, 6.0, 6.0),
         (2.0, 10.0, 10.0, 5.0, 4.0),
         (0),  ]

In [301]:
def get_count(array, cov_type, percent):
        # Count the number of a specific cover type in the current split array to define sampling bounds:
        count = round(array.tolist().count(cov_type)*percent)
        return count

In [315]:
test_results = np.zeros(((len(tomek)*len(unders)*len(overs)), 11))

loop_counter = 0

print("TOTAL LOOPS = ", (len(tomek)*len(unders)*len(overs)*n_folds))


for t in range(len(tomek)):

    for u in range(len(unders)):

        for o in range(len(overs)):

            val_losses = 0
            accuracies = 0
            epochs = 0
            vll = []

            start = time.time()

            for train_index, test_index in kf.split(X_gs):
                
                # Setting Model and Optimizer (parameters found previously):
                mlp_model = MLP(n_features=54, n_classes=7, n_neurons=512, activ = nn.LeakyReLU(), drop = 0.1)
                optimizer = torch.optim.SGD(mlp_model.parameters(), lr = 0.1, momentum = 0.9)

                # Obtaining the indexes to split the data for each Cross Validation Fold:
                X_train_CV, X_val_CV = X_gs[train_index], X_gs[test_index]
                y_train_CV, y_val_CV = y_gs[train_index], y_gs[test_index]
                
                # Definining Scaler:
                scaler = MinMaxScaler()

                # Fitting scaler on X_train_CV:
                X_train_CV = scaler.fit_transform(X_train_CV)
                
                # The applying that scaler to X_val separately:
                X_val_CV = scaler.transform(X_val_CV)
                
                
                ###### APPLYING SAMPLING STRATEGY ######
                # Appling sampling strategies to X_train/y_train after splitting for Cross-val
                
                # Only applying Tomek Links on half the loops (t = 0 or 1):
                if t == 0:
                    resample = tomek[t]
                    X_train_CV, y_train_CV = resample.fit_resample(X_train_CV, y_train_CV)
                    
                # Undersampling applied on majority classes according to methods above:
                under_strategy = {1:get_count(y_train_CV, 1, unders[u][0]),
                                  2:get_count(y_train_CV, 2, unders[u][1])}
                
                undersample = RandomUnderSampler(sampling_strategy = under_strategy)
                X_train_CV, y_train_CV = undersample.fit_resample(X_train_CV, y_train_CV)
                
                # Oversampling applied on minority classes according to methods above:
                if o == 6:
                    oversample = SMOTE()
                elif o == 13:
                    oversample = BorderlineSMOTE()
                elif o < 6:
                    over_strategy = {3:get_count(y_train_CV, 3, overs[o][0]),
                                     4:get_count(y_train_CV, 4, overs[o][1]),
                                     5:get_count(y_train_CV, 5, overs[o][2]),
                                     6:get_count(y_train_CV, 6, overs[o][3]),
                                     7:get_count(y_train_CV, 7, overs[o][4]),
                                    }
                    oversample = SMOTE(sampling_strategy = over_strategy)
                else:
                    over_strategy = {3:get_count(y_train_CV, 3, overs[o][0]),
                                     4:get_count(y_train_CV, 4, overs[o][1]),
                                     5:get_count(y_train_CV, 5, overs[o][2]),
                                     6:get_count(y_train_CV, 6, overs[o][3]),
                                     7:get_count(y_train_CV, 7, overs[o][4]),
                                    }
                    oversample = BorderlineSMOTE(sampling_strategy = over_strategy)
                
                X_train_CV, y_train_CV = oversample.fit_resample(X_train_CV, y_train_CV)
        

                # Converting these fold splits into tensors:
                X_train_CV_tensor = torch.from_numpy(X_train_CV).float()
                X_val_CV_tensor = torch.from_numpy(X_val_CV).float()
                y_train_CV_tensor = torch.from_numpy(y_train_CV).long() - 1
                y_val_CV_tensor = torch.from_numpy(y_val_CV).long() - 1

                # Creating combined datasets to feed into Data Loader:
                train_CV_dataset = CreatePytorchDataset(X_train_CV_tensor, y_train_CV_tensor)
                val_CV_dataset = CreatePytorchDataset(X_val_CV_tensor, y_val_CV_tensor)

                # Creating Data Loader Objects:
                train_CV_loader = DataLoader(train_CV_dataset, batch_size=512, shuffle = True, drop_last=True)
                val_CV_loader = DataLoader(val_CV_dataset, batch_size=512, shuffle=True, drop_last=True)


                print("Starting Loop {:.0f}".format(loop_counter))
                loop_counter += 1

                ######## EPOCH MODEL TRAINING ########
                min_val_loss_epochs, max_acc_epochs, epochs_taken = cross_val_loop(mlp_model, max_epochs, 
                                                     train_CV_loader, val_CV_loader, CRITERION, optimizer)

                val_losses += min_val_loss_epochs
                accuracies += max_acc_epochs
                epochs += epochs_taken
                vll.append(min_val_loss_epochs)

                #print("Time to complete CV Loop: {:.1f} minutes".format((time.process_time() - start)/60))

            av_val_loss = val_losses/ n_folds
            av_accuracy = accuracies/ n_folds
            av_epochs = epochs / n_folds
            time_taken = time.time() - start

            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][0] = av_val_loss
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][1] = av_accuracy
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][2] = t
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][3] = u
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][4] = o
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][5] = time_taken
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][6] = av_epochs
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][7] = vll[0]
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][8] = vll[1]
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][9] = vll[2]
            test_results[o+(u*len(overs))+(t*(len(unders)*len(overs)))][10] = vll[3]


TOTAL LOOPS =  672
Starting Loop 0
Starting Loop 1
Starting Loop 2
Starting Loop 3
Starting Loop 4
Starting Loop 5
Starting Loop 6
Starting Loop 7
Starting Loop 8
Starting Loop 9
Starting Loop 10
Starting Loop 11
Starting Loop 12
Starting Loop 13
Starting Loop 14
Starting Loop 15
Starting Loop 16
Starting Loop 17
Starting Loop 18
Starting Loop 19
Starting Loop 20
Starting Loop 21
Starting Loop 22
Starting Loop 23
Starting Loop 24
Starting Loop 25
Starting Loop 26
Starting Loop 27
Starting Loop 28
Starting Loop 29
Starting Loop 30
Starting Loop 31
Starting Loop 32
Starting Loop 33
Starting Loop 34
Starting Loop 35
Starting Loop 36
Starting Loop 37
Starting Loop 38
Starting Loop 39
Starting Loop 40
Starting Loop 41
Starting Loop 42
Starting Loop 43
Starting Loop 44
Starting Loop 45
Starting Loop 46
Starting Loop 47
Starting Loop 48
Starting Loop 49
Starting Loop 50
Starting Loop 51
Starting Loop 52
Starting Loop 53
Starting Loop 54
Starting Loop 55
Starting Loop 56
Starting Loop 57
Start

  n_samples_majority,


Starting Loop 72


  n_samples_majority,


Starting Loop 73


  n_samples_majority,


Starting Loop 74


  n_samples_majority,


Starting Loop 75
Starting Loop 76
Starting Loop 77
Starting Loop 78
Starting Loop 79
Starting Loop 80
Starting Loop 81
Starting Loop 82
Starting Loop 83
Starting Loop 84
Starting Loop 85
Starting Loop 86
Starting Loop 87
Starting Loop 88
Starting Loop 89
Starting Loop 90
Starting Loop 91
Starting Loop 92
Starting Loop 93
Starting Loop 94
Starting Loop 95
Starting Loop 96
Starting Loop 97
Starting Loop 98
Starting Loop 99


  n_samples_majority,


Starting Loop 100


  n_samples_majority,


Starting Loop 101


  n_samples_majority,


Starting Loop 102


  n_samples_majority,


Starting Loop 103
Starting Loop 104
Starting Loop 105
Starting Loop 106
Starting Loop 107
Starting Loop 108
Starting Loop 109
Starting Loop 110
Starting Loop 111
Starting Loop 112
Starting Loop 113
Starting Loop 114
Starting Loop 115
Starting Loop 116
Starting Loop 117
Starting Loop 118
Starting Loop 119
Starting Loop 120
Starting Loop 121
Starting Loop 122
Starting Loop 123
Starting Loop 124
Starting Loop 125
Starting Loop 126
Starting Loop 127


  n_samples_majority,


Starting Loop 128


  n_samples_majority,


Starting Loop 129


  n_samples_majority,


Starting Loop 130


  n_samples_majority,


Starting Loop 131
Starting Loop 132
Starting Loop 133
Starting Loop 134
Starting Loop 135
Starting Loop 136
Starting Loop 137
Starting Loop 138
Starting Loop 139
Starting Loop 140
Starting Loop 141
Starting Loop 142
Starting Loop 143
Starting Loop 144
Starting Loop 145
Starting Loop 146
Starting Loop 147
Starting Loop 148
Starting Loop 149
Starting Loop 150
Starting Loop 151
Starting Loop 152
Starting Loop 153
Starting Loop 154
Starting Loop 155


  n_samples_majority,


Starting Loop 156


  n_samples_majority,


Starting Loop 157


  n_samples_majority,


Starting Loop 158


  n_samples_majority,


Starting Loop 159
Starting Loop 160
Starting Loop 161
Starting Loop 162
Starting Loop 163
Starting Loop 164
Starting Loop 165
Starting Loop 166
Starting Loop 167
Starting Loop 168
Starting Loop 169
Starting Loop 170
Starting Loop 171
Starting Loop 172
Starting Loop 173
Starting Loop 174
Starting Loop 175
Starting Loop 176
Starting Loop 177
Starting Loop 178
Starting Loop 179


  n_samples_majority,


Starting Loop 180


  n_samples_majority,


Starting Loop 181


  n_samples_majority,


Starting Loop 182


  n_samples_majority,


Starting Loop 183


  n_samples_majority,
  n_samples_majority,


Starting Loop 184


  n_samples_majority,
  n_samples_majority,


Starting Loop 185


  n_samples_majority,
  n_samples_majority,


Starting Loop 186


  n_samples_majority,
  n_samples_majority,


Starting Loop 187
Starting Loop 188
Starting Loop 189
Starting Loop 190
Starting Loop 191
Starting Loop 192
Starting Loop 193
Starting Loop 194
Starting Loop 195
Starting Loop 196
Starting Loop 197
Starting Loop 198
Starting Loop 199
Starting Loop 200
Starting Loop 201
Starting Loop 202
Starting Loop 203
Starting Loop 204
Starting Loop 205
Starting Loop 206
Starting Loop 207


  n_samples_majority,


Starting Loop 208


  n_samples_majority,


Starting Loop 209


  n_samples_majority,


Starting Loop 210


  n_samples_majority,


Starting Loop 211


  n_samples_majority,
  n_samples_majority,


Starting Loop 212


  n_samples_majority,
  n_samples_majority,


Starting Loop 213


  n_samples_majority,
  n_samples_majority,


Starting Loop 214


  n_samples_majority,
  n_samples_majority,


Starting Loop 215
Starting Loop 216
Starting Loop 217
Starting Loop 218
Starting Loop 219
Starting Loop 220
Starting Loop 221
Starting Loop 222
Starting Loop 223
Starting Loop 224
Starting Loop 225
Starting Loop 226
Starting Loop 227
Starting Loop 228


  n_samples_majority,


Starting Loop 229
Starting Loop 230


  n_samples_majority,


Starting Loop 231


  n_samples_majority,


Starting Loop 232


  n_samples_majority,


Starting Loop 233


  n_samples_majority,


Starting Loop 234


  n_samples_majority,


Starting Loop 235


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 236


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 237


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 238


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 239


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 240


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 241


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 242


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 243


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 244


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 245


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 246


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 247
Starting Loop 248
Starting Loop 249
Starting Loop 250
Starting Loop 251
Starting Loop 252
Starting Loop 253
Starting Loop 254
Starting Loop 255
Starting Loop 256


  n_samples_majority,


Starting Loop 257
Starting Loop 258


  n_samples_majority,


Starting Loop 259


  n_samples_majority,


Starting Loop 260


  n_samples_majority,


Starting Loop 261


  n_samples_majority,


Starting Loop 262


  n_samples_majority,


Starting Loop 263


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 264


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 265


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 266


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 267


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 268


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 269


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 270


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 271


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 272


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 273


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 274


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 275
Starting Loop 276
Starting Loop 277
Starting Loop 278
Starting Loop 279
Starting Loop 280
Starting Loop 281
Starting Loop 282
Starting Loop 283
Starting Loop 284
Starting Loop 285
Starting Loop 286
Starting Loop 287
Starting Loop 288
Starting Loop 289
Starting Loop 290
Starting Loop 291


  n_samples_majority,


Starting Loop 292


  n_samples_majority,


Starting Loop 293


  n_samples_majority,


Starting Loop 294


  n_samples_majority,


Starting Loop 295


  n_samples_majority,
  n_samples_majority,


Starting Loop 296


  n_samples_majority,
  n_samples_majority,


Starting Loop 297


  n_samples_majority,
  n_samples_majority,


Starting Loop 298


  n_samples_majority,
  n_samples_majority,


Starting Loop 299
Starting Loop 300
Starting Loop 301
Starting Loop 302
Starting Loop 303
Starting Loop 304
Starting Loop 305
Starting Loop 306
Starting Loop 307
Starting Loop 308
Starting Loop 309
Starting Loop 310
Starting Loop 311
Starting Loop 312
Starting Loop 313
Starting Loop 314
Starting Loop 315
Starting Loop 316
Starting Loop 317
Starting Loop 318
Starting Loop 319


  n_samples_majority,


Starting Loop 320


  n_samples_majority,


Starting Loop 321


  n_samples_majority,


Starting Loop 322


  n_samples_majority,


Starting Loop 323


  n_samples_majority,
  n_samples_majority,


Starting Loop 324


  n_samples_majority,
  n_samples_majority,


Starting Loop 325


  n_samples_majority,
  n_samples_majority,


Starting Loop 326


  n_samples_majority,
  n_samples_majority,


Starting Loop 327
Starting Loop 328
Starting Loop 329
Starting Loop 330
Starting Loop 331
Starting Loop 332
Starting Loop 333
Starting Loop 334
Starting Loop 335
Starting Loop 336
Starting Loop 337
Starting Loop 338
Starting Loop 339
Starting Loop 340
Starting Loop 341
Starting Loop 342
Starting Loop 343
Starting Loop 344
Starting Loop 345
Starting Loop 346
Starting Loop 347
Starting Loop 348
Starting Loop 349
Starting Loop 350
Starting Loop 351
Starting Loop 352
Starting Loop 353
Starting Loop 354
Starting Loop 355
Starting Loop 356
Starting Loop 357
Starting Loop 358
Starting Loop 359
Starting Loop 360
Starting Loop 361
Starting Loop 362
Starting Loop 363
Starting Loop 364
Starting Loop 365
Starting Loop 366
Starting Loop 367
Starting Loop 368
Starting Loop 369
Starting Loop 370
Starting Loop 371
Starting Loop 372
Starting Loop 373
Starting Loop 374
Starting Loop 375
Starting Loop 376
Starting Loop 377
Starting Loop 378
Starting Loop 379
Starting Loop 380
Starting Loop 381
Starting L

  n_samples_majority,


Starting Loop 408


  n_samples_majority,


Starting Loop 409
Starting Loop 410


  n_samples_majority,


Starting Loop 411
Starting Loop 412
Starting Loop 413
Starting Loop 414
Starting Loop 415
Starting Loop 416
Starting Loop 417
Starting Loop 418
Starting Loop 419
Starting Loop 420
Starting Loop 421
Starting Loop 422
Starting Loop 423
Starting Loop 424
Starting Loop 425
Starting Loop 426
Starting Loop 427
Starting Loop 428
Starting Loop 429
Starting Loop 430
Starting Loop 431
Starting Loop 432
Starting Loop 433
Starting Loop 434
Starting Loop 435


  n_samples_majority,


Starting Loop 436


  n_samples_majority,


Starting Loop 437
Starting Loop 438


  n_samples_majority,


Starting Loop 439
Starting Loop 440
Starting Loop 441
Starting Loop 442
Starting Loop 443
Starting Loop 444
Starting Loop 445
Starting Loop 446
Starting Loop 447
Starting Loop 448
Starting Loop 449
Starting Loop 450
Starting Loop 451
Starting Loop 452
Starting Loop 453
Starting Loop 454
Starting Loop 455
Starting Loop 456
Starting Loop 457
Starting Loop 458
Starting Loop 459
Starting Loop 460
Starting Loop 461
Starting Loop 462
Starting Loop 463


  n_samples_majority,


Starting Loop 464


  n_samples_majority,


Starting Loop 465


  n_samples_majority,


Starting Loop 466


  n_samples_majority,


Starting Loop 467
Starting Loop 468
Starting Loop 469
Starting Loop 470
Starting Loop 471
Starting Loop 472
Starting Loop 473
Starting Loop 474
Starting Loop 475
Starting Loop 476
Starting Loop 477
Starting Loop 478
Starting Loop 479
Starting Loop 480
Starting Loop 481
Starting Loop 482
Starting Loop 483
Starting Loop 484
Starting Loop 485
Starting Loop 486
Starting Loop 487
Starting Loop 488
Starting Loop 489
Starting Loop 490
Starting Loop 491


  n_samples_majority,


Starting Loop 492


  n_samples_majority,


Starting Loop 493


  n_samples_majority,


Starting Loop 494


  n_samples_majority,


Starting Loop 495
Starting Loop 496
Starting Loop 497
Starting Loop 498
Starting Loop 499
Starting Loop 500
Starting Loop 501
Starting Loop 502
Starting Loop 503
Starting Loop 504
Starting Loop 505
Starting Loop 506
Starting Loop 507
Starting Loop 508
Starting Loop 509
Starting Loop 510
Starting Loop 511
Starting Loop 512
Starting Loop 513
Starting Loop 514
Starting Loop 515


  n_samples_majority,


Starting Loop 516


  n_samples_majority,


Starting Loop 517


  n_samples_majority,


Starting Loop 518


  n_samples_majority,


Starting Loop 519


  n_samples_majority,
  n_samples_majority,


Starting Loop 520


  n_samples_majority,
  n_samples_majority,


Starting Loop 521


  n_samples_majority,
  n_samples_majority,


Starting Loop 522


  n_samples_majority,
  n_samples_majority,


Starting Loop 523
Starting Loop 524
Starting Loop 525
Starting Loop 526
Starting Loop 527
Starting Loop 528
Starting Loop 529
Starting Loop 530
Starting Loop 531
Starting Loop 532
Starting Loop 533
Starting Loop 534
Starting Loop 535
Starting Loop 536
Starting Loop 537
Starting Loop 538
Starting Loop 539
Starting Loop 540
Starting Loop 541
Starting Loop 542
Starting Loop 543


  n_samples_majority,


Starting Loop 544


  n_samples_majority,


Starting Loop 545


  n_samples_majority,


Starting Loop 546


  n_samples_majority,


Starting Loop 547


  n_samples_majority,
  n_samples_majority,


Starting Loop 548


  n_samples_majority,
  n_samples_majority,


Starting Loop 549


  n_samples_majority,
  n_samples_majority,


Starting Loop 550


  n_samples_majority,
  n_samples_majority,


Starting Loop 551
Starting Loop 552
Starting Loop 553
Starting Loop 554
Starting Loop 555
Starting Loop 556
Starting Loop 557
Starting Loop 558
Starting Loop 559
Starting Loop 560
Starting Loop 561
Starting Loop 562
Starting Loop 563
Starting Loop 564
Starting Loop 565
Starting Loop 566
Starting Loop 567


  n_samples_majority,


Starting Loop 568


  n_samples_majority,


Starting Loop 569


  n_samples_majority,


Starting Loop 570


  n_samples_majority,


Starting Loop 571


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 572


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 573


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 574


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 575


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 576


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 577


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 578


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 579


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 580


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 581


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 582


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 583
Starting Loop 584
Starting Loop 585
Starting Loop 586
Starting Loop 587
Starting Loop 588
Starting Loop 589
Starting Loop 590
Starting Loop 591
Starting Loop 592
Starting Loop 593
Starting Loop 594
Starting Loop 595


  n_samples_majority,


Starting Loop 596


  n_samples_majority,


Starting Loop 597


  n_samples_majority,


Starting Loop 598


  n_samples_majority,


Starting Loop 599


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 600


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 601


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 602


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 603


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 604


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 605


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 606


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 607


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 608


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 609


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 610


  n_samples_majority,
  n_samples_majority,
  n_samples_majority,
  n_samples_majority,


Starting Loop 611
Starting Loop 612
Starting Loop 613
Starting Loop 614
Starting Loop 615
Starting Loop 616
Starting Loop 617
Starting Loop 618
Starting Loop 619
Starting Loop 620
Starting Loop 621
Starting Loop 622
Starting Loop 623
Starting Loop 624
Starting Loop 625
Starting Loop 626
Starting Loop 627


  n_samples_majority,


Starting Loop 628


  n_samples_majority,


Starting Loop 629


  n_samples_majority,


Starting Loop 630


  n_samples_majority,


Starting Loop 631


  n_samples_majority,
  n_samples_majority,


Starting Loop 632


  n_samples_majority,
  n_samples_majority,


Starting Loop 633


  n_samples_majority,
  n_samples_majority,


Starting Loop 634


  n_samples_majority,
  n_samples_majority,


Starting Loop 635
Starting Loop 636
Starting Loop 637
Starting Loop 638
Starting Loop 639
Starting Loop 640
Starting Loop 641
Starting Loop 642
Starting Loop 643
Starting Loop 644
Starting Loop 645
Starting Loop 646
Starting Loop 647
Starting Loop 648
Starting Loop 649
Starting Loop 650
Starting Loop 651
Starting Loop 652
Starting Loop 653
Starting Loop 654
Starting Loop 655


  n_samples_majority,


Starting Loop 656


  n_samples_majority,


Starting Loop 657


  n_samples_majority,


Starting Loop 658


  n_samples_majority,


Starting Loop 659


  n_samples_majority,
  n_samples_majority,


Starting Loop 660


  n_samples_majority,
  n_samples_majority,


Starting Loop 661


  n_samples_majority,
  n_samples_majority,


Starting Loop 662


  n_samples_majority,
  n_samples_majority,


Starting Loop 663
Starting Loop 664
Starting Loop 665
Starting Loop 666
Starting Loop 667
Starting Loop 668
Starting Loop 669
Starting Loop 670
Starting Loop 671


# Converting Results to Dataframe and Saving to Csv:

In [316]:
test_res_df_samp= pd.DataFrame(test_results, columns= ['av_val_loss', "av_accuracy", 'tomek', 'under_strat',
                            'over_strat', 'time_taken', 'av_epochs', 'cv_1_val_loss',
                                'cv_2_val_loss', 'cv_3_val_loss', 'cv_4_val_loss'])

In [317]:
tomek_map = {0: 'TL', 1: 'No TL'}
under_map = {0: '-15%', 1: '-25%', 2: '-40%', 3: '-60%', 4: '-80%', 5: 'Imbal'}
over_map = {0: '+20% S', 1: '+50% S', 2: '+100% S', 3: '+300% S', 4: '+500% S', 5: 'Imbal S', 6: 'SMOTE',
            7: '+20% BS', 8: '+50% BS', 9: '+100% BS', 10: '+300% BS', 11: '+500% BS', 12: 'Imbal BS', 13: 'BoSMOTE'}

In [318]:
test_res_df_samp['tomek'] = test_res_df_samp['tomek'].map(tomek_map)
test_res_df_samp['under_strat'] = test_res_df_samp['under_strat'].map(under_map)
test_res_df_samp['over_strat'] = test_res_df_samp['over_strat'].map(over_map)

In [4]:
test_res_df_samp.sort_values(by='av_val_loss')

Unnamed: 0,av_val_loss,tomek,under_strat,over_strat,time_taken,av_epochs,cv_1_val_loss,cv_2_val_loss,cv_3_val_loss,cv_4_val_loss
0,0.124310,TL,-15%,SMOTE,8896.437500,88.88,0.109355,0.182153,0.108922,0.096809
1,0.126080,TL,-15%,BoSMOTE,7236.890625,75.38,0.183616,0.080773,0.119248,0.120682
2,0.139486,No TL,-15%,SMOTE,7637.843750,82.13,0.162788,0.102508,0.158336,0.134314
3,0.143836,No TL,Imbal,SMOTE,4108.640625,86.63,0.140898,0.130114,0.180991,0.123340
4,0.144682,TL,-80%,+500% S,3300.812500,94.50,0.076449,0.109514,0.235394,0.157372
5,0.157577,TL,-60%,SMOTE,4012.296875,94.50,0.133853,0.204773,0.116076,0.175606
6,0.161531,TL,Imbal,SMOTE,4292.234375,100.00,0.251552,0.168233,0.150311,0.076026
7,0.165127,TL,Imbal,BoSMOTE,4008.265625,92.25,0.260163,0.188745,0.088149,0.123450
8,0.165936,TL,-80%,+300% S,2517.468750,97.88,0.237322,0.153238,0.105844,0.167340
9,0.166525,TL,-40%,BoSMOTE,4431.265625,69.75,0.198248,0.181117,0.129114,0.157621


In [321]:
test_res_df_samp.to_csv('Data/MLP_Gridsearch_Sampling_results.csv')