In [6]:
import pandas as pd
import numpy as np
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report, make_scorer, accuracy_score
from sklearn.model_selection import RandomizedSearchCV, KFold

import torch
import torchvision
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

%matplotlib inline

# Import Data:

In [7]:
# Training Set:
X_train = pd.read_csv('Data/X_train.csv')
y_train = pd.read_csv('Data/y_train.csv', header=None)

In [8]:
print("X_train Length: {:,}".format(len(X_train)))
print("y_train Length: {:,}".format(len(y_train)))

X_train Length: 464,809
y_train Length: 464,809


## Normalization:

In [9]:
# Definining Scaler:
scaler = MinMaxScaler()

# Fitting scaler on X_train:
X_train = scaler.fit_transform(X_train)

In [10]:
# Converting y_train into an array:
y_train = np.array(y_train[0])

In [11]:
# Using train-test split to split out a random, stratified 10% portion of the training data to grid search:
X_gs, X_leave, y_gs, y_leave = train_test_split(X_train, y_train, 
                                                test_size = 0.9,  # Inverse of the size of the data to search
                                                stratify = y_train)  # Keeping same proportion of target classes

- A 10% stratified subset will be used to search for hyperparameters, see the length of X_train/y_test below:

In [12]:
print("X Cross-Val Length: {:,}".format(len(X_gs)))
print("y Cross-Val Length: {:,}".format(len(y_gs)))

X Cross-Val Length: 46,480
y Cross-Val Length: 46,480


## Create Pytorch Dataloader Datasets:
- See MLP Notebook for full explanation:

In [13]:
# Creating a dataset class based on torch.utils.data.Dataset that can be loaded using train_loader
# source: https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

class CreatePytorchDataset(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data
    
    def __len__(self):
        return len(self.X_data)
    
    def __getitem__(self, idx):
        return self.X_data[idx], self.y_data[idx]

## Function to Calculate Accuracy:

In [14]:
def calc_acc(y_pred, y_test):
    y_pred_torch = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_values = torch.max(y_pred_torch, dim = 1)    
    correct_pred = (y_pred_values == y_test).float()
    accuracy = correct_pred.sum() / len(correct_pred)
    accuracy = accuracy * 100
    
    return accuracy

## Defining The Model:

In [15]:
class MLP(nn.Module):
    
    def __init__(self, n_features, n_classes, n_neurons, activ, drop):
        super(MLP, self).__init__()
        
        # Define the layers: 
        # Defining the linear tranformations:
        self.fc1 = nn.Linear(n_features, n_neurons)
        self.fc2 = nn.Linear(n_neurons, int(n_neurons/2))
        self.fc3 = nn.Linear(int(n_neurons/2), int(n_neurons/4))
        
        # Output layer
        self.output = nn.Linear(int(n_neurons/4), n_classes)
       
        # Batch normalisation layers 
        # (https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html?highlight=batchnorm#torch.nn.BatchNorm1d)
        self.batchnorm1 = nn.BatchNorm1d(n_neurons)
        self.batchnorm2 = nn.BatchNorm1d(int(n_neurons/2))
        self.batchnorm3 = nn.BatchNorm1d(int(n_neurons/4))
    
        # Defining dropout rate:
        self.dropout = nn.Dropout(drop) 
        
        # Defining activation function
        self.activ = activ
        
        
    # Define forward function
    def forward(self, x):
        hidden = self.fc1(x)
        hidden = self.batchnorm1(hidden)
        hidden = self.activ(hidden)
        
        hidden = self.fc2(hidden)
        hidden = self.batchnorm2(hidden)
        hidden = self.activ(hidden)  
        hidden = self.dropout(hidden)
        
        hidden = self.fc3(hidden)
        hidden = self.batchnorm3(hidden)
        hidden = self.activ(hidden)
        hidden = self.dropout(hidden)
        
        output = self.output(hidden)
        
        return output
    

## Model Training Within Cross Validation Loop:

In [16]:
def cross_val_loop(model, n_epochs, train_loader, val_loader, criterion, optimizer):
    
    # To track lowest validation loss through epochs, set initially to infinity:
    min_val_loss = np.Inf

    # Lists to hold accuracies and losses through epochs:
    train_loss_list = []
    train_acc_list = []
    val_loss_list = []
    val_acc_list = []

    for epoch in range(1, n_epochs+1):
        
        # Setting initial losses/accuracy to zero, to be updated within batch loops
        train_loss_epoch = 0
        train_acc_epoch = 0
        val_loss_epoch = 0
        val_acc_epoch = 0

        # TRAINING #
        model.train()

        for data, target in train_loader:
            # Set gradients of model parameters to zero:
            optimizer.zero_grad()

            # Making predictions based on the training data batch
            y_pred_batch = model(data)

            # Cross-Entropy Loss:
            train_batch_loss = criterion(y_pred_batch, target.flatten())

            # Classifications correct within batch, to use to calculate overall accuracy:
            train_batch_acc = calc_acc(y_pred_batch, target.flatten())

            # Calculate the sum of gradients using backward:
            train_batch_loss.backward()

            # Update parameters by performing a single optimization:
            optimizer.step()

            # Update the running loss/accuracy totals:
            train_loss_epoch += train_batch_loss.item()
            train_acc_epoch += train_batch_acc.item()

        # Disabling gradient calculation to speed up process, as backward will not be called during validation:
        with torch.no_grad():
            # VALIDATION #
            model.eval()

            for data, target in val_loader:
                # Use the model to predict target in val set batch:
                y_pred_val_batch =  model(data)

                # Calculate loss and accuracy on val set:
                val_batch_loss = criterion(y_pred_val_batch, target.flatten())
                val_batch_acc = calc_acc(y_pred_val_batch, target.flatten())

                # Update the running loss/accuracy totals:
                val_loss_epoch += train_batch_loss.item()
                val_acc_epoch += train_batch_acc.item()

        # PRINTING STATISTICS #

        # Calculate metrics across epoch:
        avg_train_loss = train_loss_epoch/ len(train_loader)
        avg_train_acc = train_acc_epoch/ len(train_loader)

        avg_val_loss = val_loss_epoch/ len(val_loader)
        avg_val_acc = val_acc_epoch/ len(val_loader)

        # Display:
        #print('Epoch {}: | Train Loss: {:.3f} | Train Acc: {:.3f} | Val Loss: {:.3f}| Val Acc:{:.3f}'.format(
            #epoch, avg_train_loss, avg_train_acc, avg_val_loss, avg_val_acc))

        # Appending lists outside epoch loop to store results:
        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)

        if avg_val_loss <= min_val_loss:
            # Alter min loss to current avg_val_loss:
            min_val_loss = avg_val_loss
        
        # EARLY STOPPING CRITERIA #
        # If validation loss has increased for 5 consequetive epochs, end loop:
        # Only run if > 10 epochs gone
        if epoch > 10:
            # Checks if the last 5 val_losses are all larger than the preceeding one:
            if ((val_loss_list[-1]>val_loss_list[-2]) and (val_loss_list[-2]>val_loss_list[-3]) and 
                (val_loss_list[-3]>val_loss_list[-4]) and (val_loss_list[-4]>val_loss_list[-5]) and 
                (val_loss_list[-5]>val_loss_list[-6])):
                break 


    
    
    return min_val_loss, epoch

# Cross Validation Loops:

In [17]:
# Setting static variables:

N_CLASSES = 7
N_INPUT_FEAT = 54
CRITERION = nn.CrossEntropyLoss()
EVAL_METRIC = make_scorer(accuracy_score)

In [191]:
# Setting parameter values to search through:
drop_out = [0, 0.1, 0.2, 0.3]            
max_neurons = [64, 128, 256, 512]           
learn_rate = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]     
moment = [0, 1e-2, 1e-3, 0.999]           

# Creating an array to store results from the searches:
test_results = np.zeros(((len(drop_out)*len(max_neurons)*len(learn_rate)*len(moment)), 11))

max_epochs = 100
n_folds = 4
kf = KFold(n_splits = n_folds,shuffle = False)
CRITERION = nn.CrossEntropyLoss()
EVAL_METRIC = make_scorer(accuracy_score)
loop_counter = 0
print("TOTAL LOOPS = ", (len(drop_out)*len(max_neurons)*len(learn_rate)*len(moment)*n_folds))

# Looping through parameters and performing cross-validation within those loops:
for m in range(len(moment)):
    
    for l in range(len(learn_rate)):

        for n in range(len(max_neurons)):

            for i in range(len(drop_out)):

                ### ALTERING MAX NEURONS AND DROP OUT PARAMETERS ###
                mlp_model = MLP(n_features = 54, n_classes = 7, 
                                n_neurons = max_neurons[n], 
                                activ = nn.LeakyReLU(), 
                                drop = drop_out[i])

                ### ALTERING LEARNING RATE AND MOMENTUM PARAMETERS ###
                # Optimizer changed manually between Adam and SGD and cell re-run, results stored to csv below #
                optimizer = torch.optim.Adam(mlp_model.parameters(), lr = learn_rate[l], weight_decay = moment[m])

                val_losses = 0
                epochs = 0
                vll = []
                
                start = time.process_time()
                
                for train_index, test_index in kf.split(X_gs):                    

                    # Obtaining the indexes to split the data for each Cross Validation Fold:
                    X_train_CV, X_val_CV = X_gs[train_index], X_gs[test_index]
                    y_train_CV, y_val_CV = y_gs[train_index], y_gs[test_index]

                    # Converting these fold splits into tensors:
                    X_train_CV_tensor = torch.from_numpy(X_train_CV).float()
                    X_val_CV_tensor = torch.from_numpy(X_val_CV).float()
                    y_train_CV_tensor = torch.from_numpy(y_train_CV).long() - 1
                    y_val_CV_tensor = torch.from_numpy(y_val_CV).long() - 1

                    # Creating combined datasets to feed into Data Loader:
                    train_CV_dataset = CreatePytorchDataset(X_train_CV_tensor, y_train_CV_tensor)
                    val_CV_dataset = CreatePytorchDataset(X_val_CV_tensor, y_val_CV_tensor)

                    # Creating Data Loader Objects:
                    train_CV_loader = DataLoader(train_CV_dataset, batch_size=64, shuffle = True)
                    val_CV_loader = DataLoader(val_CV_dataset, batch_size=64, shuffle=True)

                    print("Starting Loop {:.0f}".format(loop_counter))
                    loop_counter += 1

                    ######## EPOCH MODEL TRAINING ########
                    min_val_loss_epochs, epochs_taken = cross_val_loop(mlp_model, max_epochs, 
                                                         train_CV_loader, val_CV_loader, CRITERION, optimizer)

                    val_losses += min_val_loss_epochs
                    epochs += epochs_taken
                    vll.append(min_val_loss_epochs)
                    
                    #print("Time to complete CV Loop: {:.1f} minutes".format((time.process_time() - start)/60))

                av_val_loss = val_losses/ n_folds
                av_epochs = epochs / n_folds
                time_taken = time.process_time() - start
                
                # Storing results in array created outside loop:
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+ 
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][0] = av_val_loss
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][1] = drop_out[i]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][2] = max_neurons[n]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][3] = learn_rate[l]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][4] = moment[m]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][5] = time_taken
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][6] = av_epochs
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][7] = vll[0]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][8] = vll[1]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][9] = vll[2]
                test_results[i+(n*len(drop_out))+(l*(len(drop_out)*len(max_neurons))+
                                (m*(len(drop_out)*len(max_neurons)*len(learn_rate))))][10] = vll[3]
    
        
        

TOTAL LOOPS =  360
Starting Loop 0
Starting Loop 1
Starting Loop 2
Starting Loop 3
Starting Loop 4
Starting Loop 5
Starting Loop 6
Starting Loop 7
Starting Loop 8
Starting Loop 9
Starting Loop 10
Starting Loop 11
Starting Loop 12
Starting Loop 13
Starting Loop 14
Starting Loop 15
Starting Loop 16
Starting Loop 17
Starting Loop 18
Starting Loop 19
Starting Loop 20
Starting Loop 21
Starting Loop 22
Starting Loop 23
Starting Loop 24
Starting Loop 25
Starting Loop 26
Starting Loop 27
Starting Loop 28
Starting Loop 29
Starting Loop 30
Starting Loop 31
Starting Loop 32
Starting Loop 33
Starting Loop 34
Starting Loop 35
Starting Loop 36
Starting Loop 37
Starting Loop 38
Starting Loop 39
Starting Loop 40
Starting Loop 41
Starting Loop 42
Starting Loop 43
Starting Loop 44
Starting Loop 45
Starting Loop 46
Starting Loop 47
Starting Loop 48
Starting Loop 49
Starting Loop 50
Starting Loop 51
Starting Loop 52
Starting Loop 53
Starting Loop 54
Starting Loop 55
Starting Loop 56
Starting Loop 57
Start

# Saving Results to Dataframe/ csv:

In [26]:
np.set_printoptions(precision=4, suppress=True)

In [27]:
test_res_df= pd.DataFrame(test_results, columns= ['av_val_loss', 'drop_out', 'max_neurons',
                            'learn_rate', 'weight_decay', 'time_taken', 'av_epochs', 'cv_1_val_loss',
                                'cv_2_val_loss', 'cv_3_val_loss', 'cv_4_val_loss'])

In [39]:
test_res_df.sort_values(by='av_val_loss').head(10)

Unnamed: 0,av_val_loss,drop_out,max_neurons,learn_rate,momentum,time_taken,av_epochs,cv_1_val_loss,cv_2_val_loss,cv_3_val_loss,cv_4_val_loss
0,0.200918,0.1,512,0.01,0.0,3345.125,78.75,0.278339,0.164305,0.160212,0.200817
1,0.201888,0.0,256,0.001,0.0,1764.46875,72.5,0.238721,0.234384,0.173371,0.161074
2,0.206505,0.0,512,0.001,0.0,2301.171875,55.0,0.286845,0.200367,0.155842,0.182966
3,0.217063,0.0,512,0.01,0.0,2873.140625,68.75,0.245914,0.231567,0.215597,0.175172
4,0.235044,0.0,256,0.01,0.0,1963.203125,80.0,0.290983,0.276656,0.207501,0.165034
5,0.247868,0.0,128,0.01,0.0,1207.09375,76.25,0.256995,0.259954,0.293576,0.180946
6,0.258694,0.0,512,0.0001,0.0,3092.828125,73.75,0.321361,0.300024,0.208216,0.205175
7,0.260151,0.0,256,0.001,0.001,1438.0,57.5,0.364366,0.217242,0.198681,0.260313
8,0.264219,0.1,512,0.001,0.0,2575.828125,60.0,0.354458,0.235324,0.244575,0.22252
9,0.269248,0.1,256,0.001,0.0,1427.578125,57.5,0.29432,0.317084,0.186814,0.278773


In [33]:
#test_res_df.to_csv('Data/MLP_Activ_Data/MLP_ADAM_GS_RELU.csv')