# Experiment: Classification using Neural Net / MLP

In [2]:
import numpy as np
import pandas as pd
import os

from converter import * 

from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import *

In [3]:
# Data preparation: load the first chunk and replace the label column
# (index 60) with the numeric encoding returned by labels_to_numeric.
chunk_raw = np.load('data_processed/chunk1.npy')
# DataFrame.as_matrix() was removed in pandas 1.0. to_numpy(copy=True) returns
# the same values as a fresh, writable ndarray, which the in-place label
# assignment below needs.
data = pd.DataFrame(chunk_raw).to_numpy(copy=True)
labels = labels_to_numeric(data[:,60])
data[:,60] = labels

In [71]:
# Inspect the prepared matrix: columns 0-59 are later used as features,
# column 60 now holds the numeric class label.
data

array([[-546.3631938194943, 116.98695573835903, -4.861569329175866, ...,
        0.9078297873738591, -1.749434573764696, 0],
       [-562.0175695749476, 128.08630421054093, 1.5521094186028215, ...,
        0.13435586288506007, -0.5028825165831374, 14],
       [-768.0485577412256, 90.00203429194795, 28.85907213562754, ...,
        0.2050543431615374, 0.2356009427612622, 8],
       ..., 
       [-590.1165858091811, 137.36530414141032, 27.38342805707573, ...,
        0.2277344734054592, 0.5351962088664386, 2],
       [-478.7168919525388, 75.35484879768894, 60.23120923626227, ...,
        0.26970327531627797, 0.7025104029473643, 2],
       [-502.6165625444437, 169.57863313700733, 6.561729465612, ...,
        0.7661254059566451, -0.14261218844844656, 12]], dtype=object)

In [125]:
def perform_trial(classifier, data, normalizer=None, random_state=None):
    """Train ``classifier`` on a random split of ``data`` and print its test accuracy.

    Parameters
    ----------
    classifier : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    data : 2-D array
        Columns 0-59 are used as features (cast to float64) and column 60
        as the class label (cast to int).
    normalizer : object, optional
        Transformer exposing ``fit(X)`` and ``transform(X)``. It is fitted on
        the training part only and then applied to both parts, so no
        statistics of the test part influence training.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        previous behaviour (a different random split on every call); pass an
        integer to make trials comparable and reproducible.

    Returns
    -------
    None
        The accuracy on the held-out part is printed, not returned.
    """
    features = np.array(data[:, :60], dtype=np.float64)
    # np.int was only an alias of the builtin int and was removed in NumPy 1.24.
    targets = np.array(data[:, 60], dtype=int)

    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, random_state=random_state
    )

    # Explicit None check: an object that defines __len__/__bool__ must not be
    # skipped just because it evaluates as falsy.
    if normalizer is not None:
        normalizer.fit(train_x)
        train_x = normalizer.transform(train_x)
        test_x = normalizer.transform(test_x)

    classifier.fit(train_x, train_y)
    pred_y = classifier.predict(test_x)
    print("Generalized Accuracy: ", metrics.accuracy_score(test_y, pred_y))

## Trial 1 
- <b>Architecture:</b> 2 hidden layers (60 x 60)
- <b>Optimizer:</b> LBFGS
- <b>Activation function:</b> ReLU

In [91]:
# Trial 1: two hidden layers of 60 units, L-BFGS solver, activation left at the library default.
net = MLPClassifier(
    hidden_layer_sizes=(60, 60),
    solver='lbfgs',
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data)

Generalized Accuracy:  0.49424


## Trial 2 
- <b>Architecture:</b> 3 hidden layers (120 x 100 x 30)
- <b>Optimizer:</b> SGD
- <b>Activation function:</b> ReLU

In [92]:
# Trial 2: three hidden layers (120, 100, 30), SGD solver, activation left at the library default.
net = MLPClassifier(
    hidden_layer_sizes=(120, 100, 30),
    solver='sgd',
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data)

Generalized Accuracy:  0.0666


## Trial 3
- <b>Architecture:</b> 4 hidden layers (60 x 60 x 60 x 60)
- <b>Optimizer:</b> ADAM
- <b>Activation function:</b> Logistic / Sigmoid

In [122]:
# Trial 3: four hidden layers of 60 units, Adam solver, logistic activation.
net = MLPClassifier(
    hidden_layer_sizes=(60, 60, 60, 60),
    activation='logistic',
    solver='adam',
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data)

Generalized Accuracy:  0.72724


## Trial 4 
- <b>Architecture:</b> 3 hidden layers (120 x 100 x 120)
- <b>Optimizer:</b> ADAM
- <b>Activation function:</b> ReLU

In [94]:
# Trial 4: three hidden layers (120, 100, 120), Adam solver, activation left at the library default.
net = MLPClassifier(
    hidden_layer_sizes=(120, 100, 120),
    solver='adam',
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data)

Generalized Accuracy:  0.76152


## Trial 5
- <b>Architecture:</b> 3 hidden layers (180 x 120 x 60)
- <b>Optimizer:</b> ADAM
- <b>Activation function:</b> ReLU
- <b>Learning rate:</b> 0.003

In [129]:
# Trial 5: three hidden layers (180, 120, 60), Adam solver, ReLU, initial learning rate 0.003.
net = MLPClassifier(
    hidden_layer_sizes=(180, 120, 60),
    activation='relu',
    solver='adam',
    learning_rate_init=0.003,
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data)

Generalized Accuracy:  0.74


## Trial 6: MinMax Normalization [-1,1]
- Same parameters as for Trial 5

In [136]:
# Trial 6: network with hidden layers (180, 120, 60), Adam, ReLU, learning rate 0.003;
# features rescaled to the range [-1, 1] with MinMaxScaler.
minmax_scaler = MinMaxScaler(feature_range=(-1, 1))
net = MLPClassifier(
    hidden_layer_sizes=(180, 120, 60),
    activation='relu',
    solver='adam',
    learning_rate_init=0.003,
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data, normalizer=minmax_scaler)

Generalized Accuracy:  0.77144


## Trial 7: RobustScaler Normalization (median/IQR-based, not bounded to [-1,1])
- Same parameters as for Trial 5

In [None]:
# Trial 7: network with hidden layers (180, 120, 60), Adam, ReLU, learning rate 0.003;
# features scaled with RobustScaler using its default settings.
robust_scaler = RobustScaler()
net = MLPClassifier(
    hidden_layer_sizes=(180, 120, 60),
    activation='relu',
    solver='adam',
    learning_rate_init=0.003,
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data, normalizer=robust_scaler)

Generalized Accuracy:  0.77164


# Training on the entire data set

For this, we need to import all the data and train the neural net based on the entire data set.

In [None]:
FOLDER = 'data_processed/'
# Keep only NumPy chunk files (os.listdir also returns checkpoints, hidden
# files, sub-directories, ...) and sort them, because os.listdir order is
# arbitrary and would otherwise change the row order between runs.
data_files = sorted(name for name in os.listdir(FOLDER) if name.endswith('.npy'))
if not data_files:
    raise FileNotFoundError("No .npy chunks found in " + FOLDER)

# Load every chunk once and concatenate in a single call. The earlier
# pattern of seeding with [] and catching TypeError only worked for some
# dtypes (a plain 2-D chunk raises ValueError instead) and, on a TypeError
# from a later chunk, silently discarded everything accumulated so far.
# NOTE(review): if the chunks were saved as object arrays, np.load needs
# allow_pickle=True on NumPy >= 1.16.3 - confirm against the files.
data_all = np.concatenate(
    [np.load(os.path.join(FOLDER, name)) for name in data_files],
    axis=0,
)

In [None]:
# DataFrame.as_matrix() was removed in pandas 1.0. to_numpy(copy=True) returns
# the same values as a fresh, writable ndarray, which the in-place label
# assignment below needs.
data_all = pd.DataFrame(data_all).to_numpy(copy=True)
# Column 60 holds the class labels; overwrite it with the numeric encoding.
labels_all = labels_to_numeric(data_all[:,60])
data_all[:,60] = labels_all
data_all

### Training with the best candidate - from Trial 7

In [None]:
# Final run on the complete data set: hidden layers (180, 120, 60), Adam, ReLU,
# learning rate 0.003, features scaled with RobustScaler.
scaler = RobustScaler()
net = MLPClassifier(
    hidden_layer_sizes=(180, 120, 60),
    activation='relu',
    solver='adam',
    learning_rate_init=0.003,
    alpha=1e-5,
    random_state=1,
)
perform_trial(net, data_all, normalizer=scaler)