In [None]:
import os
import time
import warnings

import numpy as np
import pandas as pd
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
from sklearn.metrics import balanced_accuracy_score
from sklearn.neural_network import MLPClassifier


### Data Preprocessing

In [None]:
# Execute the data-loading script inside this notebook's namespace.
# NOTE(review): train_x, train_y_df and valid_y_df are used below but never
# defined in this notebook, so they presumably come from this script — confirm.
%run load_and_plot_data.py

In [None]:
# Show both label tables with their rich (HTML) rendering. A bare tuple of
# DataFrames would fall back to a single plain-text repr.
display(train_y_df, valid_y_df)

In [None]:
def count_classes(labels_df):
    """Return ``(unique_values, counts)`` for the column at position 1.

    The table is converted with ``np.array`` and its second column is passed
    to ``np.unique(..., return_counts=True)``.
    """
    label_column = np.array(labels_df)[:, 1]
    return np.unique(label_column, return_counts=True)


# Per-class example counts for the training and validation label tables.
# NOTE(review): column 1 is presumably the 'class_uid' column — confirm
# against the loader script.
count_train = count_classes(train_y_df)
count_valid = count_classes(valid_y_df)

In [None]:
# Display the (unique values, counts) pairs for the train and validation labels.
count_train, count_valid

In [None]:
# Training targets: the 'class_uid' column copied into a NumPy array.
train_y = np.array(train_y_df.loc[:, 'class_uid'])
train_y

In [None]:
# Quick look at the feature matrix: overall shape and the dtype of its first row.
train_x.shape, train_x[0].dtype

In [None]:
# Fit a min-max scaler on the training features, then print the grand total of
# the rescaled matrix as a one-number sanity check.
scaler = sklearn.preprocessing.MinMaxScaler().fit(train_x)
print(np.sum(scaler.transform(train_x)))

In [None]:
# L1-normalise along axis 1: each row of train_x is rescaled independently.
normed_train_x = sklearn.preprocessing.normalize(
    train_x,
    norm='l1',
    axis=1,
)
normed_train_x.shape

In [None]:
# Base estimator for the grid search. Only the settings shared by every
# candidate are fixed here; the searched hyperparameters are supplied
# separately through the parameter grid.
mlp = MLPClassifier(
    # optimiser: SGD with momentum switched off and an adaptive learning rate
    solver='sgd',
    momentum=0.0,
    learning_rate='adaptive',
    # network / regularisation
    activation='relu',
    alpha=0.0001,
    # stopping criteria
    max_iter=2000,
    tol=1e-5,
    n_iter_no_change=50,
)

In [None]:
# Candidate values explored by the grid search (3 * 1 * 3 * 4 = 36 combinations).
mlp_hyperparameter_grid = {
    'hidden_layer_sizes': [32, 64, 128],
    'random_state': [101],
    'batch_size': [5, 100, 1000],
    'learning_rate_init': [0.001, 0.005, 0.0001, 0.0005],
}

In [None]:
# Validation targets: the 'class_uid' column copied into a NumPy array,
# followed by a shape check against the training targets.
valid_y = np.array(valid_y_df.loc[:, 'class_uid'])
valid_y.shape, train_y.shape

### Hyperparameter search on the training set

In [None]:
### splitter
# One fixed train/validation split over the rows of the training set: the
# first 80% of rows are only ever fitted on, the remaining rows form the
# single held-out fold.
# NOTE(review): the split is positional (last 20% of rows). If the rows are
# ordered by class the held-out fold will be skewed — confirm the loader shuffles.
n_fit_rows = int(0.8 * train_y.size)
n_holdout_rows = train_y.size - n_fit_rows
fit_flags = -1 * np.ones(n_fit_rows)          # -1 means never include this example in any test split
holdout_flags = 0 * np.ones(n_holdout_rows)   #  0 means include in the first test split (we count starting at 0 in python)
valid_indicators_L = np.hstack([fit_flags, holdout_flags])
my_splitter = sklearn.model_selection.PredefinedSplit(valid_indicators_L)

In [None]:
# Exhaustive search over the hyperparameter grid, scored by balanced accuracy
# on the single predefined held-out fold. Train scores are recorded as well.
# refit=False: no final model is refit on the full data after the search.
mlp_searcher = sklearn.model_selection.GridSearchCV(
    estimator=mlp,
    param_grid=mlp_hyperparameter_grid,
    scoring='balanced_accuracy',
    cv=my_splitter,
    return_train_score=True,
    refit=False,
)

In [20]:
# Run the grid search on the L1-normalised training features and time it.
# elapsed_time_sec holds the wall-clock duration in seconds; it is computed
# here but not displayed by this cell.
start_time_sec = time.time()
mlp_searcher.fit(normed_train_x, train_y)
elapsed_time_sec = time.time() - start_time_sec

In [None]:
# Tabulate the search results as an independent DataFrame copy and display it.
mlp_searcher_result = pd.DataFrame(data=mlp_searcher.cv_results_).copy()
mlp_searcher_result

In [None]:
# Hyperparameter combination with the best balanced accuracy on the held-out fold.
mlp_searcher.best_params_