In [6]:
# add the src folder to the path
import sys
import pandas as pd
sys.path.append('../')

from src.search import grid_search_cv
from src.utils import load_cup, parse_results
from src.validation import kfold_cv

from src.network import Network
from src.activations import ReLU, Sigmoid
from src.losses import MeanSquaredError
from src.metrics import MeanEuclideanError
from src.regularizers import L1, L2
from src.callbacks import EarlyStopping

X, y, scaler = load_cup(validation=False, scale_outputs=True)
y.shape

(1194, 2, 1)

# Trying first a coarse grid search for model selected in preliminary.ipynb

In [None]:
#Coarse grained grid search 24-16
model = Network(9)
model.add_layer(24, ReLU())
model.add_layer(16, ReLU())
model.add_layer(2, Sigmoid())

gs_results = grid_search_cv(
    model, 
    x=X, 
    y=y, 
    n_folds=3,
    metric=MeanEuclideanError(), 
    loss=MeanSquaredError(), 
    eta=[1e-2, 1e-3, 1e-1],
    nesterov=[0.8, 0.7, 0.5, 0.6],
    reg_type=[L2, L1],
    reg_val=[1e-7, 1e-8, 1e-6, 1e-5],
    epochs=1000,
    scaler=scaler,
    workers=8,
    verbose=False
)

In [None]:
# save results
results_df = parse_results(gs_results)
results_df.to_csv('../result_gs/2416_coarse.csv')

In [None]:
# coarse grained 32-16
#Coarse grained grid search
model = Network(9)
model.add_layer(32, ReLU())
model.add_layer(16, ReLU())
model.add_layer(2, Sigmoid())

gs_results = grid_search_cv(
    model, 
    x=X, 
    y=y, 
    n_folds=3,
    metric=MeanEuclideanError(), 
    loss=MeanSquaredError(), 
    eta=[1e-2, 1e-3, 1e-1],
    nesterov=[0.8, 0.7, 0.5, 0.6],
    reg_type=[L2, L1],
    reg_val=[1e-7, 1e-8, 1e-6, 1e-5],
    epochs=1000,
    scaler=scaler,
    workers=8,
    verbose=False
)

In [None]:
results_df = parse_results(gs_results)
results_df.to_csv('../result_gs/3216_coarse.csv')

## Analyzing coarse results.

In [None]:
res2416 = pd.read_csv("../result_gs/2416_coarse.csv", index_col=0)
res3216 = pd.read_csv("../result_gs/3216_coarse.csv", index_col=0)

# putting architecture column to interpret results.
res3216['arch'] = ['3216' for _ in range(len(res3216))]
res2416['arch'] = ['2416' for _ in range(len(res2416))]



In [None]:
# print results for 24-16 architecture, sorted by mean euclidean error.
res2416.sort_values(by=['val_mee'], ascending=True).head(10)

In [None]:
# print results for 32-16 arch, sorted by best validation mee
res3216.sort_values(by=['val_mee'], ascending=True).head(10)

In [None]:
# merge results and express regularizer strength in log scale to better reading
import numpy as np

res = pd.concat([res2416, res3216])
res['reg_val'] = res['reg_val'].apply(lambda x: np.log10(x))

#print best 10 models overall.
res.sort_values(by=['val_mee'], ascending=True).head(10)

In [None]:
# saving to csv
res.to_csv('../result_gs/merged_coarse.csv')

## Now, try with fine grid search for both models.

In [None]:
#Fine grained grid search 24-16
model = Network(9)
model.add_layer(24, ReLU())
model.add_layer(16, ReLU())
model.add_layer(2, Sigmoid(), initializer='xavier')

gs_results = grid_search_cv(
    model, 
    x=X, 
    y=y, 
    n_folds=3,
    metric=MeanEuclideanError(), 
    loss=MeanSquaredError(), 
    eta=[0.8e-2, 1e-2, 1.2e-2, 1.4e-2],
    nesterov=[0.5, 0.52, 0.54, 0.56, 0.58, 0.6],
    reg_type=[L1,L2],
    reg_val=[1e-7, 10**(-6.8), 10**(-6.6), 10**(-6.4), 10**(-6.2), 1e-6],
    epochs=1000,
    scaler=scaler,
    workers=8,
    verbose=False
)

In [None]:
results_df = parse_results(gs_results)
results_df.to_csv('../result_gs/2416_fine.csv')

In [8]:
results_2416 = pd.read_csv('../result_gs/2416_fine.csv')
results_2416.sort_values(by='val_mee', ascending=True).head(10)

Unnamed: 0.1,Unnamed: 0,eta,nesterov,reg_type,reg_val,tr_mee,val_mee,loss,val_loss
128,128,0.01,0.58,L2,2.511886e-07,1.301139,1.498053,1.379257,1.853623
222,222,0.014,0.5,L2,1e-07,1.299053,1.503843,1.378399,1.858226
145,145,0.012,0.5,L1,1.584893e-07,1.373286,1.513375,1.570382,1.899246
107,107,0.01,0.54,L2,1e-06,1.372048,1.514176,1.539621,1.909278
25,25,0.008,0.54,L1,1.584893e-07,1.398222,1.514346,1.618376,1.92533
251,251,0.014,0.54,L2,1e-06,1.354133,1.514635,1.512146,1.88763
215,215,0.012,0.6,L2,1e-06,1.360103,1.516494,1.525632,1.918317
165,165,0.012,0.52,L2,3.981072e-07,1.348887,1.518237,1.50306,1.894714
132,132,0.01,0.6,L1,1e-07,1.393688,1.51839,1.597435,1.936572
177,177,0.012,0.54,L2,3.981072e-07,1.352925,1.518642,1.515752,1.920026


In [2]:
#Fine grained grid search 32-16
model = Network(9)
model.add_layer(32, ReLU())
model.add_layer(16, ReLU())
model.add_layer(2, Sigmoid(), initializer='xavier')

gs_results = grid_search_cv(
    model, 
    x=X, 
    y=y, 
    n_folds=3,
    metric=MeanEuclideanError(), 
    loss=MeanSquaredError(), 
    eta=[0.8e-2, 1e-2, 1.2e-2, 1.4e-2],
    nesterov=[0.6, 0.62, 0.64, 0.66, 0.68, 0.7],
    reg_type=[L1,L2],
    reg_val=[1e-8, 10**(-7.8), 10**(-7.6), 10**(-7.4), 10**(-7.2), 1e-7],
    epochs=1000,
    scaler=scaler,
    workers=6,
    verbose=False
)

Gridsearch: exploring 288 combinations.


100%|██████████| 288/288 [1:31:28<00:00, 19.06s/it]


In [3]:
results_df = parse_results(gs_results)
results_df.to_csv('../result_gs/3216_fine.csv')

In [None]:
results_3216 = pd.read_csv('../result_gs/3216_fine.csv')
results_3216.sort_values(by='val_mee', ascending=True).head(10)

In [None]:
results_df = parse_results(gs_results)
results_df.sort_values(by='val_mee', ascending=True)

### analyze fine gs results

In [None]:
fine3216 = pd.read_csv("../result_gs/3216_fine.csv", index_col=0)
fine3216.sort_values(by=['val_mee'], ascending=True).head(10)

fine3216['arch'] = ['3216' for _ in range(len(fine3216))]
fine3216['reg_val'] = fine3216['reg_val'].apply(lambda x: np.log10(x))
fine3216 = fine3216.round(4)
fine3216.sort_values(by=['val_mee'], ascending=True, inplace=True)
fine3216.head(8)

In [None]:
fine2416 = pd.read_csv("../result_gs/2416_fine.csv", index_col=0)
fine2416.sort_values(by=['val_mee'], ascending=True).head(10)

fine2416['arch'] = ['2416' for _ in range(len(fine2416))]
fine2416['reg_val'] = fine2416['reg_val'].apply(lambda x: np.log10(x))
fine2416 = fine2416.round(4)
fine2416.sort_values(by=['val_mee'], ascending=True, inplace=True)
fine2416.head(8)

In [None]:
# merge again and saver to csv
fine = pd.concat([fine2416, fine3216])
fine.to_csv('../result_gs/merged_fine.csv')

# show results (top 4)
fine.sort_values(by=['val_mee'], ascending=True).head(4)

# Final Model Selection

- 32-16 ReLU activated, Sigmoid output with Xavier initialization
- regularizer L2, with $\log_{10}(\lambda) = -7.6 $ 
- learning rate  = 0.014
- nesterov momentum $\alpha =0.6$

### Assessing corret number of tr epochs wih 10 fold CV

In [None]:
# perform 5-fold cross validation with best parameters to assess correct number of epochs
X, y, scaler = load_cup(scale_outputs=True, validation=False)

from src.validation import kfold_cv

reg = 6.31e-8
model = Network(9, L2(reg))
model.add_layer(32, ReLU(), initializer='xavier')
model.add_layer(16, ReLU(), initializer='xavier')
model.add_layer(2, Sigmoid(), initializer='xavier')

try:
    res = kfold_cv(
        model,
        X,
        y,
        k=10,
        metric=MeanEuclideanError(),
        loss=MeanSquaredError(),
        nesterov=0.6,
        epochs=1000,
        eta=0.014,
        verbose=True,
        callbacks=[EarlyStopping(50)],
        scaler=scaler
    )
except KeyboardInterrupt:
    model.bar.close()

### 170 epochs needed. Now re-train model on all dev. set

In [None]:
reg = 6.31e-8 
model = Network(9, L2(reg))
model.add_layer(32, ReLU())
model.add_layer(16, ReLU())
model.add_layer(2, Sigmoid(), initializer='xavier')

stats = model.train(
    eta=0.014, 
    nesterov=0.6, 
    train=(X, y),
    validation=(X, y), # putting tr as validation, just to print the MEE. 
    metric=MeanEuclideanError(), 
    loss=MeanSquaredError(),
    epochs=170,
)

### Finally assess performance on test set

In [None]:
from src.utils import load_cup_test
X_test, y_test = load_cup_test(scaler=scaler)

y_pred = model.multiple_outputs(X_test)

y_pred_new = scaler.inverse_transform(y_pred.reshape((y_pred.shape[0], y_pred.shape[1]))).reshape(y_pred.shape)
y_test_new = scaler.inverse_transform(y_test.reshape((y_test.shape[0], y_test.shape[1]))).reshape(y_test.shape)

metric = MeanEuclideanError()
print("MEE (real scale):", metric(y_pred_new, y_test_new))