In [None]:
import os
# Must be set before any Keras-dependent import below.
os.environ["TF_USE_LEGACY_KERAS"] = "1"

import json
from collections import defaultdict
from functools import partial
from pathlib import Path

import keras_tuner
import numpy as np
import pandas as pd
from scipy import integrate
import tf_keras as keras

from models.nn import build_model
from har import get_data

In [None]:
def make_qscore(conditional_dist):
    """Build a quantile-score (pinball-loss) function for one predictive distribution.

    Parameters
    ----------
    conditional_dist
        Object exposing ``quantile(a)`` whose result has a ``.numpy()`` method.
        The quantile output is expected to hold exactly one value per entry of
        ``a`` (extra singleton axes are tolerated).

    Returns
    -------
    callable
        ``qscore(a, y)`` returning ``((y <= q_a) - a) * (q_a - y)`` evaluated
        elementwise, where ``q_a`` is the ``a``-quantile of ``conditional_dist``.
        The result has the broadcast shape of ``a`` and ``y``.
    """
    d = conditional_dist

    def qscore(a, y):
        # Numerical integrators such as scipy.integrate.tanhsinh evaluate the
        # integrand on a whole array of levels at once, so keep one quantile
        # per level instead of collapsing to the first element.
        q = np.asarray(d.quantile(a).numpy())
        # Drop any extra singleton (batch) axes so q lines up with `a`;
        # raises ValueError if the distribution returned a different count.
        q = q.reshape(np.shape(a))
        return ((y <= q) - a) * (q - y)

    return qscore

In [None]:
# Row-index boundaries of the split applied to every dataset:
# rows [0, val_bound) train, [val_bound, test_bound) validation,
# rows [test_bound, end) test.
test_bound = 4600
val_bound = test_bound // 10 * 8

results = []                 # one [company, dist, l_score, mean crps] row per fit
best_hp = defaultdict(dict)  # best_hp[company][dist] -> best HyperParameters

# NOTE(review): `distributions` and `early_stop` are not defined in any cell
# visible here; they must exist before this cell runs, otherwise a
# restart-and-run-all fails with NameError.

# sorted() makes the processing order (and the row order of `results`) deterministic.
for csv_path in sorted(Path('data/TAQ').iterdir()):
    company = csv_path.stem.split('_')[0]
    data = get_data(csv_path)
    # Column 0 is used as the target, the remaining columns as features.
    x_train, y_train = data[:val_bound, 1:], data[:val_bound, 0]
    x_val, y_val = data[val_bound:test_bound, 1:], data[val_bound:test_bound, 0]
    x_test, y_test = data[test_bound:, 1:], data[test_bound:, 0]

    for dist in distributions:
        hypermodel = partial(build_model, dist_name=dist)
        # overwrite=True: every (company, dist) search reuses ./trials and
        # discards the previous search's trial files.
        tuner = keras_tuner.RandomSearch(
            hypermodel=hypermodel,
            objective=keras_tuner.Objective("val_loss", "min"),
            max_trials=10,
            overwrite=True,
            directory="./trials",
        )
        tuner.search(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=30,
            callbacks=[early_stop],
        )
        best_model = tuner.get_best_models()[0]
        l_score = best_model.evaluate(x_test, y_test)

        # Integrate the quantile score over the level a in (0, 1) for each
        # test observation.
        # NOTE(review): under the usual definition CRPS = 2 * integral of the
        # pinball loss; the value is stored unscaled here, as in the original
        # code -- confirm the intended scaling.
        crps = []
        for x_row, y_true in zip(x_test, y_test):
            # reshape(1, -1) keeps a batch axis of 1 without hard-coding the
            # number of feature columns.
            y_hat = best_model(x_row.reshape(1, -1))
            crps_res = integrate.tanhsinh(
                make_qscore(y_hat),
                np.zeros(1),
                np.ones(1),
                args=(y_true,),  # must be a tuple
            )
            crps.append(crps_res.integral)

        results.append([company, dist, l_score, np.mean(crps)])
        best_hp[company][dist] = tuner.get_best_hyperparameters()[0]

In [None]:
# Tabulate the rows collected in `results`; the DataFrame keyword for column
# labels is `columns` (`cols` raises TypeError).
df = pd.DataFrame(results, columns=['company', 'distribution', 'l_score', 'crps'])

In [None]:
def _hp_to_jsonable(obj):
    """json.dump fallback: serialise a HyperParameters-like object via its `values` dict."""
    values = getattr(obj, "values", None)
    if isinstance(values, dict):
        return values
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


# Persist the score table and the best hyperparameters per company/distribution.
df.to_csv('nn_results.csv')
with open('nn_model_hp.json', 'w') as jfile:
    # json.dump needs the target file object; `default` is only invoked for
    # values the encoder cannot handle natively.
    json.dump(best_hp, jfile, default=_hp_to_jsonable)