In [None]:
import os
# Opt in to the legacy Keras 2 implementation (the `tf_keras` package imported
# below). This assignment is deliberately placed ahead of the TensorFlow
# import: the flag is expected to be read when TensorFlow is loaded, so do not
# move it below the other imports.
os.environ["TF_USE_LEGACY_KERAS"] = "1"

from collections import defaultdict
from functools import partial
import json
from pathlib import Path

import keras_tuner
import tensorflow as tf
import tf_keras as keras

from density_estimation.models.pnn import build_prob_nn
from density_estimation.common import get_data


# Directory that is globbed for the input CSV files; the path is relative to
# the working directory the notebook kernel was started in.
DATA_DIR = Path("data/TAQ")

In [None]:
# Choose the tf.distribute strategy that model building/training will run under:
#   * at least one GPU visible      -> MirroredStrategy
#   * TPU worker address in the env -> TPUStrategy
#   * otherwise                     -> whatever strategy is currently active
#                                      (TensorFlow's default when none was set).
#
# The TPU address is read exactly once so that the branch guard and the cluster
# resolver are guaranteed to use the same environment variable. Previously the
# guard tested 'TPU_WORKER_HOSTNAME' while the resolver read
# 'TPU_WORKER_HOSTNAMES', so the branch was either never taken or raised
# KeyError, depending on which of the two spellings was defined.
# An empty value is treated the same as an unset variable.
tpu_worker_hostnames = os.environ.get('TPU_WORKER_HOSTNAMES')

if tf.config.list_physical_devices('GPU'):
  strategy = tf.distribute.MirroredStrategy()
elif tpu_worker_hostnames:
  # NOTE(review): the value is passed through verbatim after the 'grpc://'
  # scheme; confirm it is a single host[:port] address in the target environment.
  cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      tpu='grpc://' + tpu_worker_hostnames
  )
  # The TPU system has to be connected and initialised before the strategy
  # object is created.
  tf.config.experimental_connect_to_cluster(cluster_resolver)
  tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
  strategy = tf.distribute.TPUStrategy(cluster_resolver)
else:
  strategy = tf.distribute.get_strategy()

In [None]:
# Random-search hyperparameter tuning of a probabilistic NN, run once per
# (input CSV, output distribution) pair. For every pair the best model is saved
# next to the tuner's trial data, and the best hyperparameter values of all
# pairs are collected in one JSON file.

# --- Tunable settings (values unchanged from the original literals) ----------
TRAIN_VAL_FRACTION = .8    # leading share of the rows used for train + validation
TRAIN_FRACTION = .8        # share of that leading part used for training
MAX_TRIALS = 30            # hyperparameter configurations tried per tuner
MAX_EPOCHS = 50            # upper bound on epochs per trial (early stopping may cut it)
EARLY_STOP_PATIENCE = 10   # epochs without val_loss improvement before stopping
HP_JSON_PATH = "nn_model_hp.json"

results = []  # not used in this cell; kept in case other cells rely on the name
best_hp = defaultdict(dict)  # best_hp[company][dist] -> dict of hyperparameter values
distributions = ['normal', 't', 'laplace', 'skewnorm', 'skewt', 'jsu']
# Sorted so the processing order does not depend on filesystem enumeration order.
paths = sorted(DATA_DIR.glob('*_300_cts.csv'))
early_stop = keras.callbacks.EarlyStopping(
    patience=EARLY_STOP_PATIENCE, monitor='val_loss', restore_best_weights=True
)


def _save_best_hp():
    """Write the hyperparameters collected so far in `best_hp` to HP_JSON_PATH."""
    with open(HP_JSON_PATH, 'w') as jfile:
        json.dump(best_hp, jfile, indent=4)


for csv_path in paths:
    # The company identifier is the file-name prefix before the first underscore.
    company = csv_path.stem.split('_')[0]
    data = get_data(csv_path)

    # Split by row position, without shuffling: the first 64% of the rows are
    # used for training and the next 16% for validation. The final 20% is not
    # touched here (presumably held out for testing - confirm).
    t_bound = round(data.shape[0] * TRAIN_VAL_FRACTION)
    v_bound = round(t_bound * TRAIN_FRACTION)
    # Column 0 is used as the target, all remaining columns as features.
    x_train, y_train = data[:v_bound, 1:], data[:v_bound, 0]
    x_val, y_val = data[v_bound: t_bound, 1:], data[v_bound: t_bound, 0]

    for dist in distributions:
        # Bind the distribution name so the tuner only has to supply `hp`.
        hypermodel = partial(build_prob_nn, dist_name=dist)
        tuner = keras_tuner.RandomSearch(
            hypermodel=hypermodel,
            objective=keras_tuner.Objective("val_loss", "min"),
            distribution_strategy=strategy,
            max_trials=MAX_TRIALS,
            overwrite=True,  # discard trial data left over from earlier runs
            directory=f"./{company}_trials",
            project_name=f"{dist}",
            max_retries_per_trial=3,
            max_consecutive_failed_trials=8,
        )
        tuner.search(
            x_train, y_train,
            validation_data=(x_val, y_val),
            epochs=MAX_EPOCHS,
            callbacks=[early_stop],
        )
        best_model = tuner.get_best_models()[0]
        best_model.save(f"./{company}_trials/{dist}/best_model.keras")
        best_hp[company][dist] = tuner.get_best_hyperparameters()[0].values

    # Persist after every company so that a failure later in this long-running
    # loop does not discard the results already obtained.
    _save_best_hp()


# Final write; also produces the (empty) file when no input CSV was found.
_save_best_hp()