In [39]:
# %matplotlib inline

from __future__ import annotations
from cycler import cycler
import json
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import numpy as np
import pandas as pd
from pathlib import Path
import seaborn as sns
import shutil
from sklearn.model_selection import ParameterGrid
import tensorflow as tf
import tensorflow.keras as keras
from typing import Callable, Literal

# Apply seaborn's default theme to all matplotlib figures. `set_theme()` is the
# preferred spelling; `sns.set()` is only an alias that may be removed.
sns.set_theme()

In [40]:
# Root paths for saving content, keyed by the kind of content they hold.
roots = dict(
    dataset = Path('./dataset/'),
    images = Path('./images/'),
    tuning = Path('./tuning/'),
    tables = Path('./tables/'),
)
# Create the directories if they do not already exist. `exist_ok=True` already
# makes the call a no-op for an existing directory (so no separate `exists()`
# check is needed), and `parents=True` keeps this working should a root ever
# point at a nested path. A root that exists as a regular file now raises
# `FileExistsError` here instead of being skipped silently.
for root_dir in roots.values():
    root_dir.mkdir(parents=True, exist_ok=True)

In [79]:
def load_metrics(metrics_path: str):
    """Load model metrics from a JSON file.

    Args:
        metrics_path: Location of the JSON file. Anything accepted by `open`
            works, i.e. a string or a `pathlib.Path`.

    Returns:
        The decoded JSON content.
    """
    # JSON is UTF-8 encoded; do not depend on the platform's locale encoding.
    with open(metrics_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_history(history_path: str):
    """Read a model's training history from a CSV file into a DataFrame."""
    history = pd.read_csv(history_path)
    return history

def hypertune_load_parametergrid(tuning_root: Path) -> list[dict]:
    """Load the hyperparameter grid saved for a tuning run.

    Args:
        tuning_root: Directory of the tuning run; the grid is read from
            `parameter_grid.json` inside it.

    Returns:
        The decoded JSON content of that file.
    """
    parameter_grid_path = tuning_root/'parameter_grid.json'
    # JSON is UTF-8 encoded; do not depend on the platform's locale encoding.
    with open(parameter_grid_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def hypertune_load_history(tuning_root: Path) -> list[pd.DataFrame]:
    """Collect the training history of every model of a tuning run.

    The parameter grid is loaded only to learn how many models there are;
    model `i` is then read from `model_<i>_history.csv` inside `tuning_root`.

    Returns:
        One history DataFrame per model, ordered by model index.
    """
    n_models = len(hypertune_load_parametergrid(tuning_root))
    return [
        load_history(tuning_root/f"model_{idx}_history.csv")
        for idx in range(n_models)
    ]

def hypertune_load_metrics(tuning_root: Path) -> list[dict]:
    """Collect the saved metrics of every model of a tuning run.

    The parameter grid is loaded only to learn how many models there are;
    model `i` is then read from `model_<i>_metrics.json` inside `tuning_root`.

    Returns:
        One metrics object per model, ordered by model index.
    """
    n_models = len(hypertune_load_parametergrid(tuning_root))
    return [
        load_metrics(tuning_root/f"model_{idx}_metrics.json")
        for idx in range(n_models)
    ]

In [42]:
# Directory of the "gan-hypertune" run inside the tuning root.
tuning_root = roots["tuning"] / "gan-hypertune"

# Parameter grid of that run and the training history of each of its models.
grid = hypertune_load_parametergrid(tuning_root=tuning_root)
histories = hypertune_load_history(tuning_root=tuning_root)

In [43]:
# Preview the first rows of the training history loaded for the first model.
histories[0].head()

Unnamed: 0,epoch,d_accuracy,d_loss,g_accuracy,g_loss,g_mse,loss,val_d_accuracy,val_d_loss,val_g_accuracy,val_g_loss,val_g_mse,val_loss
0,0,0.750086,20.212833,0.162676,2.787466,0.163027,11.50015,0.0,3.84821,0.048839,2.732713,0.236527,3.290461
1,1,0.782929,7.554433,0.126665,2.155943,0.274054,4.855188,0.287285,1.314022,0.104591,2.13145,0.180038,1.722736
2,2,0.87935,9.079504,0.048779,3.450586,0.27794,6.265045,0.23023,2.654163,0.123409,3.63499,0.137055,3.144577
3,3,0.93587,5.313097,0.031573,3.272716,0.27799,4.292906,0.241706,2.702956,0.008946,3.40155,0.219423,3.052253
4,4,0.924646,4.393957,0.03746,3.655999,0.292703,4.024978,0.234116,3.820737,0.10476,3.88164,0.193627,3.851189


In [152]:
def make_latex(
    df: pd.DataFrame,
    label: str = None,
    latex_config: dict = {},
    replace_underscore: bool = True,
    subset_bold_min: list = None,
    subset_bold_max: list = None,
    ):
    latex_df = df.copy()
    styler = latex_df.style
    styler = styler.hide(axis=0) # Hide the index.
    styler = styler.format(str, escape='latex') # Default is to convert all cells to their string representation.

    styler = styler.format(
        formatter='{:.4f}',
        subset=[key for key in latex_df.columns if 'Model' not in key],
    )

    if replace_underscore:
        latex_df.columns = latex_df.columns.map(lambda x: x.replace('_', '\_')) # Escape the header names too.

    if subset_bold_min:
        styler = styler.highlight_min(
            # subset=[key for key in latex_df.columns if 'accuracy' in key], 
            subset=subset_bold_min, 
            axis=0,
            props='textbf:--rwrap;',
        )

    if subset_bold_max:
        styler = styler.highlight_max(
            subset=subset_bold_max, 
            axis=0,
            props='textbf:--rwrap;',
        )

    latex_string = styler.to_latex(
        # buf=latex_path,
        hrules=True,
        label=label,
        **latex_config,
    )

    return latex_string

In [75]:
### Unique hyperparameter selections.

# Grid columns left out of the per-model table.
dropped_columns = ['D_layer_type', 'G_layer_type', 'D_model_type', 'G_model_type', 'latent_dim', 'optim', 'feat_len', 'k', 'beta_1', 'beta_2', 'window_len']

# One row per grid entry; the row number becomes an explicit 'Model' column.
df = (
    pd.DataFrame(grid)
    .rename_axis('Model')
    .reset_index()
    .drop(columns=dropped_columns)
)

# Write the table to disk as LaTeX.
latex_path = roots['tables']/'gan_hypertune_params_unique.tex'
latex_string = make_latex(df)
with open(latex_path, 'w') as f:
    f.write(latex_string)

df.head()

Unnamed: 0,Model,D_hidden_units,D_n_layers,G_hidden_units,G_n_layers,lr
0,0,24,3,24,3,0.001
1,1,24,3,24,3,0.0001
2,2,24,3,24,6,0.001
3,3,24,3,24,6,0.0001
4,4,24,3,72,3,0.001


In [76]:
### Common hyperparameter selection for all models.

common_columns = ['D_model_type', 'D_layer_type', 'feat_len', 'latent_dim', 'window_len', 'k', 'beta_1', 'beta_2', 'optim']

# Take the values of the first grid entry and lay them out as one
# (Parameter, Value) row per hyperparameter.
df = (
    pd.DataFrame(grid)[common_columns]
    .rename(columns={'D_layer_type': 'layer_type', 'D_model_type': 'model_type'})
    .iloc[[0]]
    .T
    .reset_index()
)
df.columns = ['Parameter', 'Value']

# Write the table to disk as LaTeX.
latex_path = roots['tables']/'gan_hypertune_params_common.tex'
latex_string = make_latex(df)
with open(latex_path, 'w') as f:
    f.write(latex_string)

df.head()

Unnamed: 0,Parameter,Value
0,model_type,rnn
1,layer_type,gru
2,feat_len,2
3,latent_dim,128
4,window_len,24


In [153]:
met = hypertune_load_metrics(tuning_root)
df = pd.DataFrame(met)

# Isolate only the keys we want, pairing each metric with its table heading.
base_keys = ['loss', 'd_accuracy', 'g_mse']
base_key_names = ['Loss', 'Discriminator Accuracy', 'Generator MSE']
keys = []
tups = []
for k, name in zip(base_keys, base_key_names):
    # Each metric contributes a training column and a 'val_'-prefixed validation column.
    for prefix, split in (('', 'train'), ('val_', 'val')):
        keys.append(f"{prefix}{k}")
        tups.append((name, split))
df = df[keys]

# Create a new multi-column index so that the base key spans train/val child columns.
# Underscores are escaped everywhere, since the header ends up in LaTeX.
tups = [tuple(part.replace('_', r'\_') for part in tup) for tup in tups]
cols = pd.MultiIndex.from_tuples(tups)
df.columns = cols

# Turn the row number into an explicit 'Model' column.
df = df.rename_axis('Model').reset_index()

# Lower is better for these columns: bold the minimum.
subset_bold_min = [
    ('Loss', 'train'),
    ('Loss', 'val'),
    ('Generator MSE', 'train'),
    ('Generator MSE', 'val'),
]

# Higher is better for these columns: bold the maximum.
subset_bold_max = [
    ('Discriminator Accuracy', 'train'),
    ('Discriminator Accuracy', 'val'),
]

# The headers were escaped above, so make_latex must not escape them again.
latex_path = roots['tables']/'gan_hypertune_results.tex'
latex_string = make_latex(df,
    replace_underscore=False,
    subset_bold_min=subset_bold_min,
    subset_bold_max=subset_bold_max,
    latex_config=dict(multicol_align='c', multirow_align='c'),
)
with open(latex_path, 'w') as f:
    f.write(latex_string)

## PASTE THIS IN MANUALLY
## (Raw string, so that the LaTeX backslashes are not read as Python escapes.)
r"""
\toprule
\multirow[c]{2}{*}{Model} & \multicolumn{2}{c}{Loss} & \multicolumn{2}{c}{Discriminator Accuracy} & \multicolumn{2}{c}{Generator MSE} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
 & \emph{train} & \emph{val} & \emph{train} & \emph{val} & \emph{train} & \emph{val} \\
\midrule
"""

df.head()

Unnamed: 0_level_0,Model,Loss,Loss,Discriminator Accuracy,Discriminator Accuracy,Generator MSE,Generator MSE
Unnamed: 0_level_1,Unnamed: 1_level_1,train,val,train,val,train,val
0,0,2.963685,4.188546,0.955192,0.267932,0.226913,0.239819
1,1,3.114832,2.986632,0.890876,0.271779,0.18922,0.058162
2,2,3.67178,3.365423,0.967473,0.989137,0.292926,0.290281
3,3,4.596316,1.002229,0.83441,0.868766,0.20817,0.123155
4,4,3.372417,5.626608,0.944963,0.271557,0.249717,0.313532
