In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import torch
from utils.visualization import visualize_rel_error_report
from utils.data import (
    DATA_SOURCE,
    PROC_SOURCE,
    PRED_SINK,
    MODEL_SINK,
    EXP_CSV_COLS,
    EXP_CSV_PATH,
    TRIALS_CSV_COLS,
    TRIALS_CSV_PATH,
)

# Validation data lives in a "validation" entry that is a sibling of DATA_SOURCE.
VAL_SOURCE = DATA_SOURCE.parent / "validation"

# Let pandas render large tables without truncation:
# up to 1000 rows and 100 columns are displayed.
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_columns", 100)

In [None]:
# Experiment IDs of the HPO runs (presumably hyperparameter optimisation — TODO confirm);
# starts out empty.
hpo_exp_ids = list()

In [None]:
# Read the experiment and trial bookkeeping tables from their CSV files,
# using the column -> dtype mappings that come with the paths.
exp_tab = pd.read_csv(EXP_CSV_PATH, dtype=EXP_CSV_COLS)
trial_date_cols = ["start_date", "end_date"]  # parsed as datetimes on load
trials_tab = pd.read_csv(TRIALS_CSV_PATH, dtype=TRIALS_CSV_COLS, parse_dates=trial_date_cols)

# Attach the experiment columns to every trial through the shared
# "experiment_uid" key (pandas' default inner join).
# NOTE(review): the previous comment here said "only one row should be
# returned", but nothing in this cell restricts the result to a single row —
# every trial with a matching experiment is kept. Confirm the intent.
meta_info_df = pd.merge(trials_tab, exp_tab, on="experiment_uid")

# Show the experiment table as the cell output.
exp_tab

| Description | # Parameters |
| ----------- | ------------ |
| no expensive layer | 1135 |
| 8 neurons in expensive layer | 2231 |
| standard topology (24 neurons in expensive layer) | 4711 |
| 32 neurons in expensive layer | 5951 |
| 32 neurons in expensive layer and kernel size 17 (instead of 9) | 10943 |

In [None]:
# Preview the first rows of the trials whose "debug" flag is False.
# The query is a plain string: it has no placeholders, so no f-string is needed.
meta_info_df.query("debug == False").head()

In [None]:
# Strip plots of the relative-error columns against model size,
# one panel per material, restricted to non-debug trials.

# Relative-error columns to plot: names ending in "rel_err", excluding those
# that start with "max".
err_cols = [
    col
    for col in meta_info_df
    if col.endswith("rel_err") and not col.startswith("max")
]

# Model size (number of parameters) for each experiment UID.
expid2modelsize = {
    "75010": 1135,
    "ea89c": 2231,
    "14abf": 4711,
    "60642": 5951,
    "0a2ef": 10943,
}

# Add model_size once up front instead of once per plotted column.
# Experiment UIDs missing from the mapping get NaN here
# (presumably those rows are then not drawn — TODO confirm).
plot_df = meta_info_df.query("debug == False").assign(
    model_size=lambda df: df.experiment_uid.map(expid2modelsize)
)
material_groups = list(plot_df.groupby("material"))

# Size the panel row to the number of materials actually present, so no
# material is silently dropped and no panel is left empty. Five materials
# reproduce the previous 1x5 layout with figsize (15, 10). At least one panel
# is always created so an empty selection does not raise.
n_panels = max(len(material_groups), 1)
fig, axes = plt.subplots(
    1, n_panels, figsize=(3 * n_panels, 10), sharey=True, squeeze=False
)
axes = axes.ravel()  # keep `axes` a 1-D array of Axes, also for a single panel

for ax, (mat_lbl, mat_df) in zip(axes, material_groups):
    # All error columns of one material are overlaid in the same panel.
    for err_col in err_cols:
        sns.stripplot(
            data=mat_df,
            x="model_size",
            y=err_col,
            hue="experiment_uid",
            ax=ax,
            legend=False,
        )
    # Each stripplot call labels the y axis with the column name; the material
    # name is set last so that it is the label shown.
    ax.set_ylabel(mat_lbl)
fig.tight_layout()
    