# Evaluation Notebook

In [85]:
import os
import pickle
import numpy as np
import pandas as pd

import utils

In [None]:
def concatenate_results(results_dir: str,
                        results: list,
                        get_skill: bool = False,
                        pers: pd.DataFrame = pd.DataFrame(),
                        reset_index: bool = True,
                        sort_skill: bool = False) -> pd.DataFrame:
    """Aggregate per-file evaluation tables into one summary DataFrame.

    Every entry of ``results`` names a pickle file inside ``results_dir``
    that holds a mapping with an ``'evaluation'`` DataFrame. For each file
    the identifying columns (``Models``, ``output_dim``, ``freq``, ``t_0``)
    are taken from the first row; those columns and ``key`` are then
    dropped and the remaining columns are averaged over all rows. The
    result has one row per file.

    Args:
        results_dir: Directory containing the pickle files.
        results: File names (relative to ``results_dir``) to aggregate.
        get_skill: If True, left-merge ``pers`` on ``key``, ``output_dim``
            and ``freq`` and add ``Skill = 1 - RMSE / RMSE_p`` before
            averaging, where ``RMSE_p`` is the ``RMSE`` column of ``pers``.
        pers: Reference table with at least the columns ``RMSE``, ``key``,
            ``output_dim`` and ``freq``. Only read when ``get_skill`` is
            True.
        reset_index: If True, move the index of each evaluation table into
            regular columns before the identifying columns are read.
        sort_skill: If True, order the summary by ``Skill`` (descending)
            instead of by ``output_dim``, ``freq`` and ``Models``.

    Returns:
        The averaged metric columns followed by the identifying columns.
        When ``results`` is empty, an empty DataFrame that only has the
        identifying columns.

    Raises:
        KeyError: If an expected column is missing, including ``Skill``
            when ``sort_skill`` is requested.
    """
    index_cols = ['Models', 'output_dim', 'freq', 't_0']
    indices = {col: [] for col in index_cols}
    metrics = []
    for file in results:  # Iterate through all result files
        # NOTE: pickle.load can execute arbitrary code; only use this on
        # result files from a trusted source.
        with open(os.path.join(results_dir, file), 'rb') as f:
            pkl = pickle.load(f)
        df = pkl['evaluation']
        if get_skill:
            df = pd.merge(df, pers[['RMSE', 'key', 'output_dim', 'freq']],
                          on=['key', 'output_dim', 'freq'],
                          how='left',
                          suffixes=('', '_p'))
            df['Skill'] = 1 - df['RMSE'] / df['RMSE_p']
            df = df.drop(columns='RMSE_p')
        if reset_index:
            # Non-inplace so the frame stored in the loaded pickle mapping
            # is left untouched.
            df = df.reset_index()
        for col in index_cols:
            # The identifying columns are read from the first row only.
            indices[col].append(df[col].iloc[0])
        metrics.append(df.drop(columns=index_cols + ['key']).mean(axis=0))
    if not metrics:
        # Nothing to aggregate: return an empty table instead of failing on
        # an undefined per-file metric.
        return pd.DataFrame(columns=index_cols)
    # Build the metric table from the union of all per-file metric columns
    # so that a metric missing from the last file is not silently dropped.
    summary = pd.concat([pd.DataFrame(metrics), pd.DataFrame(indices)],
                        axis=1)
    summary = summary.sort_values(['output_dim', 'freq', 'Models'])
    if sort_skill:
        if 'Skill' not in summary.columns:
            raise KeyError(
                "sort_skill=True requires a 'Skill' column; pass "
                "get_skill=True or provide evaluations that contain it")
        summary = summary.sort_values(['Skill'], ascending=False)
    return summary

In [187]:
# Name of the sub-directory (below the configured results path) whose
# result files are evaluated -- presumably the dataset name; confirm.
d = 'pvod'

config = utils.load_config('config.yaml')
results_dir = os.path.join(config['eval']['results_path'], d)
result_files = os.listdir(results_dir)
# pickle files where training was performed on single dataset sequentially
sep_train = [f for f in result_files
             if f.endswith('.pkl') and 'all' not in f]
# pickle files where training was performed on concatenated datasets;
# restricted to '.pkl' like sep_train so that a non-pickle file with 'all'
# in its name is never handed to pickle.load
all_train = [f for f in result_files
             if f.endswith('.pkl') and 'all' in f]
# First file whose name contains 'persistence'; raises IndexError when the
# directory holds no such file.
persistence_file = [f for f in result_files if 'persistence' in f][0]

In [148]:
# Load the persistence results CSV; this table is what concatenate_results
# receives as `pers` when skill scores are requested.
pers = pd.read_csv(
    os.path.join(results_dir, persistence_file)
)

In [157]:
# Summarise the result files in `sep_train` using the function defaults
# (no skill merge, index reset, sorted by output_dim / freq / Models).
df_sep = concatenate_results(
    results_dir=results_dir,
    results=sep_train,
)
df_sep

Unnamed: 0,R^2,RMSE,MAE,Skill,Models,output_dim,freq,t_0
18,0.857465,0.095972,0.050327,0.378651,bilstm,1,15min,12
8,0.845256,0.100843,0.052443,0.349403,cnn,1,15min,12
3,0.844612,0.101161,0.052489,0.348887,fnn,1,15min,12
6,0.858707,0.095572,0.050155,0.381414,lstm,1,15min,12
4,0.849617,0.100071,0.052115,0.357922,tcn,1,15min,12
19,0.870719,0.089594,0.049339,0.381564,bigru,1,1h,12
7,0.873163,0.0886,0.048287,0.387726,bilstm,1,1h,12
21,0.859765,0.094497,0.052505,0.349293,cnn,1,1h,12
15,0.873613,0.088568,0.046202,0.388044,cnn-lstm,1,1h,12
17,0.853211,0.096074,0.054213,0.33783,fnn,1,1h,12


In [185]:
# Step-by-step, inline version of the aggregation that concatenate_results
# performs, run on the `all_train` files with the skill merge enabled and
# without resetting the index. All intermediate names stay in the notebook
# namespace for inspection.
results = all_train
get_skill = True
reset_index = False
index_cols = ['Models', 'output_dim', 'freq', 't_0']
metrics = []
a = []  # not used anywhere in this cell
# one empty list per identifying column
indices = {}
for col in index_cols:
    indices.setdefault(col, [])
for file in results:  # one summary row per result file
    # Read the pickled mapping and pick its evaluation table
    with open(os.path.join(results_dir, file), 'rb') as f:
        pkl = pickle.load(f)
    df = pkl['evaluation']
    if get_skill:
        # Attach the RMSE from `pers` as RMSE_p and derive the skill score
        df = df.merge(pers[['RMSE', 'key', 'output_dim', 'freq']],
                      on=['key', 'output_dim', 'freq'],
                      how='left',
                      suffixes=('', '_p'))
        df['Skill'] = 1 - df['RMSE'] / df['RMSE_p']
        df.drop(columns='RMSE_p', inplace=True)
    if reset_index:
        df.reset_index(inplace=True)
    for col in index_cols:
        # identifying values are taken from the first row only
        indices[col].append(df[col].iloc[0])
    df.drop(columns=index_cols + ['key'], inplace=True)
    # column-wise mean of everything that is left
    metric = df.mean()
    metrics.append(metric)
df = pd.DataFrame(metrics, columns=metric.index)
df_index = pd.DataFrame(indices)
df = pd.concat([df, df_index], axis='columns')
df = df.sort_values(['output_dim', 'freq', 'Models'])

In [188]:
# Summarise the result files in `all_train`, adding a skill score computed
# against the RMSE in `pers`; the index of each evaluation table is not
# reset.
df_all = concatenate_results(
    results_dir=results_dir,
    results=all_train,
    get_skill=True,
    pers=pers,
    reset_index=False,
)
df_all

Unnamed: 0,R^2,RMSE,MAE,Skill,Models,output_dim,freq,t_0
2,0.735481,0.111133,0.060635,0.189328,fnn,1,1h,12.0
0,0.722776,0.110379,0.061411,0.191636,lstm,1,1h,12.0
3,0.718284,0.116861,0.064494,0.147346,tcn,1,1h,12.0
1,0.84556,0.093883,0.051869,0.336433,bigru,48,1h,12.0
7,0.84753,0.093128,0.050771,0.344333,bilstm,48,1h,12.0
4,0.85254,0.089634,0.048427,0.360495,cnn-lstm,48,1h,
5,0.824514,0.097809,0.057004,0.300097,gru,48,1h,12.0
6,0.83628,0.098061,0.05294,0.310716,lstm,48,1h,12.0
8,0.792907,0.102311,0.05774,0.260075,tcn,48,1h,12.0
