# Evaluation Notebook

In [4]:
import os
import pickle
import pandas as pd

from utils import tools

In [18]:
# Name of the sub-directory (below the configured results path) to evaluate.
d = 'pvod'

config = tools.load_config('config.yaml')
results_dir = os.path.join(config['eval']['results_path'], d)
# os.listdir returns entries in arbitrary order; sorting keeps the file order
# (and therefore the row labels of the summary tables and the choice of
# persistence file below) reproducible across runs and machines.
result_files = sorted(os.listdir(results_dir))
# pickle files where training was performed on single dataset sequentially
# NOTE(review): this also matches '.pkl' files whose name contains 'fl' --
# confirm that federated-learning results are meant to be included here.
sep_train = [f for f in result_files if f.endswith('.pkl') and 'all' not in f]
# pickle files where training was performed on concatenated datasets
all_train = [f for f in result_files if 'all' in f]
# pickle files for federated learning simulations
fl_sims = [f for f in result_files if 'fl' in f]
# First file whose name contains 'persistence'; raises IndexError if there is none.
persistence_file = [f for f in result_files if 'persistence' in f][0]

In [19]:
def concatenate_results(results_dir: str,
                        results: list,
                        get_skill: bool = False,
                        pers: pd.DataFrame = pd.DataFrame(),
                        reset_index: bool = True,
                        sort_skill: bool = False) -> pd.DataFrame:
    """Summarise several pickled evaluation results into one table.

    Each file in ``results`` is unpickled and its ``'evaluation'`` entry (a
    DataFrame) is reduced to a single row: the column-wise mean of every
    column that is left after removing the identifier columns
    (``Models``, ``output_dim``, ``freq``, ``t_0``) and ``key``. The
    identifier values are taken from the first row of each file.

    Args:
        results_dir: Directory that contains the files listed in ``results``.
        results: File names (relative to ``results_dir``) to load.
        get_skill: If True, left-merge the ``RMSE`` column of ``pers`` on
            ``key``/``output_dim``/``freq`` and add a ``Skill`` column
            computed as ``1 - RMSE / RMSE_p`` (``RMSE_p`` being the merged
            value from ``pers``).
        pers: Frame providing the ``RMSE``, ``key``, ``output_dim`` and
            ``freq`` columns; only read when ``get_skill`` is True.
        reset_index: If True, call ``reset_index()`` on every loaded frame
            so that its index becomes regular columns before the identifier
            columns are read.
        sort_skill: If True, sort the final table by ``Skill`` in descending
            order (requires a ``Skill`` column to be present).

    Returns:
        One row per file with the averaged metric columns followed by the
        identifier columns, sorted by ``output_dim``, ``freq`` and
        ``Models`` (or by ``Skill`` when ``sort_skill`` is True). An empty
        frame holding only the identifier columns is returned when
        ``results`` is empty.

    Raises:
        KeyError: If a loaded frame lacks one of the identifier columns or
            ``key``, or if ``sort_skill`` is True without a ``Skill`` column.
    """
    index_cols = ['Models', 'output_dim', 'freq', 't_0']
    # Nothing to aggregate: return an empty table instead of failing later on.
    if not results:
        return pd.DataFrame(columns=index_cols)

    indices = {col: [] for col in index_cols}
    metrics = []
    for file in results:  # Iterate through all result files
        # SECURITY: pickle.load can execute arbitrary code -- only point this
        # at result files produced by a trusted pipeline.
        with open(os.path.join(results_dir, file), 'rb') as f:
            pkl = pickle.load(f)
        df = pkl['evaluation']
        if get_skill:
            # Overlapping column names coming from `pers` get the '_p' suffix,
            # so its RMSE shows up as 'RMSE_p' next to the model's own 'RMSE'.
            df = pd.merge(df, pers[['RMSE', 'key', 'output_dim', 'freq']],
                          on=['key', 'output_dim', 'freq'],
                          how='left',
                          suffixes=('', '_p'))
            df['Skill'] = 1 - df['RMSE'] / df['RMSE_p']
            df = df.drop(columns='RMSE_p')
        if reset_index:
            df = df.reset_index()
        # The identifier values are read from the first row only.
        for col in index_cols:
            indices[col].append(df[col].iloc[0])
        df = df.drop(columns=index_cols + ['key'])
        metrics.append(df.mean(axis=0))

    # Build the metric table from the union of all metric names so that a
    # metric missing from the last file is not dropped for every other file.
    summary = pd.concat([pd.DataFrame(metrics), pd.DataFrame(indices)], axis=1)
    summary = summary.sort_values(['output_dim', 'freq', 'Models'])
    if sort_skill:
        summary = summary.sort_values(['Skill'], ascending=False)
    return summary

In [11]:
# Read the persistence results (CSV) that concatenate_results uses as the
# reference RMSE when computing the skill score.
pers = pd.read_csv(
    os.path.join(results_dir, persistence_file)
)

In [8]:
# Summary table for the result files selected in `sep_train`
# (default options: no skill merge, index reset, sorted by configuration).
df_sep = concatenate_results(
    results=sep_train,
    results_dir=results_dir,
)
df_sep

  saveable.load_own_variables(weights_store.get(inner_path))


Unnamed: 0,R^2,RMSE,MAE,Skill,Models,output_dim,freq,t_0
22,0.857465,0.095972,0.050327,0.378651,bilstm,1,15min,12
8,0.845256,0.100843,0.052443,0.349403,cnn,1,15min,12
3,0.518219,0.163737,0.098211,-0.083962,fnn,1,15min,12
6,0.858707,0.095572,0.050155,0.381414,lstm,1,15min,12
4,0.849617,0.100071,0.052115,0.357922,tcn,1,15min,12
23,0.870719,0.089594,0.049339,0.381564,bigru,1,1h,12
7,0.873163,0.0886,0.048287,0.387726,bilstm,1,1h,12
28,0.859765,0.094497,0.052505,0.349293,cnn,1,1h,12
11,0.863299,0.091664,0.049481,0.363642,cnn-bigru,1,1h,12
26,0.872181,0.088906,0.047072,0.384253,cnn-bilstm,1,1h,12


In [None]:
# Summary table for the result files selected in `all_train`; the skill
# score is recomputed against the RMSE values in `pers`, and the loaded
# frames are used without resetting their index.
df_all = concatenate_results(
    results_dir=results_dir,
    results=all_train,
    pers=pers,
    get_skill=True,
    reset_index=False,
)
df_all

Unnamed: 0,R^2,RMSE,MAE,Skill,Models,output_dim,freq,t_0
0,0.722776,0.110379,0.061411,0.192256,lstm,1,1h,12
2,0.718284,0.116861,0.064494,0.147898,tcn,1,1h,12
14,0.763193,0.108566,0.060176,0.223417,tcn-gru,1,1h,12
3,0.761113,0.10882,0.06,0.217579,tcn-lstm,1,1h,12
1,0.84556,0.093883,0.051869,0.336433,bigru,48,1h,12
15,0.84753,0.093128,0.050771,0.344333,bilstm,48,1h,12
6,0.86203,0.08738,0.048375,0.378867,cnn-gru,48,1h,12
7,0.848349,0.0895,0.048074,0.358844,cnn-lstm,48,1h,12
10,0.843576,0.094631,0.05259,0.331614,convlstm,48,1h,12
8,0.824514,0.097809,0.057004,0.300097,gru,48,1h,12


In [22]:
# Summary table for the result files selected in `fl_sims`.
# `pers` is passed but not read because get_skill is False.
df_fl = concatenate_results(
    results_dir=results_dir,
    results=fl_sims,
    pers=pers,
    get_skill=False,
    reset_index=True,
)
df_fl

Unnamed: 0,R^2,RMSE,MAE,Skill,Models,output_dim,freq,t_0
0,0.660603,0.116681,0.067431,0.122899,fnn,1,1h,12
