In [None]:
# This notebook calculates and exports absolute error tables that can then be
# read for statistical analysis

In [None]:
import pandas as pd
from pyprojroot import here
import numpy as np
from pathlib import Path

from sklearn.metrics import mean_squared_error, mean_absolute_error
import scikit_posthocs as sp

from nutils import interval_score, bootstrap, name_mask

In [None]:
# Selects which true matrix is loaded below:
# data/processed/true_matrices/<TARGET>.csv
TARGET = 'occ'

In [None]:
# Load the true-value matrix for TARGET, resolved against the project root.
# The 'datetime' column is used as the index and parsed as dates.
true_matrix_path = here() / f'data/processed/true_matrices/{TARGET}.csv'
true_matrix = pd.read_csv(true_matrix_path, parse_dates=True, index_col='datetime')

In [None]:
# Build one absolute-error vector per prediction file and collect them in `frame`.
frame = list()
data_path = Path('data/processed/prediction_matrices/')

# Rows of the true matrix without missing values. This is the same for every
# prediction file, so it is computed once instead of on each iteration.
true_index = true_matrix.dropna().index

# NOTE(review): the '50' sub-folder and the '*1.csv' pattern are kept exactly as
# they were; what they select is not visible from this notebook -- confirm.
# sorted() pins the iteration order (and therefore the column order of the
# resulting table), which otherwise depends on the file system.
for p in sorted(here(data_path / '50').glob('*1.csv')):

    # The file stem is split on '-'; the second and third pieces are used as
    # the model and `fs` labels (stems with fewer pieces raise IndexError).
    parts = p.stem.split('-')

    model = parts[1]
    fs = parts[2]
    name = model + '-' + fs

    pred_matrix = pd.read_csv(
        p,
        index_col='datetime',
        parse_dates=True)

    # Keep only timestamps that are complete (no NaN) in both matrices.
    idx = pred_matrix.dropna().index.intersection(true_index)

    # Flattening pairs the values by position, so this assumes both matrices
    # have the same columns in the same order -- TODO confirm.
    pred_vector = pred_matrix.loc[idx].values.flatten()
    true_vector = true_matrix.loc[idx].values.flatten()

    # Element-wise absolute error, stored as a Series named '<model>-<fs>'.
    error = np.abs(pred_vector - true_vector)
    error = pd.Series(error, name=name)
    frame.append(error)

# Fail here with a clear message instead of an opaque pd.concat error later on.
if not frame:
    raise FileNotFoundError(
        f"no prediction matrices matching '*1.csv' under {here(data_path / '50')}")

In [None]:
# Place the collected error Series side by side, one column per Series.
# NOTE: `frame` is rebound from a list to a DataFrame here, so this cell has to
# be re-run together with the loop cell that fills the list.
frame = pd.concat(objs=frame, axis='columns')

In [None]:
# Long format: one row per cell of `frame`, with the column name in 'variable'
# and the absolute error in 'value'.
# The error Series are aligned by position when concatenated, so vectors of
# different length are padded with NaN. Drop that padding so it is not passed
# on as observations; when all vectors have equal length this is a no-op.
melt = frame.melt().dropna(subset=['value'])

In [None]:
# Show the long-format error table for a quick visual check.
melt

In [None]:
# Pairwise Dunn post-hoc comparison of the error groups ('variable') on their
# absolute errors ('value'), with Holm adjustment of the p-values.
# NOTE(review): Dunn's test treats the groups as independent samples, while all
# error columns are computed against the same true_matrix -- confirm that this
# is the intended test.
table = sp.posthoc_dunn(
    melt,
    group_col='variable',
    val_col='value',
    p_adjust='holm',
)
# Displayed rounded to three decimals; `table` itself keeps full precision.
table.round(3)