In [None]:
import pandas as pd
from pyprojroot import here
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_error

from nutils import name_mask, colordict

In [None]:
# Run configuration shared by the cells below.
TARGET, FS, HPO = (
    'occ',  # target name; selects which true-matrix CSV is loaded
    'u',    # feature-set tag (presumably); used to pick prediction files and in the plot file name
    '1',    # HPO run tag (presumably hyper-parameter optimisation); used in the plot file name
)

In [None]:
# Ground-truth matrix for the configured target, indexed by its parsed
# 'datetime' column.
true_matrix_file = here() / f'data/processed/true_matrices/{TARGET}.csv'
true_matrix = pd.read_csv(true_matrix_file, index_col='datetime', parse_dates=True)

In [None]:
def flatten(matrix):
    """Return all cells of ``matrix`` as a single 1-D NumPy array.

    Values are laid out row by row (NumPy's default C order), so the result
    has ``rows * columns`` elements.
    """
    cell_values = matrix.values
    return cell_values.flatten()

In [None]:
# Build one record per prediction file: model name, feature-set tag and the
# RMSE of the prediction against the ground truth for every calendar month.
result_list = list()
error_list = list()  # not used by the visible cells; kept in case a later cell expects it
data_path = Path('data/processed/prediction_matrices/')

# Ground-truth rows without missing values. Computed once, since it does not
# depend on the prediction file being evaluated.
true_valid_index = true_matrix.dropna().index

# Select files by the configured HPO tag (previously hard-coded as '1') so the
# inputs agree with the HPO tag used in the output file name.
# sorted() makes the processing order deterministic across file systems.
for p in sorted(here(data_path / '50').glob(f'*{FS}-{HPO}.csv')):

    result = dict()

    # The file stem is split on '-'; fields 1 and 2 are taken as the model and
    # feature-set identifiers (presumably '<prefix>-<model>-<fs>-<hpo>' — TODO confirm).
    parts = p.stem.split('-')
    result['Model'] = parts[1]
    result['FS'] = parts[2]

    pred_matrix = pd.read_csv(
        p,
        index_col='datetime',
        parse_dates=True)

    # Timestamps that are complete in both the prediction and the ground truth.
    idx = pred_matrix.dropna().index.intersection(true_valid_index)

    pred_matrix = pred_matrix.loc[idx]
    # Align into a loop-local frame. Re-assigning `true_matrix` here would
    # shrink the notebook-level ground truth on every iteration, so later
    # models would be scored on a subset that depends on file order and on
    # how often this cell has been run.
    true_aligned = true_matrix.loc[idx]

    # Month number -> 1-D array of all cell values in that month.
    # NOTE(review): element-wise comparison assumes both matrices share the
    # same columns in the same order — confirm.
    true = true_aligned.groupby(true_aligned.index.month).apply(flatten).to_dict()
    pred = pred_matrix.groupby(pred_matrix.index.month).apply(flatten).to_dict()

    for month in range(1, 13):
        if month not in true:
            # No common data for this month: record NaN instead of failing
            # with a KeyError, so every record keeps the same 12 month keys.
            result[month] = np.nan
            continue
        # sqrt(MSE) instead of `squared=False`, which is deprecated and
        # later removed in recent scikit-learn releases.
        rmse = np.sqrt(mean_squared_error(true[month], pred[month]))
        result[month] = rmse

    result_list.append(result)

In [None]:
# One row per evaluated prediction file; columns are the keys of the result
# records ('Model', 'FS' and the month numbers).
df = pd.DataFrame(data=result_list)

In [None]:
# Order the rows from highest to lowest mean RMSE. The mean is taken over
# every column after the first two and is only a temporary sort key, so it
# is dropped again afterwards.
df = (
    df.assign(Mean=df.iloc[:, 2:].mean(axis=1))
      .sort_values(by='Mean', ascending=False)
      .drop(columns='Mean')
)

In [None]:
# Keep only the per-month RMSE columns, with the model name as row label.
df = df.drop(columns='FS').set_index('Model')

In [None]:
# Grouped bar chart of the RMSE table: one group per month on the x axis,
# one bar per model inside each group.
monthly = df.T
models = list(monthly.columns)

fig, ax = plt.subplots(figsize=(10, 3))

monthly.plot(kind='bar',
             edgecolor='k',
             lw=.5,
             color=[colordict.get(x, 'grey') for x in models],
             ax=ax,
             width=.6,
             rot=0)  # horizontal tick labels, set on this axes explicitly

# Shrink current axis by 20% to leave room for the legend on the right
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Fall back to the raw model name when no display name is registered, in the
# same way the colour lookup falls back to 'grey', instead of raising KeyError.
ax.legend([name_mask[x] if x in name_mask else x for x in models],
          frameon=False,
          loc='center left',
          bbox_to_anchor=(1, 0.5))

ax.set_ylabel('RMSE')
ax.set_xlabel('Month')
ax.set_ylim(top=15)  # fixed upper limit: bars above 15 are cut off at the top
ax.set_axisbelow(True)  # draw the grid behind the bars
ax.grid(lw=.3, ls='--', axis='y')

plot_path = here() / f'output/plots/performance_monthly-{FS}-{HPO}.jpg'
# Create the output folder if needed so saving does not fail on a fresh checkout.
plot_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_path,
            bbox_inches='tight',
            dpi=300)