In [19]:
import pandas as pd
from pyprojroot import here
import numpy as np

from nutils import interval_score

In [20]:
TARGET = 'occ'

In [21]:
true_matrix = pd.read_csv(
    here() / f'data/processed/true_matrices/{TARGET}.csv',
    index_col='datetime',
    parse_dates=True
)
print(true_matrix)

             t+1   t+2   t+3   t+4   t+5   t+6   t+7   t+8   t+9  t+10  ...  \
datetime                                                                ...   
2017-01-01  30.0  27.0  19.0  24.0  24.0  26.0  35.0  37.0  36.0  38.0  ...   
2017-01-02  51.0  48.0  43.0  34.0  27.0  28.0  35.0  31.0  38.0  43.0  ...   
2017-01-03  75.0  67.0  65.0  59.0  57.0  53.0  50.0  46.0  44.0  45.0  ...   
2017-01-04  36.0  27.0  24.0  23.0  20.0  15.0  11.0  19.0  23.0  26.0  ...   
2017-01-05  26.0  21.0  19.0  16.0  14.0  14.0  24.0  22.0  21.0  25.0  ...   
...          ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   
2019-06-15  37.0  31.0  25.0  21.0  23.0  22.0  22.0  26.0  30.0  40.0  ...   
2019-06-16  34.0  27.0  29.0  23.0  25.0  25.0  20.0  19.0  19.0  18.0  ...   
2019-06-17  27.0  24.0  13.0   9.0   7.0   7.0  13.0  17.0  21.0  32.0  ...   
2019-06-18  21.0  16.0  18.0  13.0   9.0   5.0   8.0   8.0  16.0  24.0  ...   
2019-06-19   NaN   NaN   NaN   NaN   NaN   NaN   NaN

In [22]:
result_list = list()

for lpath in here('data/processed/prediction_matrices/05').glob('*.csv'):
    
    result = dict()
    
    parts = lpath.stem.split('-')
    
    result['Model'] = parts[1].upper()
    result['FS'] = parts[2].upper()
    result['HPO'] = parts[3]
    
    l = pd.read_csv(
        lpath, 
        index_col='datetime', 
        parse_dates=True
    )
    
    upath = here() / 'data/processed/prediction_matrices/95'
    upath = upath / f"{lpath.stem}.csv"
    
    u = pd.read_csv(
        upath, 
        index_col='datetime', 
        parse_dates=True
    ).dropna()
    print(lpath)
    true_vector = true_matrix.loc[u.index].dropna()
    
    l_vector = l.loc[true_vector.index].values.flatten()
    u_vector = u.loc[true_vector.index].values.flatten()
    true_vector = true_vector.values.flatten()
    
    try:
        msis = interval_score(
            observations=true_vector,
            alpha=0.05, 
            q_left=l_vector, 
            q_right=u_vector,
            mean=True,
            scaled=True,
            seasonality=24
        )
    except Exception as e:
        msis = np.nan
        print(f'Encountered error when calculating MSIS for {p.stem}')
        print(e)
    result['MSIS'] = msis.round(2)
    result_list.append(result)

In [23]:
print(result_list)

[]


In [None]:
df = pd.DataFrame(result_list)
df = df.sort_values(by='MSIS', ascending=False)
df = df.set_index('Model')

[]


KeyError: 'MSIS'

In [None]:
df

In [None]:
df.to_latex(
    buf=here() / 'output/tables/msis.tex',
    float_format='%.2f',
    column_format='rccc',
    label='tab:msis',
    caption='Mean scaled interval score (MSIS) for the tested models',
    position='h'
)