# Read Forecasts

In [10]:
import pandas as pd
import configparser

# Read the forecaster settings used by the checks below.
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result is turned into an explicit
# error here instead of a confusing NoSectionError on the getint() call.
config_path = 'C:/Users/Admin/Code/PROPHET-Load_LSTM/lstm_forecaster.ini'

config = configparser.ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError(f'Configuration file not found or unreadable: {config_path}')

# Number of rows each forecast update is expected to contain.
n_timesteps = config.getint('data_opt', 'n_timesteps')

In [11]:
# Load the stored test forecasts: the first CSV column becomes the index and
# both timestamp columns are parsed as datetimes.
forecast_filename = 'test_forecasts_25541525.csv'
forecast_path = r'C:/Users/Admin/Code/PROPHET-Load_LSTM/data/output/impianto_4/peak/' + forecast_filename

# Shorter column names used by the rest of the notebook.
column_aliases = {'predicted_activepower_ev_1': 'predict', 'PotenzaMax': 'load'}

df = pd.read_csv(
    forecast_path,
    index_col=0,
    parse_dates=['timestamp_forecast_update', 'timestamp_forecast'],
).rename(columns=column_aliases)

# Append [df.timestamp_forecast_update=='2019-9-13'] to inspect a single update.
df

Unnamed: 0,timestamp_forecast_update,timestamp_forecast,predict,persist,load
0,2019-09-13,2019-09-13,0.585481,0.556918,0.554331
1,2019-09-14,2019-09-14,0.557913,0.509760,0.475878
2,2019-09-15,2019-09-15,0.476820,0.338624,0.383797
3,2019-09-16,2019-09-16,0.404428,0.529883,0.596332
4,2019-09-17,2019-09-17,0.599193,0.525415,0.595295
...,...,...,...,...,...
102,2019-12-24,2019-12-24,0.653666,0.707321,0.710890
103,2019-12-25,2019-12-25,0.707750,0.734613,0.716163
104,2019-12-26,2019-12-26,0.712486,0.735433,0.684824
105,2019-12-27,2019-12-27,0.683565,0.706836,0.725218


To match the test code, keep only the forecast updates that fall on a 4-hourly grid (this filter is currently commented out).

In [12]:
# Disabled filter: when enabled, it keeps only the rows whose
# timestamp_forecast_update lies on a 4-hourly grid between
# 2019-09-13 00:00 and 2019-12-27 23:00.
# While these lines stay commented out, this cell leaves df unchanged.
# idx = pd.date_range('2019-9-13 0:00','2019-12-27 23:00',freq='4h')
# df = df.loc[df.timestamp_forecast_update.isin(idx)].copy()
# df

Checks

In [13]:
# Sanity checks on the loaded forecasts; each assertion reports what it found.

# Is persistence ever perfect? A zero persistence error would make the
# row-wise skill ratio (model error / persistence error) undefined.
perfect_persist = (df.persist - df.load) == 0
assert not perfect_persist.any(), \
    f'{int(perfect_persist.sum())} row(s) where persist equals load exactly'

# Any missing values?
nan_count = int(df.isna().sum().sum())
assert nan_count == 0, f'{nan_count} missing value(s) in the forecast table'

# Completeness: every forecast update must contain exactly n_timesteps rows.
# One groupby pass replaces re-scanning the whole frame for each update.
rows_per_update = df.groupby('timestamp_forecast_update').size()
incomplete = rows_per_update[rows_per_update != n_timesteps]
assert incomplete.empty, \
    f'{len(incomplete)} forecast update(s) do not have {n_timesteps} row(s); first: {incomplete.index[0]}'

# Load

Errors

In [14]:
# Absolute errors of persistence and of the model against the observed load.
abs_err_pers = df.persist.sub(df.load).abs()
abs_err_pred = df.predict.sub(df.load).abs()

# Mean absolute errors, and the model's skill relative to persistence
# (positive when the model's MAE is lower than persistence's).
mae_pers = abs_err_pers.mean()
mae_pred = abs_err_pred.mean()
skill = 1 - mae_pred / mae_pers

print(f'MAE%: pers {100*mae_pers:.2f} pred {100*mae_pred:.2f}')
print(f'Skill%: {100*skill:.3f}')

MAE%: pers 3.70 pred 6.15
Skill%: -66.088


Fraction of positive skills

In [15]:
# Row-wise skill: 1 - |model error| / |persistence error|.
model_abs_err = df.load.sub(df.predict).abs()
persist_abs_err = df.load.sub(df.persist).abs()
skills = 1 - model_abs_err.div(persist_abs_err)

# Share of rows with strictly positive skill (model beats persistence).
int(skills.gt(0).sum()) / len(skills)

0.45794392523364486

Fraction of positive skills (for each `timestamp_forecast_update`)

In [16]:
# One skill value per distinct forecast update, in order of first appearance.
skills = []

update_times = df.timestamp_forecast_update
for t in update_times.unique():
    rows = df[update_times == t]
    # MAE of persistence and of the model over this update's rows only.
    err_pers = rows.persist.sub(rows.load).abs().mean()
    err_pred = rows.predict.sub(rows.load).abs().mean()
    skills.append(1 - err_pred / err_pers)

# Share of updates with strictly positive skill.
sum(1 for s in skills if s > 0) / len(skills)


0.45794392523364486

Fraction of positive skills (for each `timestamp_forecast_update` at 0:00)

In [17]:
# One skill value per forecast update issued exactly at midnight (0:00).
skills = []

# Select the midnight updates with the vectorised .dt accessor instead of
# reading .hour/.minute from the items of .unique(): on pandas releases before
# 2.0 those items are numpy.datetime64 values, which have no such attributes.
update_times = df.timestamp_forecast_update
at_midnight = (update_times.dt.hour == 0) & (update_times.dt.minute == 0)

for t in update_times[at_midnight].unique():
    forecast_now = df.loc[update_times == t]
    # MAE of persistence and of the model over this update's rows only.
    test_mae_pers_now = (forecast_now.persist - forecast_now.load).abs().mean()
    test_mae_pred_now = (forecast_now.predict - forecast_now.load).abs().mean()
    test_skill_now = 1 - test_mae_pred_now / test_mae_pers_now
    skills.append(test_skill_now)

# Share of midnight updates with strictly positive skill.
len([x for x in skills if x > 0]) / len(skills)


0.45794392523364486

# Peaks

In [18]:
# Build the frame used for the peak analysis: forecasts issued at midnight,
# indexed by the time they refer to.
# NOTE: this cell rebinds df and drops timestamp_forecast_update, so it cannot
# be re-run on its own; re-run the loading cell first.
issued_at = df.timestamp_forecast_update
df = df.loc[(issued_at.dt.hour == 0) & (issued_at.dt.minute == 0)]

# Maximum number of leading rows kept from each update.
# NOTE(review): presumably 96 = one day of 15-minute steps — confirm.
max_rows_per_update = 96

# Concatenate once over all updates (in order of first appearance) instead of
# growing a frame inside the loop: that pattern copies the accumulated data on
# every iteration and starts from an empty DataFrame, which pandas warns about
# when determining result dtypes.
df_daily = pd.concat(
    [rows.iloc[:max_rows_per_update].drop(columns=['timestamp_forecast_update'])
     for _, rows in df.groupby('timestamp_forecast_update', sort=False)]
)
df = df_daily.set_index('timestamp_forecast').copy()

df

Unnamed: 0_level_0,predict,persist,load
timestamp_forecast,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-13,0.585481,0.556918,0.554331
2019-09-14,0.557913,0.509760,0.475878
2019-09-15,0.476820,0.338624,0.383797
2019-09-16,0.404428,0.529883,0.596332
2019-09-17,0.599193,0.525415,0.595295
...,...,...,...
2019-12-24,0.653666,0.707321,0.710890
2019-12-25,0.707750,0.734613,0.716163
2019-12-26,0.712486,0.735433,0.684824
2019-12-27,0.683565,0.706836,0.725218


In [19]:
# Column-wise maximum per calendar day: each column's daily peak is taken
# independently of the other columns.
dfp = df.resample(rule='1d').max()

In [20]:
# Hour of day at which the observed load reaches its maximum, per calendar day.
df.resample(rule='1d').apply(lambda day: day['load'].idxmax().hour)

timestamp_forecast
2019-09-13    0
2019-09-14    0
2019-09-15    0
2019-09-16    0
2019-09-17    0
             ..
2019-12-24    0
2019-12-25    0
2019-12-26    0
2019-12-27    0
2019-12-28    0
Freq: D, Length: 107, dtype: int64

## Load

In [21]:
# Mean absolute error on the daily peak values, for persistence and for the
# model, and the model's skill relative to persistence.
mae_pers = dfp['load'].sub(dfp['persist']).abs().mean()
mae_pred = dfp['load'].sub(dfp['predict']).abs().mean()

skill = 1 - mae_pred / mae_pers

print(f'MAE%: pers={100*mae_pers:.2f} pred={100*mae_pred:.2f}')
print(f'Skill%: {100*skill:.2f}')

MAE%: pers=3.70 pred=6.15
Skill%: -66.09


## Hour

In [22]:
# For each day, store the hour at which each series reaches its daily maximum.
# Pairs are (new dfp column, source column in df).
for hour_col, value_col in (('hour', 'load'),
                            ('persist_hour', 'persist'),
                            ('predict_hour', 'predict')):
    # col=value_col binds the current column name to this lambda.
    dfp[hour_col] = df.resample('1d').apply(lambda x, col=value_col: x[col].idxmax().hour)

In [23]:
# Accuracy: share of days on which the peak hour matches the observed one.
n_days = len(dfp)
acc_pers = dfp.hour.eq(dfp.persist_hour).sum() / n_days
acc_pred = dfp.hour.eq(dfp.predict_hour).sum() / n_days

# Accuracy is "higher is better", so the ratio is inverted with respect to the
# MAE-based skill: positive when the model is more accurate than persistence.
skill = 1 - acc_pers / acc_pred

print(f'ACC%: pers={100*acc_pers:.2f} pred={100*acc_pred:.2f}')
print(f'Skill%: {100*skill:.2f}')

ACC%: pers=100.00 pred=100.00
Skill%: 0.00
