In [41]:
import forecast_tools as ft
import numpy as np
from numpy import sqrt, mean, square
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler

pd.options.plotting.backend = "plotly"
pd.set_option('precision', 2)

In [42]:
def one_hot_of_peaks(ds,freq='D'):
  df = pd.DataFrame(ds)
  df['peak'] = 0
  df.loc[df.groupby(pd.Grouper(freq=freq)).idxmax().iloc[:,0], 'peak'] = 1  
  return df['peak']   

def accuracy_one_hot(true,pred):
    """ Measure the accuracy of two one hot vectors, inputs can be 1d numpy or dataseries"""
    n_misses = sum(true != pred)/2     # every miss gives two 'False' entries
    return 1 - n_misses/sum(true)   # basis is the number of one-hots

In [43]:
#### roughly best so far
# site='prpa'
# units=24
# layers=2
# sequence_length=24
# epochs=100
# dropout=0
# patience=20
# verbose=1
# output = True
# plots = False
# filename = 'data/PRPA_load_cleaned_mjw.csv'
# shift_steps = 1
# dir = 'models'
# features = [  'Load (kW)',
#              'Day',
#              'Weekday',
#              'Hour',
#               'IMF1',                                
#               'IMF2',                                
#               'IMF3',
#               'IMF4',
#               'IMF5',
#               'IMF6',
#               'IMF7',
#               'IMF8',]
# targets = ['TargetsOH']
# train_split = 0.9
# afuncs={'lstm':'relu','dense':'relu'}
# loss='binary_crossentropy'
# metrics=['accuracy']

site='prpa'
units=24
layers=2
sequence_length=24
epochs=100
dropout=0
patience=20
verbose=1
output = True
plots = False
filename = 'data/PRPA_load_cleaned_mjw.csv'
shift_steps = 1
dir = 'models'
features = [  'Load (kW)',
             'Day',
             'Weekday',
             'Hour',
              'IMF1',                                
              'IMF2',                                
              'IMF3',
              'IMF4',
              'IMF5',
              'IMF6',
              'IMF7',
              'IMF8',]
targets = ['TargetsOH']
train_split = 0.9
afuncs={'lstm':'relu','dense':'relu','gru':'relu'}
loss='binary_crossentropy'
metrics=['accuracy']

t = datetime.now()
path_checkpoint = f'{dir}/{site}/{t.year}-{t.month:02}-{t.day:02}_'+\
                f'{t.hour:02}-{t.minute:02}-{t.second:02}_lstm_{units}x{layers}x{shift_steps}.keras'

In [44]:
df,dppd,np_days = ft.get_dat_v4(site,filename,emd=True,rename=True,start='2021-6-7',end='2022-8-10')

In [45]:
df['LoadOH'] =      one_hot_of_peaks(df[['Load (kW)']])
df['TargetsOH'] =   one_hot_of_peaks(df[['Load (kW)']]).shift(-shift_steps)
df['PredNPOH'] =    one_hot_of_peaks(df[['Load (kW)']]).shift(np_days*dppd-shift_steps)
df = df.dropna()

In [46]:
# split
num_data = len(df)
num_train = int(train_split * num_data)
df_train = df.iloc[:num_train,:]
df_valid = df.iloc[num_train:,:]

feature_scaler = MinMaxScaler()
X_train = feature_scaler.fit_transform(df_train[features].values)
X_valid = feature_scaler.fit_transform(df_valid[features].values)

y_train = df_train.TargetsOH.values[:,np.newaxis]
y_valid = df_valid.TargetsOH.values[:,np.newaxis]

In [47]:
generator = ft.batch_generator( batch_size=32,
                                sequence_length=sequence_length,
                                num_x_signals=len(features),
                                num_y_signals=len(targets),
                                num_train=num_train,
                                x_train_scaled=X_train,
                                y_train_scaled=y_train)

In [48]:
X_batch, y_batch = next(generator)

In [49]:
X_valid = X_valid[np.newaxis,:,:]
y_valid = y_valid[np.newaxis,:,:]

In [50]:
# model, hx = ft.lstm_build_train(len(features), len(targets), 
#                                 path_checkpoint, generator, 
#                                 (X_valid,y_valid), units=units, epochs=epochs,
#                                 layers=layers, patience=patience, 
#                                 verbose=verbose, dropout=dropout,
#                                 afuncs=afuncs,
#                                 loss=loss,metrics=metrics)

In [51]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, GRU, LSTM, Embedding, Dropout
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import  EarlyStopping, ModelCheckpoint, TensorBoard, \
                                        ReduceLROnPlateau

model = Sequential()
model.add( GRU(  units=units,
                return_sequences=True,
                input_shape=(None, len(features),),
                activation=afuncs['gru']))
model.add(Dense(len(targets), activation=afuncs['dense']))

model.compile(loss=loss, optimizer='adam',metrics=metrics)
model.summary()                  

callback_checkpoint = ModelCheckpoint(  filepath=path_checkpoint,
                                        monitor='val_loss',
                                        verbose=verbose,
                                        save_weights_only=True,
                                        save_best_only=True)

callback_early_stopping = EarlyStopping(  monitor='val_loss',
                                        patience=patience,
                                        verbose=verbose)

callbacks = [ callback_early_stopping,
            callback_checkpoint,]

hx = model.fit(  x=generator,
            epochs=100,
            steps_per_epoch=100,
            validation_data=(X_valid,y_valid),
            callbacks=callbacks)    

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_2 (GRU)                 (None, None, 24)          2736      
                                                                 
 dense_2 (Dense)             (None, None, 1)           25        
                                                                 
Total params: 2,761
Trainable params: 2,761
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.27444, saving model to models/prpa/2023-01-16_01-46-26_lstm_24x2x1.keras
Epoch 2/100
Epoch 2: val_loss improved from 0.27444 to 0.26475, saving model to models/prpa/2023-01-16_01-46-26_lstm_24x2x1.keras
Epoch 3/100
Epoch 3: val_loss improved from 0.26475 to 0.18147, saving model to models/prpa/2023-01-16_01-46-26_lstm_24x2x1.keras
Epoch 4/100
Epoch 4: val_loss improved from 0.18147 to 0.16673

In [52]:
pd.DataFrame(hx.history).plot()

In [53]:
model.load_weights(path_checkpoint)
y_valid_pred = model.predict(X_valid)



In [54]:
y_valid_flat      = y_valid[:,:,0].flatten()
y_valid_pred_flat = y_valid_pred[:,:,0].flatten()

In [55]:
df_valid.loc[:,'y'] = y_valid_flat
df_valid.loc[:,'y_pred'] = y_valid_pred_flat



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



'y' in this graph will be "shift_steps" off from the real data set, but y, y_pred, and PredNPOH should be accurate relative to each other

In [56]:
df_valid[['y','y_pred','PredNPOH']].plot()

In [57]:
df2 = df[['Load (kW)']]
df2['NP'] = df['Load (kW)'].shift(24)
df2 = df2.dropna()

accuracy_one_hot(one_hot_of_peaks(df2['Load (kW)']),
                 one_hot_of_peaks(df2['NP']))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0.5174825174825175

In [58]:
accuracy_one_hot(df_valid['y'],df_valid['PredNPOH'])

0.2093023255813954

In [59]:
accuracy_one_hot(df_valid['y'],one_hot_of_peaks(df_valid['y_pred']))

0.3023255813953488

In [60]:
df_valid['2022-6-29']

Unnamed: 0_level_0,Load (kW),IMF1,IMF2,IMF3,IMF4,IMF5,IMF6,IMF7,IMF8,Day,Hour,Weekday,LoadOH,TargetsOH,PredNPOH,y,y_pred
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-06-29 01:00:00,330.21,-42.17,-89.45,62.73,-14.55,-24.52,23.11,6.61,408.46,180,1,2,0,0.0,0.0,0.0,0.0
2022-06-29 02:00:00,312.37,-65.92,-84.85,63.73,-14.12,-24.75,23.1,6.71,408.47,180,2,2,0,0.0,0.0,0.0,0.0
2022-06-29 03:00:00,301.4,-85.34,-77.51,64.54,-13.69,-24.98,23.09,6.81,408.49,180,3,2,0,0.0,0.0,0.0,0.0
2022-06-29 04:00:00,297.24,-100.76,-67.19,65.16,-13.26,-25.2,23.08,6.91,408.51,180,4,2,0,0.0,0.0,0.0,0.0
2022-06-29 05:00:00,304.39,-107.8,-53.73,65.58,-12.83,-25.43,23.07,7.01,408.53,180,5,2,0,0.0,0.0,0.0,0.0
2022-06-29 06:00:00,322.7,-106.21,-37.54,65.8,-12.41,-25.66,23.05,7.11,408.55,180,6,2,0,0.0,0.0,0.0,0.0
2022-06-29 07:00:00,350.69,-96.68,-19.41,65.82,-11.98,-25.88,23.04,7.22,408.56,180,7,2,0,0.0,0.0,0.0,0.0
2022-06-29 08:00:00,375.61,-91.09,-0.19,65.62,-11.55,-26.1,23.03,7.32,408.58,180,8,2,0,0.0,0.0,0.0,0.0
2022-06-29 09:00:00,416.6,-69.45,19.26,65.2,-11.12,-26.33,23.02,7.42,408.6,180,9,2,0,0.0,0.0,0.0,0.0
2022-06-29 10:00:00,463.77,-40.79,38.1,64.56,-10.69,-26.55,23.0,7.52,408.62,180,10,2,0,0.0,0.0,0.0,0.00557


In [64]:
df_valid['2022-6-30 16:00':'2022-7-1 17:00']

Unnamed: 0_level_0,Load (kW),IMF1,IMF2,IMF3,IMF4,IMF5,IMF6,IMF7,IMF8,Day,Hour,Weekday,LoadOH,TargetsOH,PredNPOH,y,y_pred
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-06-30 16:00:00,455.68,16.59,44.69,-16.92,1.71,-32.6,22.51,10.56,409.15,181,16,3,0,1.0,0.0,1.0,0.0449
2022-06-30 17:00:00,457.05,20.33,44.11,-19.02,2.1,-32.78,22.49,10.66,409.16,181,17,3,1,0.0,0.0,0.0,0.0376
2022-06-30 18:00:00,455.95,23.33,41.65,-20.96,2.5,-32.96,22.47,10.76,409.18,181,18,3,0,0.0,0.0,0.0,0.0275
2022-06-30 19:00:00,444.66,17.55,37.63,-22.76,2.88,-33.14,22.45,10.86,409.2,181,19,3,0,0.0,0.0,0.0,0.00376
2022-06-30 20:00:00,419.44,-1.08,32.39,-24.43,3.27,-33.32,22.43,10.96,409.22,181,20,3,0,0.0,0.0,0.0,0.0
2022-06-30 21:00:00,395.22,-17.93,26.24,-25.96,3.66,-33.49,22.4,11.06,409.23,181,21,3,0,0.0,0.0,0.0,0.0
2022-06-30 22:00:00,363.81,-41.49,19.51,-27.37,4.04,-33.66,22.38,11.16,409.25,181,22,3,0,0.0,0.0,0.0,0.0
2022-06-30 23:00:00,331.71,-65.59,12.49,-28.67,4.42,-33.84,22.36,11.26,409.27,181,23,3,0,0.0,0.0,0.0,0.0
2022-07-01 00:00:00,307.68,-81.75,5.52,-29.87,4.8,-34.01,22.34,11.36,409.29,182,0,4,0,0.0,0.0,0.0,0.0
2022-07-01 01:00:00,293.2,-88.81,-1.1,-30.97,5.17,-34.17,22.32,11.46,409.3,182,1,4,0,0.0,0.0,0.0,0.0
