In [1]:
import forecast_tools as ft
import numpy as np
from numpy import sqrt, mean, square
import pandas as pd
from datetime import datetime

# Render DataFrame/Series `.plot()` calls with plotly instead of matplotlib.
pd.options.plotting.backend = "plotly"
# Use the fully-qualified option key. The bare 'precision' pattern is ambiguous on
# pandas versions that also define 'styler.format.precision', where it raises
# OptionError instead of setting the display precision.
pd.set_option('display.precision', 2)

2023-01-15 04:36:54.834613: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-01-15 04:36:54.838168: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-15 04:36:54.838180: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# s=1 (shift_steps = 1)

In [2]:
def run_the_joules( site,          
                  units,
                  layers,                    
                  sequence_length,
                  epochs=100,
                  dropout=0.,
                  patience=5,
                  verbose=0,
                  output = True,
                  plots = False,
                  filename = None,
                  shift_steps = None,
                  features = [  'Load (kW)',
                                'Day',
                                'Weekday',
                                'Hour',
                                'IMF1',                                
                                'IMF2',                                
                                'IMF3',
                                'IMF4',
                                'IMF5',
                                'IMF6',
                                'IMF7',
                                'IMF8',],
                  dir = '/content/drive/MyDrive/Colab Notebooks/Models' ):
  """Train an LSTM load forecaster for one site and score it on the held-out split.

  Data loading, windowing, model construction/training, RMSE and peak-hour
  accuracy are all delegated to `forecast_tools` (`ft`); this function only
  wires those helpers together and collects the metrics.

  Args:
    site: site identifier passed to `ft.get_dat_v4` and embedded in the
      checkpoint file name.
    units, layers, epochs, dropout, patience, verbose: forwarded unchanged to
      `ft.lstm_build_train`.
    sequence_length, shift_steps: forwarded unchanged to `ft.organize_dat_v3`.
    output: when true, print the results dict.
    plots: when true, call `ft.plot_training_history` and
      `ft.plot_predictions_week` (week 0).
    filename: data file passed to `ft.get_dat_v4`.
    features: accepted for backward compatibility; it is never read (or
      mutated) inside this function.
    dir: directory prefix of the checkpoint path. The name shadows the builtin
      `dir` but is kept because callers pass it by keyword.

  Returns:
    A tuple `(results, history, predictions, model, batchgen, dat_valid)` where
    `results` is a dict with keys `test_rmse_pred`, `test_skill`,
    `test_std_diff_pred`, `epochs`, `acc_np` and `acc`; `history` is
    `hx.history`; and `predictions` is the single-column frame
    `df_test[['yPred']]`.

  Raises:
    ValueError: if the persistence lag `np_days * dppd` is not strictly between
      0 and the number of test samples, since the baseline cannot be formed.
  """
  # header
  print(f'\n\n////////// u={units} l={layers} d={dropout} s={shift_steps} //////////\n')

  # meta
  # Read the clock once and zero-pad every field (including the hour) so that
  # checkpoint names are unambiguous and two runs in the same minute-of-hour on
  # different hours do not overwrite each other.
  stamp = datetime.now().strftime('%y%m%d%H%M')
  path_checkpoint = f'{dir}/{site} lstm {units}x{layers}x{shift_steps} {stamp}.keras'

  # data
  # dppd / np_days: presumably data points per day and the persistence horizon
  # in days -- TODO confirm against ft.get_dat_v4.
  df,dppd,np_days = ft.get_dat_v4(site,filename,emd=True,rename=True)
  ( n_features_x, n_features_y, 
    batchgen, dat_valid, 
    scaler, df_test) = ft.organize_dat_v3(  df=df, 
                                            shift_steps=shift_steps,
                                            sequence_length=sequence_length,
                                            train_split=0.9,
                                            batch_size=32,
                                            onehot=True)
  (x_test, y_test) = dat_valid 

  # model
  model, hx = ft.lstm_build_train(  n_features_x, n_features_y, 
                                  path_checkpoint, batchgen, 
                                  dat_valid, units=units, epochs=epochs,
                                  layers=layers, patience=patience, 
                                  verbose=verbose, dropout=dropout)

  # predict (first output channel, mapped back through the scaler)
  y_test_pred = model.predict(x_test)
  y_test_pred_kw = scaler.inverse_transform(y_test_pred[:,:,0]).flatten()
  
  # rmse of the naive-persistence baseline: the true value `lag` samples earlier
  y_test_kw = scaler.inverse_transform(y_test[:,:,0]).flatten()
  lag = np_days*dppd
  if not 0 < lag < len(y_test_kw):
    raise ValueError(
      f'persistence lag {lag} must be between 1 and {len(y_test_kw) - 1} test samples')
  test_rmse_np = ft.rmse(  y_test_kw[lag:], 
                            y_test_kw[:-lag] )

  # accuracy
  # Work on an explicit copy so the new columns are not written onto a frame
  # that may be a slice of data owned by forecast_tools.
  df_test = df_test.copy()
  df_test['yPred'] =  y_test_pred_kw
  # Persistence forecast aligned with df_test: y[t - lag], i.e. the same
  # baseline that test_rmse_np scores. It is built from the TRUE load, not from
  # the model's predictions, and shifted backwards so no future value is used.
  # The first `lag` samples have no history and fall back to their own values.
  df_test['yNP']   =  np.concatenate((y_test_kw[:lag], y_test_kw[:-lag]))
  df_test['yTrue'] =  y_test_kw
  
  # Each helper call receives its own copy of the single-column frame; the
  # captured SettingWithCopyWarning output points at a column assignment made
  # inside this helper.
  df_test['LoadPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yTrue']].copy())
  df_test['NP1dPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yNP']].copy())
  df_test['PredPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yPred']].copy())

  # results
  results = {}
  results['test_rmse_pred']     = ft.rmse(y_test_kw, y_test_pred_kw)
  results['test_skill']         = 1 - results['test_rmse_pred'] / test_rmse_np  
  results['test_std_diff_pred'] = np.diff(y_test_pred_kw).std()
  # Presumably the epoch of the best checkpoint when early stopping fired --
  # TODO confirm the meaning for runs that reach `epochs` without stopping.
  results['epochs']             = len(hx.history['loss']) - patience
  results['acc_np']             = ft.calc_accuracy(df_test.LoadPeakH.values,df_test.NP1dPeakH.values)
  results['acc']                = ft.calc_accuracy(df_test.LoadPeakH.values,df_test.PredPeakH.values)

  if output:
    print(results)

  if plots:
    ft.plot_training_history(hx)
    ft.plot_predictions_week(y_test_kw, y_test_pred_kw, week=0)

  return results, hx.history, df_test[['yPred']], model,batchgen,dat_valid
  
# Load the PRPA data at notebook scope (run_the_joules loads it again internally).
df, dppd, np_days = ft.get_dat_v4(
    'prpa',
    'data/PRPA_load_cleaned_mjw.csv',
    emd=True,
    rename=True,
)

# Train and evaluate the 24-unit, 2-layer LSTM with shift_steps=1.
results, hx, pred, model, b, valid = run_the_joules(
    site='prpa',
    filename='data/PRPA_load_cleaned_mjw.csv',
    dir='models',
    units=24,
    layers=2,
    sequence_length=24,
    shift_steps=1,
    epochs=100,
    patience=20,
    verbose=1,
)





////////// u=24 l=2 d=0.0 s=1 //////////



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['peak'] = np.zeros(df.shape[0],dtype=int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
2023-01-15 04:37:03.006208: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-01-15 04:37:03.006237: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-15 04:37:03.006249: I tensorflow/stream_executor/cud

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 24)          3648      
                                                                 
 dropout (Dropout)           (None, None, 24)          0         
                                                                 
 lstm_1 (LSTM)               (None, None, 24)          4704      
                                                                 
 dropout_1 (Dropout)         (None, None, 24)          0         
                                                                 
 dense (Dense)               (None, None, 1)           25        
                                                                 
Total params: 8,377
Trainable params: 8,377
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['peak'] = np.zeros(df.shape[0],dtype=int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


## analysis

In [3]:
# Split the validation pair returned by the run above and predict on its inputs.
(x, y) = valid
ypred = model.predict(x)



## plots

In [4]:
# Overlay validation targets and model output on the test-period index.
(
    pd.DataFrame(
        dict(y=y.flatten(), pred=ypred.flatten()),
        index=pred.index,
    )
    .plot()
)

In [8]:
# One-hot encode the predicted daily peak hour from the flattened predictions.
ypred_oh = ft.create_one_hot_vector_of_daily_peak_hr(
    pd.DataFrame(ypred.flatten(), index=pred.index)
)

# Plot targets against the predicted peak flags on a plain integer index.
(
    pd.DataFrame(
        dict(y=y.flatten(), pred=ypred_oh.values.flatten()),
        index=range(len(y.flatten())),
    )
    .plot()
)

# s=2 (shift_steps = 2)

In [10]:
def run_the_joules( site,          
                  units,
                  layers,                    
                  sequence_length,
                  epochs=100,
                  dropout=0.,
                  patience=5,
                  verbose=0,
                  output = True,
                  plots = False,
                  filename = None,
                  shift_steps = None,
                  features = [  'Load (kW)',
                                'Day',
                                'Weekday',
                                'Hour',
                                'IMF1',                                
                                'IMF2',                                
                                'IMF3',
                                'IMF4',
                                'IMF5',
                                'IMF6',
                                'IMF7',
                                'IMF8',],
                  dir = '/content/drive/MyDrive/Colab Notebooks/Models' ):
  """Train an LSTM load forecaster for one site and score it on the held-out split.

  NOTE: this cell re-defines `run_the_joules`, which is already defined in the
  `# s=1` section of this notebook; the later definition shadows the earlier
  one once this cell has run.

  Data loading, windowing, model construction/training, RMSE and peak-hour
  accuracy are all delegated to `forecast_tools` (`ft`); this function only
  wires those helpers together and collects the metrics.

  Args:
    site: site identifier passed to `ft.get_dat_v4` and embedded in the
      checkpoint file name.
    units, layers, epochs, dropout, patience, verbose: forwarded unchanged to
      `ft.lstm_build_train`.
    sequence_length, shift_steps: forwarded unchanged to `ft.organize_dat_v3`.
    output: when true, print the results dict.
    plots: when true, call `ft.plot_training_history` and
      `ft.plot_predictions_week` (week 0).
    filename: data file passed to `ft.get_dat_v4`.
    features: accepted for backward compatibility; it is never read (or
      mutated) inside this function.
    dir: directory prefix of the checkpoint path. The name shadows the builtin
      `dir` but is kept because callers pass it by keyword.

  Returns:
    A tuple `(results, history, predictions, model, batchgen, dat_valid)` where
    `results` is a dict with keys `test_rmse_pred`, `test_skill`,
    `test_std_diff_pred`, `epochs`, `acc_np` and `acc`; `history` is
    `hx.history`; and `predictions` is the single-column frame
    `df_test[['yPred']]`.

  Raises:
    ValueError: if the persistence lag `np_days * dppd` is not strictly between
      0 and the number of test samples, since the baseline cannot be formed.
  """
  # header
  print(f'\n\n////////// u={units} l={layers} d={dropout} s={shift_steps} //////////\n')

  # meta
  # Read the clock once and zero-pad every field (including the hour) so that
  # checkpoint names are unambiguous and two runs in the same minute-of-hour on
  # different hours do not overwrite each other.
  stamp = datetime.now().strftime('%y%m%d%H%M')
  path_checkpoint = f'{dir}/{site} lstm {units}x{layers}x{shift_steps} {stamp}.keras'

  # data
  # dppd / np_days: presumably data points per day and the persistence horizon
  # in days -- TODO confirm against ft.get_dat_v4.
  df,dppd,np_days = ft.get_dat_v4(site,filename,emd=True,rename=True)
  ( n_features_x, n_features_y, 
    batchgen, dat_valid, 
    scaler, df_test) = ft.organize_dat_v3(  df=df, 
                                            shift_steps=shift_steps,
                                            sequence_length=sequence_length,
                                            train_split=0.9,
                                            batch_size=32,
                                            onehot=True)
  (x_test, y_test) = dat_valid 

  # model
  model, hx = ft.lstm_build_train(  n_features_x, n_features_y, 
                                  path_checkpoint, batchgen, 
                                  dat_valid, units=units, epochs=epochs,
                                  layers=layers, patience=patience, 
                                  verbose=verbose, dropout=dropout)

  # predict (first output channel, mapped back through the scaler)
  y_test_pred = model.predict(x_test)
  y_test_pred_kw = scaler.inverse_transform(y_test_pred[:,:,0]).flatten()
  
  # rmse of the naive-persistence baseline: the true value `lag` samples earlier
  y_test_kw = scaler.inverse_transform(y_test[:,:,0]).flatten()
  lag = np_days*dppd
  if not 0 < lag < len(y_test_kw):
    raise ValueError(
      f'persistence lag {lag} must be between 1 and {len(y_test_kw) - 1} test samples')
  test_rmse_np = ft.rmse(  y_test_kw[lag:], 
                            y_test_kw[:-lag] )

  # accuracy
  # Work on an explicit copy so the new columns are not written onto a frame
  # that may be a slice of data owned by forecast_tools.
  df_test = df_test.copy()
  df_test['yPred'] =  y_test_pred_kw
  # Persistence forecast aligned with df_test: y[t - lag], i.e. the same
  # baseline that test_rmse_np scores. It is built from the TRUE load, not from
  # the model's predictions, and shifted backwards so no future value is used.
  # The first `lag` samples have no history and fall back to their own values.
  df_test['yNP']   =  np.concatenate((y_test_kw[:lag], y_test_kw[:-lag]))
  df_test['yTrue'] =  y_test_kw
  
  # Each helper call receives its own copy of the single-column frame; the
  # captured SettingWithCopyWarning output points at a column assignment made
  # inside this helper.
  df_test['LoadPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yTrue']].copy())
  df_test['NP1dPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yNP']].copy())
  df_test['PredPeakH'] = ft.create_one_hot_vector_of_daily_peak_hr(df_test[['yPred']].copy())

  # results
  results = {}
  results['test_rmse_pred']     = ft.rmse(y_test_kw, y_test_pred_kw)
  results['test_skill']         = 1 - results['test_rmse_pred'] / test_rmse_np  
  results['test_std_diff_pred'] = np.diff(y_test_pred_kw).std()
  # Presumably the epoch of the best checkpoint when early stopping fired --
  # TODO confirm the meaning for runs that reach `epochs` without stopping.
  results['epochs']             = len(hx.history['loss']) - patience
  results['acc_np']             = ft.calc_accuracy(df_test.LoadPeakH.values,df_test.NP1dPeakH.values)
  results['acc']                = ft.calc_accuracy(df_test.LoadPeakH.values,df_test.PredPeakH.values)

  if output:
    print(results)

  if plots:
    ft.plot_training_history(hx)
    ft.plot_predictions_week(y_test_kw, y_test_pred_kw, week=0)

  return results, hx.history, df_test[['yPred']], model,batchgen,dat_valid
  
# Load the PRPA data at notebook scope (run_the_joules loads it again internally).
df, dppd, np_days = ft.get_dat_v4(
    'prpa',
    'data/PRPA_load_cleaned_mjw.csv',
    emd=True,
    rename=True,
)

# Train and evaluate the 24-unit, 2-layer LSTM with shift_steps=2.
results, hx, pred, model, b, valid = run_the_joules(
    site='prpa',
    filename='data/PRPA_load_cleaned_mjw.csv',
    dir='models',
    units=24,
    layers=2,
    sequence_length=24,
    shift_steps=2,
    epochs=100,
    patience=20,
    verbose=1,
)





////////// u=24 l=2 d=0.0 s=2 //////////





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, None, 24)          3648      
                                                                 
 dropout_2 (Dropout)         (None, None, 24)          0         
                                                                 
 lstm_3 (LSTM)               (None, None, 24)          4704      
                                                                 
 dropout_3 (Dropout)         (None, None, 24)          0         
                                                                 
 dense_1 (Dense)             (None, None, 1)           25        
                                                                 
Total params: 8,377
Trainable params: 8,377
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 1: val_loss improved from inf to 0



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## analysis

In [17]:
# Split the validation pair returned by the shift_steps=2 run and predict on its inputs.
(x_valid, y_valid) = valid
ypred = model.predict(x_valid)



## plots

In [18]:
# Overlay validation targets and model output on the test-period index.
(
    pd.DataFrame(
        dict(y=y_valid.flatten(), pred=ypred.flatten()),
        index=pred.index,
    )
    .plot()
)

In [19]:
# Plot the validation targets next to the 'yPred' column returned by run_the_joules.
# NOTE(review): y_valid is taken straight from `valid`, while 'yPred' was passed
# through scaler.inverse_transform inside run_the_joules -- confirm the two
# traces are meant to share one axis.
(
    pd.DataFrame(
        dict(yTrue=y_valid.flatten(), yPred=pred['yPred']),
        index=pred.index,
    )
    .plot()
)

In [13]:
# One-hot encode the predicted daily peak hour from the flattened predictions.
ypred_oh = ft.create_one_hot_vector_of_daily_peak_hr(pd.DataFrame(ypred.flatten(),index=pred.index))

# Use this section's targets (`y_valid`, unpacked from the shift_steps=2 run).
# The bare `y` read here previously is only defined in the `# s=1` section, so
# on a top-to-bottom run it silently plotted the s=1 targets against the s=2
# predictions.
pd.DataFrame({'y':y_valid.flatten(),'pred':ypred_oh.values.flatten()},
             index=range(len(y_valid.flatten()))) \
                 .plot()