In [1]:
import functools
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_pinball_loss
from keras.models import Sequential
from keras.layers import Dense, Activation
import keras.backend as K
from tensorflow.keras.callbacks import EarlyStopping
!pip install tensorflow_addons
import tensorflow_addons as tfa
from CRPS import CRPS

2023-03-02 11:03:30.524508: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




In [4]:
# load the dataset for the selected delivery hour and parse its timestamps
hour = 17
data = pd.read_csv(f"../../data/final_{hour}.csv").assign(
    # convert the raw "datetime" column to pandas datetimes so the .dt
    # accessor can be used on it later
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

In [5]:
# build lagged copies of priceBE (1 to 5 rows back) as extra feature columns;
# the first rows of each lag column are NaN because shift() has nothing to
# pull from
for n_back in range(1, 6):
    data[f"priceBE_lag{n_back}"] = data["priceBE"].shift(n_back)

In [9]:
# cleanup
# Remove the unused loadFR column and every row that contains a missing value.
# Written without inplace=True and with errors="ignore" so the cell can be
# re-run safely: a second execution no longer raises KeyError because loadFR
# is already gone.
data = data.drop(columns=["loadFR"], errors="ignore").dropna()

In [10]:
# train/test split
# Evaluation window: January-June 2021. Training uses only observations from
# before that window. The previous split trained on `year <= 2021`, which
# contained every test row (data leakage) and made the test scores meaningless.
# NOTE(review): if the intended split was "train through 2021, test on the
# first half of 2022", change the test year to 2022 and the train bound to
# `<= 2021` instead - confirm against the date range of the CSV.
test_mask = (data["datetime"].dt.year == 2021) & (data["datetime"].dt.month < 7)
train_mask = data["datetime"].dt.year < 2021
# guard against future edits re-introducing overlap between the two sets
assert not (train_mask & test_mask).any(), "train and test sets overlap"

data_train = data[train_mask]
data_test = data[test_mask]

# features are every column except the timestamp and the target
X_train = data_train.drop(["datetime", "priceBE"], axis=1)
y_train = data_train["priceBE"]
X_test = data_test.drop(["datetime", "priceBE"], axis=1)
y_test = data_test["priceBE"]

# Gradient Boosting

In [None]:
# Fit one quantile gradient-boosting regressor per quantile level
# tau = 0.01, 0.02, ..., 0.99 and store each fitted model under the key
# "0.01" ... "0.99".
all_models = {}
# hyper-parameters shared by every quantile model
common_params = dict(
    learning_rate = 0.05,
    n_estimators = 200,
    max_depth = 2,
    min_samples_leaf = 9,
    min_samples_split = 9,
    # n_iter_no_change enables scikit-learn's early stopping, evaluated on a
    # held-out validation_fraction of the training data
    validation_fraction = 0.3,
    n_iter_no_change=5,
    tol=0.01,
    random_state=0
)
for tau in np.linspace(0.01, 0.99, 99):
    gbr = GradientBoostingRegressor(
        loss="quantile", 
        alpha=tau, 
        **common_params
    )
    # key by the quantile being fitted; the original formatted an undefined
    # name `alpha` here, which raised NameError on the first iteration
    all_models["%1.2f" % tau] = gbr.fit(X_train, y_train)

In [None]:
# collect test-set predictions: one column per fitted model, named after the
# model's key in all_models
predictions = pd.DataFrame()
for label, fitted in all_models.items():
    predictions[label] = fitted.predict(X_test)

In [None]:
# Pinball loss at the 5%, 50% and 95% quantiles. A notebook cell only renders
# its last expression, so the three losses were previously computed and thrown
# away; collect them and display them explicitly.
pinball_losses = pd.Series(
    {
        level: mean_pinball_loss(y_test, predictions[level], alpha=float(level))
        for level in ("0.05", "0.50", "0.95")
    },
    name="mean_pinball_loss",
)
display(pinball_losses)  # `display` is provided by the IPython/Jupyter kernel

# CRPS over the full set of predicted quantiles (rendered as the cell output)
CRPS(predictions, y_test)

# Neural Networks

In [None]:
# early-stopping callback shared by the neural-network fits: watches the
# validation loss (lower is better) and stops training once it has failed to
# improve by at least `min_delta` for `patience` consecutive epochs
es = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=50,
    min_delta=1,
    verbose=1,
)

In [None]:
def quantileModel1():
    """Build an uncompiled feed-forward network for quantile regression.

    The network has one hidden layer of 10 ReLU units, sized to take one input
    per column of the module-level ``X_train``, followed by a single linear
    output unit.

    Returns:
        A new, uncompiled ``Sequential`` model.
    """
    n_features = len(X_train.columns)
    return Sequential(
        [
            # hidden layer: 10 ReLU units fed by every feature column
            Dense(units=10, input_dim=n_features, activation="relu"),
            # output layer: a single unit with no activation
            Dense(1),
        ]
    )

In [None]:
# Train one neural network per quantile level tau = 0.01, ..., 0.99 with the
# pinball loss for that tau.
# `model.fit` returns a Keras History object, not the model, so storing its
# return value (as the original did) kept the training curves but lost the
# trained networks. The fitted model is now stored in `all_models`, and the
# History objects are kept separately in `nn_histories`.
# NOTE(review): `all_models` is shared with the gradient-boosting cell, so the
# entries written here replace the boosted models under the same keys.
nn_histories = {}
for tau in np.linspace(0.01, 0.99, 99):
    key = "%1.2f" % tau
    model = quantileModel1()
    model.compile(
        # bind this iteration's quantile level into the pinball loss
        loss=functools.partial(tfa.losses.pinball_loss, tau=tau),
        optimizer = "adadelta"
    )
    nn_histories[key] = model.fit(
        X_train, y_train, 
        validation_split = 0.3,
        epochs = 500, 
        batch_size = 32,
        verbose = 1,
        callbacks = [es]
    )
    all_models[key] = model