In [1]:
import functools
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_pinball_loss
from sklearn.preprocessing import PolynomialFeatures
#!pip install xgboost
#import xgboost as xgb
#from keras.models import Sequential
#from keras.layers import Dense, Activation
#import keras.backend as K
#from tensorflow.keras.callbacks import EarlyStopping
#!pip install tensorflow_addons
#import tensorflow_addons as tfa
from tqdm import tqdm

In [2]:
# Load the dataset file for the selected hour and convert its "datetime"
# column from text to pandas timestamps.
hour = 17  # selects which ../../data/final_<hour>.csv file is read
data = pd.read_csv(f"../../data/final_{hour}.csv").assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

In [3]:
# make lagged variables
# For each price column, add <column>_lag1 ... <column>_lag5 holding the value
# found 1..5 rows earlier. Columns are created in the order BE, NL, FR, DE and
# lag 1 -> 5, i.e. the same feature order as writing every assignment by hand.
# NOTE(review): shift() lags by row position, not by calendar date, and this
# cell runs before rows with duplicate datetimes are dropped -- confirm the
# rows are sorted and evenly spaced so that "lag k" means what is intended.
PRICE_COLUMNS = ["priceBE", "priceNL", "priceFR", "priceDE"]
N_LAGS = 5
for price_col in PRICE_COLUMNS:
    for lag in range(1, N_LAGS + 1):
        data[f"{price_col}_lag{lag}"] = data[price_col].shift(lag)

# drop other variables: the un-lagged NL/FR/DE prices are removed, only
# priceBE is kept (it is the target in the train/test split cell).
data.drop(["priceNL", "priceFR", "priceDE"], axis=1, inplace=True)

In [4]:
# cleanup: remove the loadFR column, discard every row that still contains a
# missing value (the leading rows have NaN in their lag columns), and keep only
# the last row for each repeated datetime.
data = (
    data.drop(columns=["loadFR"])
    .dropna()
    .drop_duplicates(subset="datetime", keep="last")
)

In [5]:
# train/test split by calendar year: rows before 2021 are used for training,
# rows from 2021 for testing, and anything from 2022 onwards is discarded.
data = data.loc[data["datetime"].dt.year < 2022]
obs_year = data["datetime"].dt.year
data_train = data.loc[obs_year < 2021]
data_test = data.loc[obs_year == 2021]
# (An extra test-set restriction to months before July was tried and is disabled.)

# Features are every column except the timestamp and the target priceBE.
non_feature_cols = ["datetime", "priceBE"]
X_train, y_train = data_train.drop(columns=non_feature_cols), data_train["priceBE"]
X_test, y_test = data_test.drop(columns=non_feature_cols), data_test["priceBE"]

# Gradient Boosting

## Degree = 1

In [6]:
# Fit one quantile gradient-boosting regressor per quantile level
# 0.01, 0.02, ..., 0.99 on the raw (degree-1) features.
common_params = {
    "learning_rate": 0.05,
    "n_estimators": 200,
    "max_depth": 2,
    "min_samples_leaf": 9,
    "min_samples_split": 9,
    # validation_fraction / n_iter_no_change / tol configure scikit-learn's
    # built-in early stopping on a held-out part of the training data.
    "validation_fraction": 0.3,
    "n_iter_no_change": 5,
    "tol": 0.01,
    "random_state": 0,
}

all_models = {}
for tau in tqdm(np.linspace(0.01, 0.99, 99)):
    quantile_gbr = GradientBoostingRegressor(loss="quantile", alpha=tau, **common_params)
    # Models are keyed by the quantile level formatted with two decimals ("0.01" ... "0.99").
    all_models[f"{tau:1.2f}"] = quantile_gbr.fit(X_train, y_train)

100%|███████████████████████████████████████████| 99/99 [01:06<00:00,  1.49it/s]


In [7]:
# Predict the test set with every fitted model; the result has one column per
# quantile key of all_models, in insertion order.
predictions = pd.DataFrame(
    {level: fitted.predict(X_test) for level, fitted in all_models.items()}
)

In [8]:
# Write the degree-1 gradient-boosting predictions to disk without the row index.
predictions.to_csv("../../results/predictions_GB1.csv", index=False)

## Degree = 2

In [9]:
# Degree-2 variant: the same quantile models, trained on the polynomial
# expansion (degree 2) of the features.
# PolynomialFeatures is already imported in the notebook's first cell.
poly_2 = PolynomialFeatures(2)

# The expansion does not depend on the quantile level, so fit the transformer
# and expand the training features once instead of once per model.
X_train_poly = pd.DataFrame(poly_2.fit_transform(X_train))

all_models = {}
common_params = dict(
    learning_rate = 0.05,
    n_estimators = 200,
    max_depth = 2,
    min_samples_leaf = 9,
    min_samples_split = 9,
    # validation_fraction / n_iter_no_change / tol configure scikit-learn's
    # built-in early stopping on a held-out part of the training data.
    validation_fraction = 0.3,
    n_iter_no_change=5,
    tol=0.01,
    random_state=0
)
for tau in tqdm(np.linspace(0.01, 0.99, 99)):
    gbr = GradientBoostingRegressor(
        loss="quantile",
        alpha=tau,
        **common_params
    )
    # Models are keyed by the quantile level formatted with two decimals.
    all_models["%1.2f" % tau] = gbr.fit(X_train_poly, y_train)

100%|███████████████████████████████████████████| 99/99 [15:01<00:00,  9.10s/it]


In [10]:
# Expand the test features once with the transformer that was fitted on the
# training features. transform() is used instead of fit_transform() so the
# transformer is not re-fitted on test data, and the expansion is no longer
# recomputed for each of the models.
X_test_poly = pd.DataFrame(poly_2.transform(X_test))

# One prediction column per quantile key of all_models, in insertion order.
predictions = pd.DataFrame(
    {level: fitted.predict(X_test_poly) for level, fitted in all_models.items()}
)

In [11]:
# Write the degree-2 gradient-boosting predictions to disk without the row index.
predictions.to_csv("../../results/predictions_GB2.csv", index=False)

# Neural Networks

In [None]:
# Early-stopping callback shared by every network trained below: it watches the
# validation loss (lower is better) and uses min_delta=1 with patience=50.
# NOTE(review): the EarlyStopping import in the first cell is commented out, so
# this cell raises NameError on a fresh kernel until that import is restored.
early_stopping_options = {
    "monitor": 'val_loss',
    "mode": 'min',
    "min_delta": 1,
    "patience": 50,
    "verbose": 1,
}
es = EarlyStopping(**early_stopping_options)

In [None]:
def quantileModel1():
    """Build an uncompiled network with a single hidden layer.

    The input width is taken from the number of columns of the global
    ``X_train``. The network has one hidden layer of 50 ReLU units followed by
    a one-unit output layer for which no activation is specified.

    Returns:
        A new ``Sequential`` model; the caller is responsible for compiling it.
    """
    n_features = X_train.shape[1]
    hidden_layer = Dense(units=50, input_dim=n_features, activation='relu')
    output_layer = Dense(1)
    return Sequential([hidden_layer, output_layer])

In [None]:
# Train a fresh network for each quantile level 0.01 ... 0.99 with the pinball
# loss for that level, and collect its flattened test-set predictions.
nn_predictions = {}
for tau in tqdm(np.linspace(0.01, 0.99, 99)):
    # Bind the current quantile level into the loss function.
    pinball_for_tau = functools.partial(tfa.losses.pinball_loss, tau=tau)

    model = quantileModel1()
    model.compile(loss=pinball_for_tau, optimizer="adadelta")

    fit_options = dict(
        validation_split=0.3,
        epochs=500,
        batch_size=32,
        verbose=0,
        callbacks=[es],
    )
    model.fit(X_train, y_train, **fit_options)

    # Columns are keyed by the quantile level formatted with two decimals.
    nn_predictions["%1.2f" % tau] = model.predict(X_test).flatten()

predictions = pd.DataFrame(nn_predictions)

In [None]:
# Display the neural-network predictions (one column per quantile level).
predictions

In [None]:
# Write the neural-network predictions to disk without the row index.
predictions.to_csv("../../results/predictions_NN.csv", index=False)