In [34]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_pinball_loss, mean_squared_error

In [35]:
data_17 = pd.read_csv("../../data/final_17.csv")

In [36]:
data_17["datetime"] = pd.to_datetime(data_17["datetime"])
data_17.drop(["loadFR"], axis=1, inplace=True)

In [37]:
data_17.dropna(inplace=True)

In [38]:
data_17_train = data_17[data_17["datetime"].dt.year <= 2021]
data_17_test = data_17[(data_17["datetime"].dt.year == 2021) & (data_17["datetime"].dt.month < 7)]

In [64]:
X_train = data_17_train.drop(["datetime", "priceBE"], axis=1)
y_train = data_17_train["priceBE"]
X_test = data_17_test.drop(["datetime", "priceBE"], axis=1)
y_test = data_17_test["priceBE"]

In [76]:
y_test

5466    60.61
5467    60.61
5468    60.61
5469    60.61
5470    60.61
        ...  
6366    99.97
6367    99.97
6368    99.97
6369    99.97
6370    99.97
Name: priceBE, Length: 870, dtype: float64

In [81]:
all_models = {}
common_params = dict(
    learning_rate=0.05,
    n_estimators=200,
    max_depth=2,
    min_samples_leaf=9,
    min_samples_split=9,
)
for alpha in np.linspace(0.01, 0.99, 99):
    gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha, **common_params)
    all_models["%1.2f" % alpha] = gbr.fit(X_train, y_train)

In [82]:
predictions = pd.DataFrame()
for model in all_models:
    predictions[model] = all_models[model].predict(X_test)

In [84]:
mean_pinball_loss(y_test, predictions["0.05"], alpha=0.05)

0.9689885467395148

In [85]:
mean_pinball_loss(y_test, predictions["0.50"], alpha=0.5)

1.2191481014252783

In [86]:
mean_pinball_loss(y_test, predictions["0.95"], alpha=0.95)

0.7054219207627703

In [146]:
def CRPS(predictions, observations):
    results = pd.DataFrame([])
    for column in predictions:
        delta = np.array(predictions[column].values - observations.values)
        results[predictions[column].name] = np.square(float(predictions[column].name) - np.heaviside(delta, 1))
    return results.sum(axis=1).mean()

In [147]:
CRPS(predictions, y_test)

16.077850574712592