# Usage

In [1]:
import datetime as dt
import os

import pandas as pd

## #1 Data downloading

In [2]:
from remodels.data.EntsoeApi import EntsoeApi

# Period for which prices and load forecasts are downloaded.
start_date = dt.date(year=2015, month=1, day=1)
end_date = dt.date(year=2023, month=7, day=1)

In [3]:
# To use the ENTSO-E API you need a free account to obtain a security token.
# The token is a credential: it is read from the ENTSOE_SECURITY_TOKEN
# environment variable so that it never ends up stored in the notebook.
security_token = os.environ.get("ENTSOE_SECURITY_TOKEN")
if not security_token:
    raise RuntimeError(
        "Set the ENTSOE_SECURITY_TOKEN environment variable to your "
        "ENTSO-E security token before running this notebook."
    )

entsoeApi = EntsoeApi(security_token)

### #1.1 sample data - Germany
Data used by B. Uniejewski in his article "Smoothing Quantile Regression Averaging: A new approach to probabilistic forecasting of electricity prices"

In [4]:
# Domain codes queried for Germany: two time series are needed to cover the
# desired period, so every download below is done once per code.
# NOTE(review): presumably two successive German bidding zones — confirm.
germany_domains = ["10Y1001A1001A63L", "10Y1001A1001A82H"]

# day-ahead prices: one download per domain code, stacked in order
prices = pd.concat(
    [
        entsoeApi.get_day_ahead_pricing(
            start_date,
            end_date,
            in_domain=domain,
            resolution_preference=60,
        )
        for domain in germany_domains
    ]
)

# load forecast: an additional variable that helps predict future prices;
# each series is averaged to hourly values before being stacked
forecast_load = pd.concat(
    [
        entsoeApi.get_forecast_load(start_date, end_date, domain).resample("H").mean()
        for domain in germany_domains
    ]
)

# attach the load forecast to the prices (joined on the index)
germany_data = prices.join(forecast_load)
germany_data

Unnamed: 0_level_0,price_da,quantity
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-04 23:00:00+00:00,22.34,50326.50
2015-01-05 00:00:00+00:00,17.93,48599.50
2015-01-05 01:00:00+00:00,15.17,47364.00
2015-01-05 02:00:00+00:00,16.38,47292.25
2015-01-05 03:00:00+00:00,17.38,48370.25
...,...,...
2023-07-01 17:00:00+00:00,82.36,48027.25
2023-07-01 18:00:00+00:00,89.60,46469.00
2023-07-01 19:00:00+00:00,92.79,44480.00
2023-07-01 20:00:00+00:00,92.97,43819.00


### #1.2 sample data - Spain
Data used by B. Uniejewski in his article "Smoothing Quantile Regression Averaging: A new approach to probabilistic forecasting of electricity prices"

In [5]:
# Spain: a single domain code is used for both the prices and the load forecast.
spain_domain = "10YES-REE------0"

prices_spain = entsoeApi.get_day_ahead_pricing(
    start_date, end_date, spain_domain, resolution_preference=60
)

# load forecast averaged to hourly values
forecast_load_spain = (
    entsoeApi.get_forecast_load(start_date, end_date, spain_domain)
    .resample("H")
    .mean()
)

# attach the load forecast to the prices (joined on the index)
spain_data = prices_spain.join(forecast_load_spain)

### #1.3 sample data - France

In [7]:
# France: day-ahead prices and load forecast for a single domain code.
prices_france = entsoeApi.get_day_ahead_pricing(
    start_date, end_date, "10YFR-RTE------C", resolution_preference=60
)
forecast_load_france = entsoeApi.get_forecast_load(
    start_date, end_date, "10YFR-RTE------C"
)
# Average the load forecast to hourly values, exactly as in the Germany and
# Spain cells, so that it lines up with the 60-minute prices even if the
# forecast is delivered at a finer resolution.
forecast_load_france = forecast_load_france.resample("H").mean()

# attach the load forecast to the prices (joined on the index)
france_data = prices_france.join(forecast_load_france)

In [8]:
# Persist every downloaded data set so later sections can start from the CSV files.
for country_frame, csv_path in (
    (germany_data, "germany_data.csv"),
    (spain_data, "spain_data.csv"),
    (france_data, "france_data.csv"),
):
    country_frame.to_csv(csv_path)

## #2 Point prediction model

In [4]:
# Load the German data set saved earlier (first CSV column becomes the parsed
# datetime index) and give the load-forecast column a descriptive name.
data = (
    pd.read_csv("germany_data.csv", index_col=0, parse_dates=True)
    .rename(columns={"quantity": "load"})
)
data.head(5)

Unnamed: 0_level_0,price_da,load
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-04 23:00:00+00:00,22.34,50326.5
2015-01-05 00:00:00+00:00,17.93,48599.5
2015-01-05 01:00:00+00:00,15.17,47364.0
2015-01-05 02:00:00+00:00,16.38,47292.25
2015-01-05 03:00:00+00:00,17.38,48370.25


### #2.1 Adjusting data for Daylight Saving Time changes

In [5]:
from remodels.transformers.TimeTransformers import DSTAdjuster

# Adjust the hourly series for Daylight Saving Time changes.
# NOTE(review): the outputs shown in this notebook have a tz-aware index
# (+00:00) before this step and a tz-naive index after it, so the step does
# not look like a no-op for this data set — confirm.
dst_adjuster = DSTAdjuster()
data = dst_adjuster.fit_transform(data)

#### Example data preparation - lags, additional variables

In [6]:
# Explanatory variables: lagged prices and summary statistics of the previous day.
hours_per_day = 24  # rows per day in the hourly series


def previous_day_stat(stat):
    """Return the per-calendar-day `stat` of price_da, shifted by 24 rows."""
    return data.resample("D")["price_da"].transform(stat).shift(hours_per_day)


data = data.assign(
    # price 1, 2 and 7 days (in 24-row steps) earlier
    price_da_1D=data["price_da"].shift(hours_per_day),
    price_da_2D=data["price_da"].shift(2 * hours_per_day),
    price_da_7D=data["price_da"].shift(7 * hours_per_day),
    # last, lowest and highest price of the day, shifted by 24 rows
    price_da_23_1D=previous_day_stat("last"),
    min_price_da_1D=previous_day_stat("min"),
    max_price_da_1D=previous_day_stat("max"),
)
data.tail(5)

Unnamed: 0_level_0,price_da,load,price_da_1D,price_da_2D,price_da_7D,price_da_23_1D,min_price_da_1D,max_price_da_1D
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-01 17:00:00,82.36,48027.25,117.96,166.25,129.95,108.91,88.46,138.97
2023-07-01 18:00:00,89.6,46469.0,133.43,171.58,141.44,108.91,88.46,138.97
2023-07-01 19:00:00,92.79,44480.0,130.74,154.73,150.72,108.91,88.46,138.97
2023-07-01 20:00:00,92.97,43819.0,122.39,141.81,130.46,108.91,88.46,138.97
2023-07-01 21:00:00,89.86,41717.0,109.47,121.11,118.31,108.91,88.46,138.97


### #2.2 Variance Stabilizing Transformations

In [7]:
from remodels.transformers.VSTransformers import ArcsinhScaler
from remodels.transformers.VSTransformers import BoxCoxScaler
from remodels.transformers.VSTransformers import ClippingScaler
from remodels.transformers.VSTransformers import LogClippingScaler
from remodels.transformers.VSTransformers import LogisticScaler
from remodels.transformers.VSTransformers import MLogScaler
from remodels.transformers.VSTransformers import PolyScaler

In [8]:
# A VSTransformer can be used on its own: fit it and transform the data in one call.
arcsinh_scaler = ArcsinhScaler()
transformed_data = arcsinh_scaler.fit_transform(data)

# show the last rows of the transformed frame
transformed_data.tail(n=5)


Unnamed: 0_level_0,price_da,load,price_da_1D,price_da_2D,price_da_7D,price_da_23_1D,min_price_da_1D,max_price_da_1D
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-01 17:00:00,5.104284,11.472671,5.463511,5.806649,5.560312,5.38369,5.17573,5.627418
2023-07-01 18:00:00,5.188534,11.439688,5.586738,5.838205,5.645035,5.38369,5.17573,5.627418
2023-07-01 19:00:00,5.223515,11.395942,5.566372,5.734839,5.708582,5.38369,5.17573,5.627418
2023-07-01 20:00:00,5.225453,11.38097,5.500377,5.647648,5.564229,5.38369,5.17573,5.627418
2023-07-01 21:00:00,5.191431,11.331811,5.388819,5.489863,5.466473,5.38369,5.17573,5.627418


In [9]:
# Undo the transformation; element [0] of the result is the restored frame,
# whose last rows match the original values.
restored = arcsinh_scaler.inverse_transform(transformed_data)
restored[0].tail(5)

Unnamed: 0_level_0,price_da,load,price_da_1D,price_da_2D,price_da_7D,price_da_23_1D,min_price_da_1D,max_price_da_1D
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-01 17:00:00,82.36,48027.25,117.96,166.25,129.95,108.91,88.46,138.97
2023-07-01 18:00:00,89.6,46469.0,133.43,171.58,141.44,108.91,88.46,138.97
2023-07-01 19:00:00,92.79,44480.0,130.74,154.73,150.72,108.91,88.46,138.97
2023-07-01 20:00:00,92.97,43819.0,122.39,141.81,130.46,108.91,88.46,138.97
2023-07-01 21:00:00,89.86,41717.0,109.47,121.11,118.31,108.91,88.46,138.97


In [10]:
# Some VSTransformers may require additional arguments,
# e.g. PolyScaler:
#   lamb: exponent used in the polynomial transformation
poly_scaler = PolyScaler(lamb=0.125)
poly_scaler.fit_transform(data).tail(5)

Unnamed: 0_level_0,price_da,load,price_da_1D,price_da_2D,price_da_7D,price_da_23_1D,min_price_da_1D,max_price_da_1D
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-07-01 17:00:00,0.603203,2.707741,0.68095,0.759117,0.702614,0.663313,0.618333,0.717811
2023-07-01 18:00:00,0.621062,2.691912,0.708581,0.766497,0.721825,0.663313,0.618333,0.717811
2023-07-01 19:00:00,0.628545,2.671017,0.703981,0.742442,0.736387,0.663313,0.618333,0.717811
2023-07-01 20:00:00,0.62896,2.663892,0.689165,0.722421,0.703497,0.663313,0.618333,0.717811
2023-07-01 21:00:00,0.62168,2.640592,0.66444,0.686817,0.681608,0.663313,0.618333,0.717811


In [11]:
# later we will pass a VSTransformer as an argument in our pipeline

### #2.3 Point Model definition & predictions

In [12]:
from remodels.transformers import StandardizingScaler
from remodels.pointsModels import PointModel
from remodels.pipelines import RePipeline

# Any model represented by a class with .fit(X, y) and .predict(X) methods
# can be used as the final step.
from sklearn.linear_model import LinearRegression

# Pipeline: the ordered sequence of steps used by the point model.
pipeline = RePipeline(
    [
        ("standardScaler", StandardizingScaler()),
        ("vstScaler", PolyScaler()),
        ("LinearRegression", LinearRegression()),
    ]
)

# The point model needs a mapping from hour ranges to the variables to be
# used in those hours. Our case is very simple: all variables for each hour.
X_cols_to_pipeline = [
    "price_da_1D",
    "price_da_2D",
    "price_da_7D",
    "price_da_23_1D",
    "max_price_da_1D",
    "min_price_da_1D",
    "load",
]
y_col = "price_da"

# NOTE(review): the hour range is written as (0, 25) — confirm against
# PointModel whether the upper bound is exclusive and whether 25 is intended.
variables_per_hour = {(0, 25): X_cols_to_pipeline}

pointModel = PointModel(
    pipeline=pipeline,
    variables_per_hour=variables_per_hour,
    # Use the target name defined above instead of repeating the literal,
    # so the target column is configured in exactly one place.
    y_column=y_col,
)

In [13]:
# Obtain point predictions for a fixed evaluation period.

# first and last day of the period
start = dt.date(2017, 1, 1)
end = dt.date(2017, 1, 31)

# fit the point model on that period
pointModel.fit(data, start=start, end=end)

# one prediction run per calibration (rolling) window, in this order
point_prediction_182, point_prediction_364, point_prediction_728 = (
    pointModel.predict(rolling_window=window, inverse_predictions=True)
    for window in (182, 364, 728)
)

# error metrics of every prediction run made so far
pointModel.summary()

Unnamed: 0,MAE,MSE,RMSE,MAPE,R2
prediction_182rw,10.604293,255.073186,15.971011,38.311686,0.616126
prediction_364rw,10.57555,242.093443,15.559352,56.93676,0.63566
prediction_728rw,11.036905,263.647687,16.237231,68.703245,0.603222


In [14]:
# Put the three prediction runs side by side, one column per rolling window.
prediction_frames = [
    point_prediction_182,
    point_prediction_364,
    point_prediction_728,
]
point_predictions = pd.concat(prediction_frames, axis="columns")

point_predictions

Unnamed: 0_level_0,prediction_182rw,prediction_364rw,prediction_728rw
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01 00:00:00,17.873043,20.770648,20.528032
2017-01-01 01:00:00,16.679405,19.760847,19.634005
2017-01-01 02:00:00,17.734410,18.835915,18.598335
2017-01-01 03:00:00,17.860881,16.665106,16.783094
2017-01-01 04:00:00,13.688883,11.613117,10.663689
...,...,...,...
2017-01-31 19:00:00,48.427391,51.057764,51.552124
2017-01-31 20:00:00,40.407145,43.323584,43.491338
2017-01-31 21:00:00,39.663584,42.214230,40.840423
2017-01-31 22:00:00,32.503318,33.842903,33.933611


## #3 QRA model

In [15]:
# For now, QRA models require numpy arrays.
# X: the point predictions (one column per rolling window)
X = point_predictions.to_numpy()

# y: the observed price, aligned to the prediction index through an index join
predictions_with_data = point_predictions.join(data)
y = predictions_with_data["price_da"].to_numpy()

In [16]:
# all QRA variants
from remodels.qra import QRA
from remodels.qra import QRM 
from remodels.qra import LQRA
from remodels.qra import FQRA
from remodels.qra import FQRM
from remodels.qra import sFQRA
from remodels.qra import sFQRM
from remodels.qra import SQRA
from remodels.qra import SQRM

### #3.1 QRA models - direct usage

In [17]:
# Sample prediction for three different quantiles.
# Each model is fitted on (X, y) and predicts on the same X;
# the output is returned as np.array.
y_pred_q25, y_pred_q50, y_pred_q75 = (
    QRA(quantile=quantile, fit_intercept=True).fit(X, y).predict(X)
    for quantile in (0.25, 0.50, 0.75)
)

# first ten median predictions
y_pred_q50[:10]

array([19.71594299, 18.51816111, 18.40119957, 16.80886388, 12.58936152,
        2.92735718, 18.6152022 , 23.41801672, 23.46669472, 23.17472373])

In [18]:
# Quantile predictions next to the observed values, indexed like the point predictions.
quantile_columns = {
    "q25": y_pred_q25,
    "q50": y_pred_q50,
    "q75": y_pred_q75,
    "y_true": y,
}
pd.DataFrame(quantile_columns, index=point_predictions.index).tail(10)

Unnamed: 0_level_0,q25,q50,q75,y_true
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-31 14:00:00,54.186739,66.394722,80.972197,89.75
2017-01-31 15:00:00,55.208994,66.808502,83.680725,90.2
2017-01-31 16:00:00,61.288296,75.258385,95.0,95.0
2017-01-31 17:00:00,58.12867,70.469383,88.409293,104.33
2017-01-31 18:00:00,51.582541,61.424381,76.406883,90.0
2017-01-31 19:00:00,43.948801,50.856552,64.150751,84.15
2017-01-31 20:00:00,37.97905,42.90925,54.060156,60.92
2017-01-31 21:00:00,37.354382,42.899396,52.528673,54.9
2017-01-31 22:00:00,31.108137,33.7103,40.956407,40.69
2017-01-31 23:00:00,32.103146,35.591743,43.332177,44.91


In [19]:
# Some QRA models may require additional parameters,
# e.g. LQRA:
#   lambda_: LASSO regularization parameter
lqra_median = LQRA(quantile=0.50, lambda_=1, fit_intercept=True)
lqra_median.fit(X, y).predict(X)[:10]

array([20.82443897, 19.74028314, 19.55087324, 18.22740215, 13.4766444 ,
        4.2748866 , 18.34540101, 22.75212594, 23.07812124, 23.05093359])

### #3.2 QRA Tester

In [20]:
from remodels.qra.tester import QR_Tester

In [65]:
# QR_Tester workflow:
#   1. fit the model on the initial `calibration_window` period
#   2. predict the next `prediction_window` values (every quantile)
#   3. move the window and repeat

qra_model = QRA(fit_intercept=True)

qr_tester = QR_Tester(
    calibration_window=72,
    prediction_window=24,
    qr_model=qra_model,  # any QR model
    max_workers=4,  # multiprocessing max workers
)
results = qr_tester.fit_predict(X, y)

In [66]:
# Compare input and output shapes; Y_pred comes without the initial
# `calibration_window` period.
for label, shape in (
    ("X.shape:", X.shape),
    ("y.shape:", y.shape),
    ("Y_pred.shape:", results.Y_pred.shape),
):
    print(label, shape)

X.shape: (744, 3)
y.shape: (744,)
Y_pred.shape: (672, 99)


In [72]:
# One column per predicted percentile (99 in total); show the last three rows.
percentile_predictions = pd.DataFrame(results.Y_pred)
percentile_predictions.tail(3)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,89,90,91,92,93,94,95,96,97,98
669,34.551064,36.642725,36.662199,36.662199,37.051204,36.430373,36.430373,36.491054,37.220091,37.410027,...,46.410087,46.410324,46.40991,46.151936,46.587195,46.587195,46.587195,57.053207,57.053207,57.053207
670,31.233392,31.960833,31.947549,31.947549,31.682189,31.795983,31.795983,31.746661,31.701985,31.473622,...,34.409566,34.41044,34.406479,34.320289,34.385573,34.385573,34.385573,38.622451,38.622451,38.622451
671,31.059022,32.656342,32.653686,32.653686,32.600641,32.391029,32.391029,32.375221,32.264271,32.204126,...,38.068782,38.066693,38.063597,37.817822,38.070741,38.070741,38.070741,46.204973,46.204973,46.204973


### #3.3 Probabilistic prediction metrics

In [68]:
# `results` is the object returned by QR_Tester(...).fit_predict(X, y) in section 3.2;
# the metrics below are methods of this object.
results

<remodels.qra.tester.qr_tester._Results at 0x2265c7dba30>

In [71]:
# Average empirical coverage of the prediction intervals.
# alpha: length of the prediction interval
# e.g. alpha=50 --> prediction interval from the 25th to the 75th percentile

# desired: 0.50, obtained: about 0.35 --> prediction intervals are too narrow
results.aec(alpha=50)

0.34970238095238093

In [74]:
# Empirical coverage per hour (the array below has one value for each of 24 hours).
results.ec_h(alpha=50)

array([0.5       , 0.39285714, 0.5       , 0.42857143, 0.39285714,
       0.17857143, 0.25      , 0.39285714, 0.39285714, 0.25      ,
       0.25      , 0.21428571, 0.28571429, 0.35714286, 0.39285714,
       0.39285714, 0.39285714, 0.39285714, 0.28571429, 0.25      ,
       0.35714286, 0.39285714, 0.39285714, 0.35714286])

In [75]:
# Mean absolute deviation of the empirical coverage per hour.
# NOTE(review): presumably measured against the nominal level given by alpha — confirm.
results.ec_mad(alpha=50)

0.15029761904761904

In [76]:
# Kupiec test for conditional coverage
# NOTE(review): Kupiec's test is usually described as a test of unconditional
# coverage (Christoffersen's being the conditional one) — confirm the wording.

# returns: number of hours for which the test is not rejected
# the higher, the better
results.kupiec_test(alpha=50, significance_level=0.05)

16

In [78]:
# Christoffersen test, called with the same alpha and significance level as the Kupiec test.

# returns: number of hours for which the test is not rejected
# the higher, the better
results.christoffersen_test(alpha=50, significance_level=0.05)

15

In [80]:
# Aggregate pinball score of the predictions held in `results` (a single number).
results.aps()

5.600464806241713