Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/encoders extension #1093

Merged
merged 12 commits into from
Aug 10, 2022
Merged
22 changes: 22 additions & 0 deletions darts/models/forecasting/catboost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
likelihood: str = None,
quantiles: List = None,
random_state: Optional[int] = None,
Expand All @@ -48,6 +49,26 @@ def __init__(
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.utils.data.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

.. highlight:: python
.. code-block:: python

add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['absolute'], 'future': ['relative']},
'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
'transformer': Scaler()
}
..
likelihood
Can be set to 'quantile', 'poisson' or 'gaussian'. If set, the model will be probabilistic,
allowing sampling at prediction time. When set to 'gaussian', the model will use CatBoost's
Expand Down Expand Up @@ -96,6 +117,7 @@ def __init__(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=CatBoostRegressor(**kwargs),
)

Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/ensemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ def _stack_ts_multiseq(self, predictions_list):
# stacks multiple sequences of timeseries elementwise
return [self._stack_ts_seq(ts_list) for ts_list in zip(*predictions_list)]

@property
def _model_encoder_settings(self):
    """Encoder settings are undefined for ensemble models.

    Declared as a property to match the abstract ``@property`` declaration on
    ``GlobalForecastingModel``; without the decorator, property-style access
    would silently return the bound method instead of raising.

    Raises
    ------
    NotImplementedError
        Always; encoders must be added to the underlying member models instead.
    """
    raise NotImplementedError(
        "Encoders are not supported by EnsembleModels. Instead add encoder to the underlying `models`."
    )

def _make_multiple_predictions(
self,
n: int,
Expand Down
35 changes: 35 additions & 0 deletions darts/models/forecasting/forecasting_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
_parallel_apply,
_with_sanity_checks,
)
from darts.utils.data.encoders import SequentialEncoder
from darts.utils.timeseries_generation import (
_build_forecast_series,
_generate_new_dates,
Expand Down Expand Up @@ -932,6 +933,13 @@ class GlobalForecastingModel(ForecastingModel, ABC):
_expect_past_covariates, _expect_future_covariates = False, False
past_covariate_series, future_covariate_series = None, None

def __init__(self, add_encoders: Optional[dict] = None):
    """Initialize the global forecasting model.

    Parameters
    ----------
    add_encoders
        Optional dict of encoder settings used to auto-generate covariates.
        ``None`` (the default) means the model uses no encoders.
    """
    super().__init__()

    # Store only the raw user settings here; the actual SequentialEncoder is
    # built later via `initialize_encoders()`.
    self.encoders: Optional[SequentialEncoder] = None
    self.add_encoders = add_encoders

@abstractmethod
def fit(
self,
Expand Down Expand Up @@ -1084,6 +1092,33 @@ def _supports_non_retrainable_historical_forecasts(self) -> bool:
"""GlobalForecastingModel supports historical forecasts without retraining the model"""
return True

@property
@abstractmethod
def _model_encoder_settings(self) -> Tuple[int, int, bool, bool]:
    """Abstract property that returns model specific encoder settings that are used to initialize the encoders.

    Returns
    -------
    Tuple[int, int, bool, bool]
        ``(input_chunk_length, output_chunk_length, takes_past_covariates, takes_future_covariates)``,
        consumed by ``initialize_encoders()`` to construct the ``SequentialEncoder``.
    """
    pass

def initialize_encoders(self) -> SequentialEncoder:
    """Build and return a :class:`SequentialEncoder` from the model-specific
    encoder settings (``self._model_encoder_settings``) and the
    ``add_encoders`` dict supplied at model creation.
    """
    # settings tuple: (input_chunk_length, output_chunk_length,
    #                  takes_past_covariates, takes_future_covariates)
    settings = self._model_encoder_settings
    return SequentialEncoder(
        add_encoders=self.add_encoders,
        input_chunk_length=settings[0],
        output_chunk_length=settings[1],
        takes_past_covariates=settings[2],
        takes_future_covariates=settings[3],
    )


class DualCovariatesForecastingModel(ForecastingModel, ABC):
"""The base class for the forecasting models that are not global, but support future covariates.
Expand Down
22 changes: 22 additions & 0 deletions darts/models/forecasting/gradient_boosted_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
Expand All @@ -52,6 +53,26 @@ def __init__(
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.utils.data.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

.. highlight:: python
.. code-block:: python

add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['absolute'], 'future': ['relative']},
'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
'transformer': Scaler()
}
..
likelihood
Can be set to `quantile` or `poisson`. If set, the model will be probabilistic, allowing sampling at
prediction time.
Expand Down Expand Up @@ -87,6 +108,7 @@ def __init__(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=lgb.LGBMRegressor(**kwargs),
)

Expand Down
22 changes: 22 additions & 0 deletions darts/models/forecasting/linear_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
Expand All @@ -50,6 +51,26 @@ def __init__(
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.utils.data.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

.. highlight:: python
.. code-block:: python

add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['absolute'], 'future': ['relative']},
'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
'transformer': Scaler()
}
..
likelihood
Can be set to `quantile` or `poisson`. If set, the model will be probabilistic, allowing sampling at
prediction time. If set to `quantile`, the `sklearn.linear_model.QuantileRegressor` is used. Similarly, if
Expand Down Expand Up @@ -94,6 +115,7 @@ def __init__(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=model,
)

Expand Down
22 changes: 22 additions & 0 deletions darts/models/forecasting/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
n_estimators: Optional[int] = 100,
max_depth: Optional[int] = None,
**kwargs,
Expand All @@ -55,6 +56,26 @@ def __init__(
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.utils.data.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

.. highlight:: python
.. code-block:: python

add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['absolute'], 'future': ['relative']},
'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
'transformer': Scaler()
}
..
n_estimators : int
The number of trees in the forest.
max_depth : int
Expand All @@ -74,6 +95,7 @@ def __init__(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=RandomForestRegressor(**kwargs),
)

Expand Down
81 changes: 80 additions & 1 deletion darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def __init__(
lags_past_covariates: Union[int, List[int]] = None,
lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
model=None,
):
"""Regression Model
Expand All @@ -71,14 +72,34 @@ def __init__(
Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
be useful if the covariates don't extend far enough into the future.
add_encoders
A large number of past and future covariates can be automatically generated with `add_encoders`.
This can be done by adding multiple pre-defined index encoders and/or custom user-made functions that
will be used as index encoders. Additionally, a transformer such as Darts' :class:`Scaler` can be added to
transform the generated covariates. This happens all under one hood and only needs to be specified at
model creation.
Read :meth:`SequentialEncoder <darts.utils.data.encoders.SequentialEncoder>` to find out more about
``add_encoders``. Default: ``None``. An example showing some of ``add_encoders`` features:

.. highlight:: python
.. code-block:: python

add_encoders={
'cyclic': {'future': ['month']},
'datetime_attribute': {'future': ['hour', 'dayofweek']},
'position': {'past': ['absolute'], 'future': ['relative']},
'custom': {'past': [lambda idx: (idx.year - 1950) / 50]},
'transformer': Scaler()
}
..
model
Scikit-learn-like model with ``fit()`` and ``predict()`` methods. Also possible to use model that doesn't
support multi-output regression for multivariate timeseries, in which case one regressor
will be used per component in the multivariate series.
If None, defaults to: ``sklearn.linear_model.LinearRegression(n_jobs=-1)``.
"""

super().__init__()
super().__init__(add_encoders=add_encoders)

self.model = model
self.lags = {}
Expand Down Expand Up @@ -200,6 +221,46 @@ def __init__(
)
self.output_chunk_length = output_chunk_length

@property
def _model_encoder_settings(self) -> Tuple[int, int, bool, bool]:
    """Encoder settings derived from this model's covariate lags.

    Returns
    -------
    Tuple[int, int, bool, bool]
        ``(input_chunk_length, output_chunk_length, takes_past_covariates,
        takes_future_covariates)`` where the chunk lengths are inferred from
        the minimum / maximum covariate lags.
    """
    covariate_lags = set()
    for cov_type in ("past", "future"):
        covariate_lags.update(self.lags.get(cov_type, []))

    if not covariate_lags:
        return 0, 0, False, False

    # Negative lags reach back into past and/or historic future covariates:
    # a minimum lag of -1 means 1 step back, inclusive of the target series end.
    n_steps_back_inclusive = abs(min(min(covariate_lags), 0))
    # Non-negative lags reach ahead into future covariates: a maximum lag of 0
    # means 1 step ahead, exclusive of the last step of the target series.
    n_steps_ahead_exclusive = max(max(covariate_lags), 0) + 1

    return (
        n_steps_back_inclusive,
        n_steps_ahead_exclusive,
        "past" in self.lags,
        "future" in self.lags,
    )

def _get_encoders_n(self, n):
    """Return the number of encoder prediction steps needed for RegressionModels.

    Encoders were written for TorchForecastingModels, which only need `n`
    future covariate steps; a RegressionModel with future covariate lags needs
    `n + max_future_lag` instead. As a consequence, slightly more past
    covariates than strictly required may be generated when past and future
    covariate lags are used simultaneously.
    """
    # settings tuple: (_, output_chunk_length, _, takes_future_covariates)
    settings = self._model_encoder_settings
    n_steps_ahead, takes_future_covariates = settings[1], settings[3]
    if not takes_future_covariates:
        return n
    return n + (n_steps_ahead - 1)

@property
def min_train_series_length(self) -> int:
return max(
Expand Down Expand Up @@ -319,6 +380,7 @@ def _fit_model(
Function that fit the model. Deriving classes can override this method for adding additional parameters (e.g.,
adding validation data), keeping the sanity checks on series performed by fit().
"""

training_samples, training_labels = self._create_lagged_data(
target_series, past_covariates, future_covariates, max_samples_per_ts
)
Expand Down Expand Up @@ -361,6 +423,15 @@ def fit(
**kwargs
Additional keyword arguments passed to the `fit` method of the model.
"""

self.encoders = self.initialize_encoders()
if self.encoders.encoding_available:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat 👌

past_covariates, future_covariates = self.encoders.encode_train(
target=series,
past_covariate=past_covariates,
future_covariate=future_covariates,
)

super().fit(
series=series,
past_covariates=past_covariates,
Expand Down Expand Up @@ -477,6 +548,14 @@ def predict(
logger,
)

if self.encoders.encoding_available:
past_covariates, future_covariates = self.encoders.encode_inference(
n=self._get_encoders_n(n),
target=series,
past_covariate=past_covariates,
future_covariate=future_covariates,
)

super().predict(n, series, past_covariates, future_covariates, num_samples)

if series is None:
Expand Down