Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/RegressionEnsemble.extreme_lags when models use only covariates lags #1942

Merged
merged 9 commits into from
Aug 11, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
madtoinou marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

**Fixed**
- Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).
- Fixed a bug in `RegressionEnsembleModel.extreme_lags` when the forecasting models have only covariates lags. [#1942](https://github.com/unit8co/darts/pull/1942) by [Antoine Madrona](https://github.com/madtoinou).


## [0.25.0](https://github.com/unit8co/darts/tree/0.25.0) (2023-08-04)
Expand Down
10 changes: 7 additions & 3 deletions darts/models/forecasting/regression_ensemble_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def fit(

raise_if(
train_n_points_too_big,
"`regression_train_n_points` parameter too big (must be smaller or "
"equal to the number of points in training_series)",
"`regression_train_n_points` parameter too big (must be strictly smaller than "
"the number of points in training_series)",
logger,
)

Expand Down Expand Up @@ -217,7 +217,11 @@ def extreme_lags(
Optional[int],
]:
extreme_lags_ = super().extreme_lags
return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:]
# shift min_target_lag in the past to account for the regression model training set
if extreme_lags_[0] is None:
return (-self.train_n_points,) + extreme_lags_[1:]
else:
return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:]

@property
def output_chunk_length(self) -> int:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,18 +366,41 @@ def test_ensemble_models_denoising_multi_input(self):
self.helper_test_models_accuracy(ensemble, horizon, ts_sum2, ts_cov2, 3)

# Fits a RegressionEnsembleModel made of two local models, checks its
# `extreme_lags`, then runs `backtest` as a smoke test.
# NOTE(review): this text looks like a rendered diff without +/- markers, so the
# `series` / `self.sine_series` statement pairs below are presumably the
# before/after versions of the same line -- confirm against the actual file.
def test_call_backtest_regression_ensemble_local_models(self):
# local sine series: value_frequency 1/5, y-offset 10, 50 points
series = tg.sine_timeseries(
value_frequency=(1 / 5), value_y_offset=10, length=50
)
# value passed as `regression_train_n_points` to the ensemble
regr_train_n = 10
ensemble = RegressionEnsembleModel(
[NaiveSeasonal(5), Theta(2, 5)], regression_train_n_points=regr_train_n
)
ensemble.fit(series)
ensemble.fit(self.sine_series)
# the largest `min_train_series_length` among the ensemble's models is expected to be 10
assert max(m_.min_train_series_length for m_ in ensemble.models) == 10
# -10 comes from the maximum minimum train series length of all models
# the first entry is additionally shifted by `regr_train_n`; covariate entries are None
assert ensemble.extreme_lags == (-10 - regr_train_n, 0, None, None, None, None)
# no assertion on the result: the backtest calls only need to complete
ensemble.backtest(series)
ensemble.backtest(self.sine_series)

def test_extreme_lags(self):
    """Check `RegressionEnsembleModel.extreme_lags` for two ensembles.

    The first ensemble combines forecasting models that only declare
    covariate lags; the second one mixes target, past and future
    covariate lags. In both cases the first entry of the expected tuple
    is shifted by `regression_train_n_points`.
    """
    train_n_points = 10

    # case 1: forecasting models do not use target lags
    covariates_only_models = [
        RandomForest(
            lags_future_covariates=[0],
        ),
        RegressionModel(lags_past_covariates=3),
    ]
    covariates_only_ensemble = RegressionEnsembleModel(
        forecasting_models=covariates_only_models,
        regression_train_n_points=train_n_points,
    )
    expected_covariates_only = (-train_n_points, 0, -3, -1, 0, 0)
    self.assertEqual(
        covariates_only_ensemble.extreme_lags, expected_covariates_only
    )

    # case 2: mix of all the lags
    mixed_lags_models = [
        RandomForest(
            lags_future_covariates=[-2, 5],
        ),
        RegressionModel(lags=[-7, -3], lags_past_covariates=3),
    ]
    mixed_lags_ensemble = RegressionEnsembleModel(
        forecasting_models=mixed_lags_models,
        regression_train_n_points=train_n_points,
    )
    expected_mixed = (-7 - train_n_points, 0, -3, -1, -2, 5)
    self.assertEqual(mixed_lags_ensemble.extreme_lags, expected_mixed)

def test_stochastic_regression_ensemble_model(self):
quantiles = [0.25, 0.5, 0.75]
Expand Down