Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor/1080 onehsot regressionmodels #1291

Merged
merged 21 commits into from
Nov 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0b5cca2
- adapted _create_lagged_data() to prepare data for one_shot models
eliane-maalouf Oct 14, 2022
ced4483
Merge remote-tracking branch 'origin/master' into refactor/1080-onehs…
eliane-maalouf Oct 14, 2022
12fdb65
updated predict() to support one_shot models
eliane-maalouf Oct 14, 2022
229bb19
- added pred_dim parameter to uniformly update all prediction output …
eliane-maalouf Oct 14, 2022
f7e118d
modified variable name for lags in list comprehension
eliane-maalouf Oct 14, 2022
e62a66b
formatting
eliane-maalouf Oct 14, 2022
d29cc23
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
eliane-maalouf Oct 23, 2022
a71943c
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
eliane-maalouf Oct 28, 2022
176f38c
Update darts/utils/data/tabularization.py
eliane-maalouf Oct 28, 2022
abc5513
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
eliane-maalouf Oct 28, 2022
6031657
subsetting numpy.array instead of TimeSeries // propagating the multi…
eliane-maalouf Oct 28, 2022
0bcc07c
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
eliane-maalouf Oct 29, 2022
dd089d5
changed order of argument in super().__init__() // updated first two …
eliane-maalouf Oct 31, 2022
27fc92e
changed order of multi_models argument in super().__init__()
eliane-maalouf Oct 31, 2022
266258f
corrected prediction data preparation (added a remainder for when n %…
eliane-maalouf Oct 31, 2022
ce13837
corrected remainder behavior // first 4 unittests adaptation
eliane-maalouf Oct 31, 2022
639e76e
corrected test_models_runnability() adaptation
eliane-maalouf Oct 31, 2022
4c5c1d5
corrected one_shot last_pred_ts in predict() for one_shot regression …
eliane-maalouf Nov 1, 2022
c65a592
corrected last_pred_ts in test_prediction_data_creation
eliane-maalouf Nov 1, 2022
0f28247
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
eliane-maalouf Nov 3, 2022
fd8fa59
Merge branch 'master' into refactor/1080-onehsot-regressionmodels
hrzn Nov 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions darts/models/forecasting/catboost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
likelihood: str = None,
quantiles: List = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""CatBoost Model
Expand Down Expand Up @@ -79,6 +80,9 @@ def __init__(
random_state
Control the randomness in the fitting procedure and for sampling.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `catboost.CatBoostRegressor`.
"""
Expand Down Expand Up @@ -122,6 +126,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=CatBoostRegressor(**kwargs),
)

Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/gradient_boosted_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Light Gradient Boosted Model
Expand Down Expand Up @@ -81,6 +82,9 @@ def __init__(
random_state
Control the randomness in the fitting procedure and for sampling.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `lightgbm.LGBMRegressor`.
"""
Expand Down Expand Up @@ -109,6 +113,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=lgb.LGBMRegressor(**kwargs),
)

Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/linear_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Linear regression model.
Expand Down Expand Up @@ -83,6 +84,9 @@ def __init__(
<https://numpy.org/doc/stable/reference/random/generator.html#numpy.random.Generator>`_. Ignored when
no `likelihood` is set.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to
`sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to
Expand Down Expand Up @@ -117,6 +121,7 @@ def __init__(
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=model,
multi_models=multi_models,
)

def __str__(self):
Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
add_encoders: Optional[dict] = None,
n_estimators: Optional[int] = 100,
max_depth: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Random Forest Model
Expand Down Expand Up @@ -81,6 +82,9 @@ def __init__(
max_depth : int
The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all
leaves contain less than min_samples_split samples.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `sklearn.ensemble.RandomForestRegressor`.
"""
Expand All @@ -96,6 +100,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=RandomForestRegressor(**kwargs),
)

Expand Down
68 changes: 47 additions & 21 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
model=None,
multi_models: Optional[bool] = True,
eliane-maalouf marked this conversation as resolved.
Show resolved Hide resolved
):
"""Regression Model
Can be used to fit any scikit-learn-like regressor class to predict the target time series from lagged values.
Expand Down Expand Up @@ -97,6 +98,10 @@ def __init__(
support multi-output regression for multivariate timeseries, in which case one regressor
will be used per component in the multivariate series.
If None, defaults to: ``sklearn.linear_model.LinearRegression(n_jobs=-1)``.

multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
"""

super().__init__(add_encoders=add_encoders)
Expand All @@ -105,6 +110,7 @@ def __init__(
self.lags = {}
self.output_chunk_length = None
self.input_dim = None
self.multi_models = multi_models

# model checks
if self.model is None:
Expand Down Expand Up @@ -221,6 +227,8 @@ def __init__(
)
self.output_chunk_length = output_chunk_length

self.pred_dim = self.output_chunk_length if self.multi_models else 1

@property
def _model_encoder_settings(self) -> Tuple[int, int, bool, bool]:
lags_covariates = {
Expand Down Expand Up @@ -305,6 +313,7 @@ def _create_lagged_data(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
max_samples_per_ts=max_samples_per_ts,
multi_models=self.multi_models,
)

return training_samples, training_labels
Expand Down Expand Up @@ -398,7 +407,9 @@ def fit(
}

# if multi-output regression
if not series[0].is_univariate or self.output_chunk_length > 1:
if not series[0].is_univariate or (
self.output_chunk_length > 1 and self.multi_models
):
# and model isn't wrapped already
if not isinstance(self.model, MultiOutputRegressor):
# check whether model supports multi-output regression natively
Expand Down Expand Up @@ -528,6 +539,14 @@ def predict(
"future": (future_covariates, self.lags.get("future")),
}

# prepare one_shot shift and step
if self.multi_models:
shift = 0
step = self.output_chunk_length
else:
shift = self.output_chunk_length - 1
step = 1

# dictionary containing covariate data over time span required for prediction
covariate_matrices = {}
# dictionary containing covariate lags relative to minimum covariate lag
Expand All @@ -542,13 +561,19 @@ def predict(
# calculating first and last prediction time steps
first_pred_ts = ts.end_time() + 1 * ts.freq
last_pred_ts = (
first_pred_ts
+ ((n_pred_steps - 1) * self.output_chunk_length) * ts.freq
(
first_pred_ts
+ ((n_pred_steps - 1) * self.output_chunk_length) * ts.freq
)
if self.multi_models
else (first_pred_ts + (n - 1) * ts.freq)
)
# calculating first and last required time steps
first_req_ts = first_pred_ts + lags[0] * ts.freq
last_req_ts = last_pred_ts + lags[-1] * ts.freq

# calculating first and last required time steps
first_req_ts = (
first_pred_ts + (lags[0] - shift) * ts.freq
) # shift lags if using one_shot
last_req_ts = last_pred_ts + (lags[-1] - shift) * ts.freq
# check for sufficient covariate data
raise_if_not(
cov.start_time() <= first_req_ts
Expand Down Expand Up @@ -578,7 +603,10 @@ def predict(
series_matrix = None
if "target" in self.lags:
series_matrix = np.stack(
[ts[self.lags["target"][0] :].values(copy=False) for ts in series]
[
ts.values(copy=False)[self.lags["target"][0] - shift :, :]
for ts in series
]
)

# repeat series_matrix to shape (num_samples * num_series, n_lags, n_components)
Expand All @@ -591,7 +619,7 @@ def predict(
# prediction
predictions = []
# t_pred indicates the number of time steps after the first prediction
for t_pred in range(0, n, self.output_chunk_length):
for t_pred in range(0, n, step):
np_X = []
# retrieve target lags
if "target" in self.lags:
Expand All @@ -602,9 +630,9 @@ def predict(
else series_matrix
)
np_X.append(
target_matrix[:, self.lags["target"]].reshape(
len(series) * num_samples, -1
)
target_matrix[
:, [lag - shift for lag in self.lags["target"]]
].reshape(len(series) * num_samples, -1)
)
# retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
for cov_type in ["past", "future"]:
Expand Down Expand Up @@ -643,7 +671,8 @@ def _predict_and_sample(
) -> np.ndarray:
prediction = self.model.predict(x, **kwargs)
k = x.shape[0]
return prediction.reshape(k, self.output_chunk_length, -1)

return prediction.reshape(k, self.pred_dim, -1)

def __str__(self):
return self.model.__str__()
Expand Down Expand Up @@ -694,18 +723,17 @@ def _predict_quantiles(
X is of shape (n_series * n_samples, n_regression_features)
"""
k = x.shape[0]

if num_samples == 1:
# return median
fitted = self._model_container[0.5]
return fitted.predict(x, **kwargs).reshape(k, self.output_chunk_length, -1)
return fitted.predict(x, **kwargs).reshape(k, self.pred_dim, -1)

model_outputs = []
for quantile, fitted in self._model_container.items():
self.model = fitted
# model output has shape (n_series * n_samples, pred_dim, n_components)
model_output = fitted.predict(x, **kwargs).reshape(
k, self.output_chunk_length, -1
)
model_output = fitted.predict(x, **kwargs).reshape(k, self.pred_dim, -1)
model_outputs.append(model_output)
model_outputs = np.stack(model_outputs, axis=-1)
# model_outputs has shape (n_series * n_samples, pred_dim, n_components, n_quantiles)
Expand Down Expand Up @@ -738,7 +766,7 @@ def _predict_normal(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndarr
else:
output_slice = model_output[0, :, :]

return output_slice.reshape(k, self.output_chunk_length, -1)
return output_slice.reshape(k, self.pred_dim, -1)

# probabilistic case
# univariate & single-chunk output
Expand All @@ -759,7 +787,7 @@ def _normal_sampling(self, model_output: np.ndarray, n_samples: int) -> np.ndarr
where the last 2 dimensions are mu and sigma.
"""
shape = model_output.shape
chunk_len = self.output_chunk_length
chunk_len = self.pred_dim

# treating each component separately
mu_sigma_list = [model_output[i, :, :] for i in range(shape[0])]
Expand All @@ -783,9 +811,7 @@ def _predict_poisson(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndar
"""
k = x.shape[0]

model_output = self.model.predict(x, **kwargs).reshape(
k, self.output_chunk_length, -1
)
model_output = self.model.predict(x, **kwargs).reshape(k, self.pred_dim, -1)
if num_samples == 1:
return model_output

Expand Down