Skip to content

Commit

Permalink
Refactor/1080 oneshot regressionmodels (#1291)
Browse files Browse the repository at this point in the history
* adapted RegressionModel _create_lagged_data() to prepare data for one_shot models + tabularization.py

* updated RegressionModel predict() to support one_shot models

* adapted regression models unittests

Co-authored-by: Julien Herzen <julien@unit8.co>
  • Loading branch information
eliane-maalouf and hrzn committed Nov 7, 2022
1 parent 2cb66b6 commit cab33a1
Show file tree
Hide file tree
Showing 7 changed files with 972 additions and 688 deletions.
5 changes: 5 additions & 0 deletions darts/models/forecasting/catboost_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
likelihood: str = None,
quantiles: List = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""CatBoost Model
Expand Down Expand Up @@ -79,6 +80,9 @@ def __init__(
random_state
Control the randomness in the fitting procedure and for sampling.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `catboost.CatBoostRegressor`.
"""
Expand Down Expand Up @@ -122,6 +126,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=CatBoostRegressor(**kwargs),
)

Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/gradient_boosted_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Light Gradient Boosted Model
Expand Down Expand Up @@ -81,6 +82,9 @@ def __init__(
random_state
Control the randomness in the fitting procedure and for sampling.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `lightgbm.LGBMRegressor`.
"""
Expand Down Expand Up @@ -109,6 +113,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=lgb.LGBMRegressor(**kwargs),
)

Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/linear_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
likelihood: str = None,
quantiles: List[float] = None,
random_state: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Linear regression model.
Expand Down Expand Up @@ -83,6 +84,9 @@ def __init__(
<https://numpy.org/doc/stable/reference/random/generator.html#numpy.random.Generator>`_. Ignored when
no `likelihood` is set.
Default: ``None``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to
`sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to
Expand Down Expand Up @@ -117,6 +121,7 @@ def __init__(
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
model=model,
multi_models=multi_models,
)

def __str__(self):
Expand Down
5 changes: 5 additions & 0 deletions darts/models/forecasting/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
add_encoders: Optional[dict] = None,
n_estimators: Optional[int] = 100,
max_depth: Optional[int] = None,
multi_models: Optional[bool] = True,
**kwargs,
):
"""Random Forest Model
Expand Down Expand Up @@ -81,6 +82,9 @@ def __init__(
max_depth : int
The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all
leaves contain less than min_samples_split samples.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
**kwargs
Additional keyword arguments passed to `sklearn.ensemble.RandomForestRegressor`.
"""
Expand All @@ -96,6 +100,7 @@ def __init__(
lags_future_covariates=lags_future_covariates,
output_chunk_length=output_chunk_length,
add_encoders=add_encoders,
multi_models=multi_models,
model=RandomForestRegressor(**kwargs),
)

Expand Down
68 changes: 47 additions & 21 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
output_chunk_length: int = 1,
add_encoders: Optional[dict] = None,
model=None,
multi_models: Optional[bool] = True,
):
"""Regression Model
Can be used to fit any scikit-learn-like regressor class to predict the target time series from lagged values.
Expand Down Expand Up @@ -97,6 +98,10 @@ def __init__(
support multi-output regression for multivariate timeseries, in which case one regressor
will be used per component in the multivariate series.
If None, defaults to: ``sklearn.linear_model.LinearRegression(n_jobs=-1)``.
multi_models
If True, a separate model will be trained for each future lag to predict. If False, a single model is
trained to predict at step 'output_chunk_length' in the future. Default: True.
"""

super().__init__(add_encoders=add_encoders)
Expand All @@ -105,6 +110,7 @@ def __init__(
self.lags = {}
self.output_chunk_length = None
self.input_dim = None
self.multi_models = multi_models

# model checks
if self.model is None:
Expand Down Expand Up @@ -221,6 +227,8 @@ def __init__(
)
self.output_chunk_length = output_chunk_length

self.pred_dim = self.output_chunk_length if self.multi_models else 1

@property
def _model_encoder_settings(self) -> Tuple[int, int, bool, bool]:
lags_covariates = {
Expand Down Expand Up @@ -305,6 +313,7 @@ def _create_lagged_data(
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
max_samples_per_ts=max_samples_per_ts,
multi_models=self.multi_models,
)

return training_samples, training_labels
Expand Down Expand Up @@ -398,7 +407,9 @@ def fit(
}

# if multi-output regression
if not series[0].is_univariate or self.output_chunk_length > 1:
if not series[0].is_univariate or (
self.output_chunk_length > 1 and self.multi_models
):
# and model isn't wrapped already
if not isinstance(self.model, MultiOutputRegressor):
# check whether model supports multi-output regression natively
Expand Down Expand Up @@ -528,6 +539,14 @@ def predict(
"future": (future_covariates, self.lags.get("future")),
}

# prepare one_shot shift and step
if self.multi_models:
shift = 0
step = self.output_chunk_length
else:
shift = self.output_chunk_length - 1
step = 1

# dictionary containing covariate data over time span required for prediction
covariate_matrices = {}
# dictionary containing covariate lags relative to minimum covariate lag
Expand All @@ -542,13 +561,19 @@ def predict(
# calculating first and last prediction time steps
first_pred_ts = ts.end_time() + 1 * ts.freq
last_pred_ts = (
first_pred_ts
+ ((n_pred_steps - 1) * self.output_chunk_length) * ts.freq
(
first_pred_ts
+ ((n_pred_steps - 1) * self.output_chunk_length) * ts.freq
)
if self.multi_models
else (first_pred_ts + (n - 1) * ts.freq)
)
# calculating first and last required time steps
first_req_ts = first_pred_ts + lags[0] * ts.freq
last_req_ts = last_pred_ts + lags[-1] * ts.freq

# calculating first and last required time steps
first_req_ts = (
first_pred_ts + (lags[0] - shift) * ts.freq
) # shift lags if using one_shot
last_req_ts = last_pred_ts + (lags[-1] - shift) * ts.freq
# check for sufficient covariate data
raise_if_not(
cov.start_time() <= first_req_ts
Expand Down Expand Up @@ -578,7 +603,10 @@ def predict(
series_matrix = None
if "target" in self.lags:
series_matrix = np.stack(
[ts[self.lags["target"][0] :].values(copy=False) for ts in series]
[
ts.values(copy=False)[self.lags["target"][0] - shift :, :]
for ts in series
]
)

# repeat series_matrix to shape (num_samples * num_series, n_lags, n_components)
Expand All @@ -591,7 +619,7 @@ def predict(
# prediction
predictions = []
# t_pred indicates the number of time steps after the first prediction
for t_pred in range(0, n, self.output_chunk_length):
for t_pred in range(0, n, step):
np_X = []
# retrieve target lags
if "target" in self.lags:
Expand All @@ -602,9 +630,9 @@ def predict(
else series_matrix
)
np_X.append(
target_matrix[:, self.lags["target"]].reshape(
len(series) * num_samples, -1
)
target_matrix[
:, [lag - shift for lag in self.lags["target"]]
].reshape(len(series) * num_samples, -1)
)
# retrieve covariate lags, enforce order (dict only preserves insertion order for python 3.6+)
for cov_type in ["past", "future"]:
Expand Down Expand Up @@ -643,7 +671,8 @@ def _predict_and_sample(
) -> np.ndarray:
prediction = self.model.predict(x, **kwargs)
k = x.shape[0]
return prediction.reshape(k, self.output_chunk_length, -1)

return prediction.reshape(k, self.pred_dim, -1)

def __str__(self):
return self.model.__str__()
Expand Down Expand Up @@ -694,18 +723,17 @@ def _predict_quantiles(
X is of shape (n_series * n_samples, n_regression_features)
"""
k = x.shape[0]

if num_samples == 1:
# return median
fitted = self._model_container[0.5]
return fitted.predict(x, **kwargs).reshape(k, self.output_chunk_length, -1)
return fitted.predict(x, **kwargs).reshape(k, self.pred_dim, -1)

model_outputs = []
for quantile, fitted in self._model_container.items():
self.model = fitted
# model output has shape (n_series * n_samples, pred_dim, n_components)
model_output = fitted.predict(x, **kwargs).reshape(
k, self.output_chunk_length, -1
)
model_output = fitted.predict(x, **kwargs).reshape(k, self.pred_dim, -1)
model_outputs.append(model_output)
model_outputs = np.stack(model_outputs, axis=-1)
# model_outputs has shape (n_series * n_samples, pred_dim, n_components, n_quantiles)
Expand Down Expand Up @@ -738,7 +766,7 @@ def _predict_normal(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndarr
else:
output_slice = model_output[0, :, :]

return output_slice.reshape(k, self.output_chunk_length, -1)
return output_slice.reshape(k, self.pred_dim, -1)

# probabilistic case
# univariate & single-chunk output
Expand All @@ -759,7 +787,7 @@ def _normal_sampling(self, model_output: np.ndarray, n_samples: int) -> np.ndarr
where the last 2 dimensions are mu and sigma.
"""
shape = model_output.shape
chunk_len = self.output_chunk_length
chunk_len = self.pred_dim

# treating each component separately
mu_sigma_list = [model_output[i, :, :] for i in range(shape[0])]
Expand All @@ -783,9 +811,7 @@ def _predict_poisson(self, x: np.ndarray, num_samples: int, **kwargs) -> np.ndar
"""
k = x.shape[0]

model_output = self.model.predict(x, **kwargs).reshape(
k, self.output_chunk_length, -1
)
model_output = self.model.predict(x, **kwargs).reshape(k, self.pred_dim, -1)
if num_samples == 1:
return model_output

Expand Down

0 comments on commit cab33a1

Please sign in to comment.