Improvement/statsforecastets: make sf_ets probabilistic + add future_…

…covariate support for sf_ets + add AutoTheta (#1476) * StatsForecastETS is now probabilistic in the same way as StatsForecastAutoARIMA * include future covariates in sf_ets * sf_ets with future_covariates works, but it probably underestimates the uncertainty because it doesn't take into account the uncertainty of the coefficient estimation of the OLS * Create separate file for StatsForecast models and extract some functions. * Added AutoTheta from the StatsForecast package. * Deleted sf_auto_arima.py and sf_ets.py, because the code is now included in sf_models.py. * Update darts/models/forecasting/sf_models.py Co-authored-by: Julien Herzen <j.herzen@gmail.com> * Update darts/models/forecasting/sf_models.py Co-authored-by: Julien Herzen <j.herzen@gmail.com> * Moved all statsforecast models to their own .py file. Added some comments explaining the handling of future covariates by StatsForecastETS. Included StatsForecastTheta in the tests. Moved the utility functions that the statsforecast models share to a single .py file. Added the CES model which is supposed to be probabilistic, but that doesn't work yet even though it is supposed to be included in statsforecast 1.4.0. Trying to figure out why it isn't working. Removed sf_models.py. * Beginning of test for fit on residuals for statsforecast ets. * - AutoCES not probabilistic anymore, because that is not yet released in statsforecast 1.4.0 - changed AutoETS to SFAutoETS - added models to the base tests - wrote two unit tests for future covariates use for sf_ets * - AutoCES not probabilistic anymore, because that is not yet released in statsforecast 1.4.0 - changed AutoETS to SFAutoETS - added models to the base tests - wrote two unit tests for future covariates use for sf_ets * Changed StatsForecastETS to StatsForecastAutoETS. --------- Co-authored-by: Julien Herzen <julien@unit8.co> Co-authored-by: Julien Herzen <j.herzen@gmail.com>
unit8co · Feb 10, 2023 · e1c8d34 · e1c8d34
1 parent 31528a4
commit e1c8d34
Show file tree

Hide file tree

Showing 9 changed files with 346 additions and 27 deletions.
diff --git a/darts/models/__init__.py b/darts/models/__init__.py
@@ -90,12 +90,15 @@ class NotImportedCatBoostModel:
 try:
     from darts.models.forecasting.croston import Croston
     from darts.models.forecasting.sf_auto_arima import StatsForecastAutoARIMA
-    from darts.models.forecasting.sf_ets import StatsForecastETS
+    from darts.models.forecasting.sf_auto_ces import StatsForecastAutoCES
+    from darts.models.forecasting.sf_auto_ets import StatsForecastAutoETS
+    from darts.models.forecasting.sf_auto_theta import StatsForecastAutoTheta
+
 except ImportError:
     logger.warning(
         "The statsforecast module could not be imported. "
         "To enable support for the StatsForecastAutoARIMA, "
-        "StatsForecastETS and Croston models, please consider "
+        "StatsForecastAutoETS and Croston models, please consider "
         "installing it."
     )
 
@@ -104,10 +107,10 @@ class NotImportedStatsForecastAutoARIMA:
 
     StatsForecastAutoARIMA = NotImportedStatsForecastAutoARIMA()
 
-    class NotImportedStatsForecastETS:
+    class NotImportedStatsForecastAutoETS:
         usable = False
 
-    StatsForecastETS = NotImportedStatsForecastETS()
+    StatsForecastAutoETS = NotImportedStatsForecastAutoETS()
 
     class NotImportedCroston:
         usable = False

diff --git a/darts/models/components/statsforecast_utils.py b/darts/models/components/statsforecast_utils.py
@@ -0,0 +1,30 @@
+"""
+StatsForecast utils
+-----------
+"""
+
+import numpy as np
+
+# In a normal distribution, 68.27 percent of values lie within one standard deviation of the mean
+one_sigma_rule = 68.27
+
+
+def create_normal_samples(
+    mu: float,
+    std: float,
+    num_samples: int,
+    n: int,
+) -> np.array:
+    """Generate samples assuming a Normal distribution.
+
+    The samples are drawn with ``np.random.normal``, i.e. from NumPy's global random state.
+
+    Parameters
+    ----------
+    mu
+        Mean of the distribution, passed to ``np.random.normal`` as ``loc``.
+    std
+        Standard deviation of the distribution, passed to ``np.random.normal`` as ``scale``.
+    num_samples
+        Number of samples to draw for each of the ``n`` steps.
+    n
+        Number of steps to generate samples for.
+
+    Returns
+    -------
+    np.array
+        Array of shape ``(n, 1, num_samples)``.
+
+    NOTE(review): the ``float`` annotations suggest scalars, but the callers visible in this change pass the values
+    returned by ``unpack_sf_dict`` -- presumably arrays of length ``n`` that broadcast against ``size``; confirm.
+    """
+    # draw with shape (num_samples, n), then transpose to (n, num_samples)
+    samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T
+    # insert a singleton middle axis -> (n, 1, num_samples)
+    samples = np.expand_dims(samples, axis=1)
+    return samples
+
+
+def unpack_sf_dict(
+    forecast_dict: dict,
+):
+    """Unpack the dictionary that is returned by the StatsForecast 'predict()' method.
+
+    Parameters
+    ----------
+    forecast_dict
+        Forecast dictionary. It must contain the keys ``"mean"`` and ``f"hi-{one_sigma_rule}"`` (i.e. ``"hi-68.27"``),
+        so the forecast has to be requested with ``level=(one_sigma_rule,)``.
+
+    Returns
+    -------
+    tuple
+        ``(mu, std)``: the mean forecast, and the difference between the upper ``one_sigma_rule`` bound and the mean.
+
+    Raises
+    ------
+    KeyError
+        If one of the two keys is missing (assuming ``forecast_dict`` is a plain ``dict``).
+    """
+    mu = forecast_dict["mean"]
+    # the upper bound of the 68.27% interval is taken to lie one standard deviation above the mean
+    std = forecast_dict[f"hi-{one_sigma_rule}"] - mu
+    return mu, std
diff --git a/darts/models/forecasting/sf_auto_arima.py b/darts/models/forecasting/sf_auto_arima.py
@@ -5,10 +5,14 @@
 
 from typing import Optional
 
-import numpy as np
 from statsforecast.models import AutoARIMA as SFAutoARIMA
 
 from darts import TimeSeries
+from darts.models.components.statsforecast_utils import (
+    create_normal_samples,
+    one_sigma_rule,
+    unpack_sf_dict,
+)
 from darts.models.forecasting.forecasting_model import (
     FutureCovariatesLocalForecastingModel,
 )
@@ -91,17 +95,15 @@ def _predict(
         verbose: bool = False,
     ):
         super()._predict(n, future_covariates, num_samples)
-        forecast_df = self.model.predict(
+        forecast_dict = self.model.predict(
             h=n,
             X=future_covariates.values(copy=False) if future_covariates else None,
-            level=(68.27,),  # ask one std for the confidence interval.
+            level=(one_sigma_rule,),  # ask one std for the confidence interval.
         )
 
-        mu = forecast_df["mean"]
+        mu, std = unpack_sf_dict(forecast_dict)
         if num_samples > 1:
-            std = forecast_df["hi-68.27"] - mu
-            samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T
-            samples = np.expand_dims(samples, axis=1)
+            samples = create_normal_samples(mu, std, num_samples, n)
         else:
             samples = mu
 

diff --git a/darts/models/forecasting/sf_auto_ces.py b/darts/models/forecasting/sf_auto_ces.py
@@ -0,0 +1,80 @@
+"""
+StatsForecastAutoCES
+-----------
+"""
+
+from statsforecast.models import AutoCES as SFAutoCES
+
+from darts import TimeSeries
+from darts.models.forecasting.forecasting_model import LocalForecastingModel
+
+
+class StatsForecastAutoCES(LocalForecastingModel):
+    def __init__(self, *autoces_args, **autoces_kwargs):
+        """Auto-CES based on the `StatsForecast package
+        <https://github.com/Nixtla/statsforecast>`_.
+
+        Automatically selects the best Complex Exponential Smoothing model using an information criterion
+        (see `this paper <https://onlinelibrary.wiley.com/doi/full/10.1002/nav.22074>`_).
+
+        We refer to the `statsforecast AutoCES documentation
+        <https://nixtla.github.io/statsforecast/models.html#autoces>`_
+        for the documentation of the arguments.
+
+        This wrapper is deterministic: ``predict()`` only returns the mean forecast of the underlying model.
+
+        Parameters
+        ----------
+        autoces_args
+            Positional arguments for ``statsforecast.models.AutoCES``.
+        autoces_kwargs
+            Keyword arguments for ``statsforecast.models.AutoCES``.
+
+            ..
+
+        Examples
+        --------
+        >>> from darts.models import StatsForecastAutoCES
+        >>> from darts.datasets import AirPassengersDataset
+        >>> series = AirPassengersDataset().load()
+        >>> model = StatsForecastAutoCES(season_length=12)
+        >>> model.fit(series[:-36])
+        >>> pred = model.predict(36)
+        """
+        super().__init__()
+        # all constructor arguments are forwarded untouched to the wrapped StatsForecast model
+        self.model = SFAutoCES(*autoces_args, **autoces_kwargs)
+
+    def __str__(self):
+        """Return the display name of the model."""
+        return "Auto-CES-Statsforecasts"
+
+    def fit(self, series: TimeSeries):
+        """Fit the wrapped AutoCES model on a univariate series and return ``self``."""
+        super().fit(series)
+        self._assert_univariate(series)
+        # use the training series stored on the model rather than the raw argument
+        series = self.training_series
+        # the wrapped model is fitted on a flat 1-D array of values
+        self.model.fit(
+            series.values(copy=False).flatten(),
+        )
+        return self
+
+    def predict(
+        self,
+        n: int,
+        num_samples: int = 1,
+        verbose: bool = False,
+    ):
+        """Forecast ``n`` steps and return the mean forecast as a series.
+
+        ``num_samples`` is only forwarded to the parent ``predict()``; the returned series is built from the
+        ``"mean"`` entry of the wrapped model's forecast alone. ``verbose`` is not used in this method.
+
+        NOTE(review): presumably the parent ``predict()`` rejects ``num_samples > 1`` for a model whose
+        ``_is_probabilistic()`` is False -- confirm against the base class.
+        """
+        super().predict(n, num_samples)
+        forecast_dict = self.model.predict(
+            h=n,
+        )
+
+        # no prediction intervals are requested here, only the point forecast is used
+        mu = forecast_dict["mean"]
+
+        return self._build_forecast_series(mu)
+
+    @property
+    def min_train_series_length(self) -> int:
+        """Minimum training series length reported by this model (fixed to 10)."""
+        return 10
+
+    def _supports_range_index(self) -> bool:
+        """Report that range-indexed series are supported."""
+        return True
+
+    def _is_probabilistic(self) -> bool:
+        """Report that this model produces deterministic forecasts only."""
+        return False
diff --git a/darts/models/forecasting/sf_ets.py → darts/models/forecasting/sf_auto_ets.py b/darts/models/forecasting/sf_ets.py → darts/models/forecasting/sf_auto_ets.py
@@ -1,19 +1,25 @@
 """
-StatsForecastETS
+StatsForecastAutoETS
 -----------
 """
 
 from typing import Optional
 
-from statsforecast.models import ETS
+from statsforecast.models import AutoETS as SFAutoETS
 
 from darts import TimeSeries
+from darts.models import LinearRegressionModel
+from darts.models.components.statsforecast_utils import (
+    create_normal_samples,
+    one_sigma_rule,
+    unpack_sf_dict,
+)
 from darts.models.forecasting.forecasting_model import (
     FutureCovariatesLocalForecastingModel,
 )
 
 
-class StatsForecastETS(FutureCovariatesLocalForecastingModel):
+class StatsForecastAutoETS(FutureCovariatesLocalForecastingModel):
     def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs):
         """ETS based on `Statsforecasts package
         <https://github.com/Nixtla/statsforecast>`_.
@@ -25,6 +31,12 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs)
         This model accepts the same arguments as the `statsforecast ETS
         <https://nixtla.github.io/statsforecast/models.html#ets>`_ model.
 
+        In addition to the StatsForecast implementation, this model can handle future covariates. It does so by first
+        regressing the series against the future covariates using the :class:`LinearRegressionModel` model and then
+        running StatsForecast's AutoETS on the in-sample residuals from this initial regression. This approach was
+        inspired by `this post of Stephan Kolassa <https://stats.stackexchange.com/q/220885>`_.
+
+
         Parameters
         ----------
         season_length
@@ -64,14 +76,15 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs)
         Examples
         --------
         >>> from darts.datasets import AirPassengersDataset
-        >>> from darts.models import StatsForecastETS
+        >>> from darts.models import StatsForecastAutoETS
         >>> series = AirPassengersDataset().load()
-        >>> model = StatsForecastETS(season_length=12, model="AZZ")
+        >>> model = StatsForecastAutoETS(season_length=12, model="AZZ")
         >>> model.fit(series[:-36])
         >>> pred = model.predict(36)
         """
         super().__init__(add_encoders=add_encoders)
-        self.model = ETS(*ets_args, **ets_kwargs)
+        self.model = SFAutoETS(*ets_args, **ets_kwargs)
+        self._linreg = None
 
     def __str__(self):
         return "ETS-Statsforecasts"
@@ -80,9 +93,25 @@ def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = Non
         super()._fit(series, future_covariates)
         self._assert_univariate(series)
         series = self.training_series
+
+        if future_covariates is not None:
+            # perform OLS and get in-sample residuals
+            linreg = LinearRegressionModel(lags_future_covariates=[0])
+            linreg.fit(series, future_covariates=future_covariates)
+            fitted_values = linreg.model.predict(
+                X=future_covariates.slice_intersect(series).values(copy=False)
+            )
+            fitted_values_ts = TimeSeries.from_times_and_values(
+                times=series.time_index, values=fitted_values
+            )
+            resids = series - fitted_values_ts
+            self._linreg = linreg
+            target = resids
+        else:
+            target = series
+
         self.model.fit(
-            series.values(copy=False).flatten(),
-            X=future_covariates.values(copy=False) if future_covariates else None,
+            target.values(copy=False).flatten(),
         )
         return self
 
@@ -94,12 +123,27 @@ def _predict(
         verbose: bool = False,
     ):
         super()._predict(n, future_covariates, num_samples)
-        forecast_df = self.model.predict(
+        forecast_dict = self.model.predict(
             h=n,
-            X=future_covariates.values(copy=False) if future_covariates else None,
+            level=(one_sigma_rule,),  # ask one std for the confidence interval
         )
 
-        return self._build_forecast_series(forecast_df["mean"])
+        mu_ets, std = unpack_sf_dict(forecast_dict)
+
+        if future_covariates is not None:
+            mu_linreg = self._linreg.predict(n, future_covariates=future_covariates)
+            mu_linreg_values = mu_linreg.values(copy=False).reshape(
+                n,
+            )
+            mu = mu_ets + mu_linreg_values
+        else:
+            mu = mu_ets
+
+        if num_samples > 1:
+            samples = create_normal_samples(mu, std, num_samples, n)
+        else:
+            samples = mu
+        return self._build_forecast_series(samples)
 
     @property
     def min_train_series_length(self) -> int:
@@ -109,4 +153,4 @@ def _supports_range_index(self) -> bool:
         return True
 
     def _is_probabilistic(self) -> bool:
-        return False
+        return True