unit8co · dennisbader · Aug 15, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023
@@ -10,6 +10,10 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 ### For users of the library:
 
+**Improved**
+- `TimeSeries` with a `RangeIndex` starting at a negative value are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
+- Added a new argument `start_format` to `historical_forecasts`. When it is set to `'index'`, `start` is interpreted as a positional index (positive or negative) into the series instead of a point of the time index. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
+
 **Fixed**
 - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).
 - Fixed a bug in `RegressionEnsembleModel.extreme_lags` when the forecasting models have only covariates lags. [#1942](https://github.com/unit8co/darts/pull/1942) by [Antoine Madrona](https://github.com/madtoinou).

@@ -22,7 +22,18 @@
 from collections import OrderedDict
 from itertools import product
 from random import sample
-from typing import Any, BinaryIO, Callable, Dict, List, Optional, Sequence, Tuple, Union
+from typing import (
+    Any,
+    BinaryIO,
+    Callable,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
 
 import numpy as np
 import pandas as pd
@@ -560,6 +571,7 @@ def historical_forecasts(
         num_samples: int = 1,
         train_length: Optional[int] = None,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["point", "index"] = "point",
         forecast_horizon: int = 1,
         stride: int = 1,
         retrain: Union[bool, int, Callable[..., bool]] = True,
@@ -610,7 +622,7 @@ def historical_forecasts(
             `min_train_series_length`.
         start
             Optionally, the first point in time at which a prediction is computed for a future time.
-            This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``.
+            This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
             If a ``float``, the parameter will be treated as the proportion of the time series
             that should lie before the first prediction point.
             If an ``int``, the parameter will be treated as an integer index to the time index of
@@ -628,6 +640,9 @@ def historical_forecasts(
             Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
             Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
             (default behavior with ``None``) and start at the first trainable/predictable point.
+        start_format
+            Either ``'point'`` or ``'index'``. If set to ``'index'``, `start` must be an integer and is interpreted
+            as the position (positive or negative) in `series` of the first prediction point. Default: ``'point'``.
         forecast_horizon
             The forecast horizon for the predictions.
         stride
@@ -798,6 +813,7 @@ def retrain_func(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,
@@ -876,6 +892,7 @@ def retrain_func(
                 forecast_horizon=forecast_horizon,
                 overlap_end=overlap_end,
                 start=start,
+                start_format=start_format,
                 show_warnings=show_warnings,
             )
 
@@ -1893,6 +1910,7 @@ def _optimized_historical_forecasts(
         future_covariates: Optional[Sequence[TimeSeries]] = None,
         num_samples: int = 1,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["point", "index"] = "point",
         forecast_horizon: int = 1,
         stride: int = 1,
         overlap_end: bool = False,

@@ -27,7 +27,7 @@
 if their static covariates do not have the same size, the shorter ones are padded with 0 valued features.
 """
 from collections import OrderedDict
-from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -897,6 +897,7 @@ def _optimized_historical_forecasts(
         future_covariates: Optional[Sequence[TimeSeries]] = None,
         num_samples: int = 1,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["point", "index"] = "point",
         forecast_horizon: int = 1,
         stride: int = 1,
         overlap_end: bool = False,
@@ -949,6 +950,7 @@ def _optimized_historical_forecasts(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,
@@ -963,6 +965,7 @@ def _optimized_historical_forecasts(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,

@@ -374,6 +374,48 @@ def test_historical_forecasts_local_models(self):
             "LocalForecastingModel does not support historical forecasting with `retrain` set to `False`"
         )
 
+    def test_historical_forecasts_index_start(self):
+        series = tg.sine_timeseries(length=10)
+
+        model = LinearRegressionModel(lags=2)
+        model.fit(series[:8])
+
+        # negative index
+        forecasts = model.historical_forecasts(
+            series=series, start=-2, start_format="index", retrain=False
+        )
+        self.assertEqual(len(forecasts), 2)
+        self.assertTrue((series.time_index[-2:] == forecasts.time_index).all())
+
+        # positive index
+        forecasts = model.historical_forecasts(
+            series=series, start=5, start_format="index", retrain=False
+        )
+        self.assertEqual(len(forecasts), 5)
+        self.assertTrue((series.time_index[5:] == forecasts.time_index).all())
+
+    def test_historical_forecasts_negative_rangeindex(self):
+        series = TimeSeries.from_times_and_values(
+            times=pd.RangeIndex(start=-5, stop=5, step=1), values=np.arange(10)
+        )
+
+        model = LinearRegressionModel(lags=2)
+        model.fit(series[:8])
+
+        # start as point: -2 is a label of the time index (position 3), so 7 forecasts are expected
+        forecasts = model.historical_forecasts(
+            series=series, start=-2, start_format="point", retrain=False
+        )
+        self.assertEqual(len(forecasts), 7)
+        self.assertTrue((series.time_index[-7:] == forecasts.time_index).all())
+
+        # start as index: -2 is a position counted from the end, so 2 forecasts are expected
+        forecasts = model.historical_forecasts(
+            series=series, start=-2, start_format="index", retrain=False
+        )
+        self.assertEqual(len(forecasts), 2)
+        self.assertTrue((series.time_index[-2:] == forecasts.time_index).all())
+
     def test_historical_forecasts(self):
         train_length = 10
         forecast_horizon = 8
@@ -551,7 +593,7 @@ def test_sanity_check_invalid_start(self):
         rangeidx_step1 = tg.linear_timeseries(start=0, length=10, freq=1)
         rangeidx_step2 = tg.linear_timeseries(start=0, length=10, freq=2)
 
-        # index too large
+        # point (int) too large
         with pytest.raises(ValueError) as msg:
             LinearRegressionModel(lags=1).historical_forecasts(timeidx_, start=11)
         assert str(msg.value).startswith("`start` index `11` is out of bounds")
@@ -562,26 +604,32 @@ def test_sanity_check_invalid_start(self):
             LinearRegressionModel(lags=1).historical_forecasts(rangeidx_step2, start=11)
         assert str(msg.value).startswith("The provided point is not a valid index")
 
-        # value too low
+        # point (int) too low
         with pytest.raises(ValueError) as msg:
             LinearRegressionModel(lags=1).historical_forecasts(
-                timeidx_, start=timeidx_.start_time() - timeidx_.freq
+                rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq
             )
         assert str(msg.value).startswith(
-            "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`"
+            "The index corresponding to the provided point ("
         )
         with pytest.raises(ValueError) as msg:
             LinearRegressionModel(lags=1).historical_forecasts(
-                rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq
+                rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq
             )
-        assert str(msg.value).startswith("if `start` is an integer, must be `>= 0`")
+        assert str(msg.value).startswith(
+            "The index corresponding to the provided point ("
+        )
+
+        # point (timestamp) too low
         with pytest.raises(ValueError) as msg:
             LinearRegressionModel(lags=1).historical_forecasts(
-                rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq
+                timeidx_, start=timeidx_.start_time() - timeidx_.freq
             )
-        assert str(msg.value).startswith("if `start` is an integer, must be `>= 0`")
+        assert str(msg.value).startswith(
+            "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`"
+        )
 
-        # value too high
+        # point (timestamp) too high
         with pytest.raises(ValueError) as msg:
             LinearRegressionModel(lags=1).historical_forecasts(
                 timeidx_, start=timeidx_.end_time() + timeidx_.freq
@@ -602,6 +650,52 @@ def test_sanity_check_invalid_start(self):
             "`start` index `20` is larger than the last index `18`"
         )
 
+        # index too high when start_format = 'index'
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                timeidx_, start=11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `11` is out of bounds for series of length 10"
+        )
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                rangeidx_step1, start=11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `11` is out of bounds for series of length 10"
+        )
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                rangeidx_step2, start=11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `11` is out of bounds for series of length 10"
+        )
+
+        # index too low (negative) when start_format = 'index'
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                timeidx_, start=-11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `-11` is out of bounds for series of length 10"
+        )
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                rangeidx_step1, start=-11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `-11` is out of bounds for series of length 10"
+        )
+        with pytest.raises(ValueError) as msg:
+            LinearRegressionModel(lags=1).historical_forecasts(
+                rangeidx_step2, start=-11, start_format="index"
+            )
+        assert str(msg.value).startswith(
+            "`start` index `-11` is out of bounds for series of length 10"
+        )
+
     def test_regression_auto_start_multiple_no_cov(self):
         train_length = 15
         forecast_horizon = 10

@@ -216,7 +216,7 @@ def __init__(self, xa: xr.DataArray):
                     logger,
                 )
         else:
-            self._freq = self._time_index.step
+            self._freq: int = self._time_index.step
             self._freq_str = None
 
         # check static covariates
@@ -2064,7 +2064,7 @@ def get_index_at_point(
         Parameters
         ----------
         point
-            This parameter supports 3 different data types: ``pd.Timestamp``, ``float`` and ``int``.
+            This parameter supports 4 different data types: ``pd.Timestamp``, ``float``, ``int`` and ``dict``.
 
             ``pd.Timestamp`` work only on series that are indexed with a ``pd.DatetimeIndex``. In such cases, the
             returned point will be the index of this timestamp if it is present in the series time index.
@@ -2103,7 +2103,7 @@ def get_index_at_point(
                 )
             raise_if_not(
                 0 <= point_index < len(self),
-                "point (int) should be a valid index in series",
+                f"The index corresponding to the provided point ({point}) should be a valid index in series",
                 logger,
             )
         elif isinstance(point, pd.Timestamp):
@@ -2142,8 +2142,8 @@ def get_timestamp_at_point(
             This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
             In case of a `float`, the parameter will be treated as the proportion of the time series
             that should lie before the point.
-            In the case of `int`, the parameter will be treated as an integer index to the time index of
-            `series`. Will raise a ValueError if not a valid index in `series`
+            In case of `int`, the parameter will be treated as an integer index to the time index of
+            `series`. Will raise a ValueError if not a valid index in `series`.
             In case of a `pandas.Timestamp`, point will be returned as is provided that the timestamp
             is present in the series time index, otherwise will raise a ValueError.
         """

@@ -1,4 +1,4 @@
-from typing import List, Optional, Sequence, Union
+from typing import List, Literal, Optional, Sequence, Union
 
 import numpy as np
 import pandas as pd
@@ -20,6 +20,7 @@ def _optimized_historical_forecasts_regression_last_points_only(
     future_covariates: Optional[Sequence[TimeSeries]] = None,
     num_samples: int = 1,
     start: Optional[Union[pd.Timestamp, float, int]] = None,
+    start_format: Literal["point", "index"] = "point",
     forecast_horizon: int = 1,
     stride: int = 1,
     overlap_end: bool = False,
@@ -63,6 +64,7 @@ def _optimized_historical_forecasts_regression_last_points_only(
             past_covariates=past_covariates_,
             future_covariates=future_covariates_,
             start=start,
+            start_format=start_format,
             forecast_horizon=forecast_horizon,
             overlap_end=overlap_end,
             freq=freq,
@@ -156,6 +158,7 @@ def _optimized_historical_forecasts_regression_all_points(
     future_covariates: Optional[Sequence[TimeSeries]] = None,
     num_samples: int = 1,
     start: Optional[Union[pd.Timestamp, float, int]] = None,
+    start_format: Literal["point", "index"] = "point",
     forecast_horizon: int = 1,
     stride: int = 1,
     overlap_end: bool = False,
@@ -199,6 +202,7 @@ def _optimized_historical_forecasts_regression_all_points(
             past_covariates=past_covariates_,
             future_covariates=future_covariates_,
             start=start,
+            start_format=start_format,
             forecast_horizon=forecast_horizon,
             overlap_end=overlap_end,
             freq=freq,