unit8co · dennisbader · Aug 15, 2023 · Jun 29, 2023 · Jun 29, 2023 · Jun 29, 2023
@@ -10,6 +10,10 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 ### For users of the library:
 
+**Improved**
+- `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
+- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` that allows to use an integer `start` either as the index position or index value/label for `series` indexed with a `pd.RangeIndex`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou).
+
 **Fixed**
 - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).
 - Fixed a bug in `RegressionEnsembleModel.extreme_lags` when the forecasting models have only covariates lags. [#1942](https://github.com/unit8co/darts/pull/1942) by [Antoine Madrona](https://github.com/madtoinou).

@@ -24,6 +24,11 @@
 from random import sample
 from typing import Any, BinaryIO, Callable, Dict, List, Optional, Sequence, Tuple, Union
 
+try:
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal
+
 import numpy as np
 import pandas as pd
 
@@ -560,6 +565,7 @@ def historical_forecasts(
         num_samples: int = 1,
         train_length: Optional[int] = None,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["position", "value"] = "value",
         forecast_horizon: int = 1,
         stride: int = 1,
         retrain: Union[bool, int, Callable[..., bool]] = True,
@@ -609,15 +615,14 @@ def historical_forecasts(
             steps available, all steps up until prediction time are used, as in default case. Needs to be at least
             `min_train_series_length`.
         start
-            Optionally, the first point in time at which a prediction is computed for a future time.
-            This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``.
-            If a ``float``, the parameter will be treated as the proportion of the time series
-            that should lie before the first prediction point.
-            If an ``int``, the parameter will be treated as an integer index to the time index of
-            `series` that will be used as first prediction time.
-            If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time
-            directly.
-            If ``None``, the first prediction time will automatically be set to:
+            Optionally, the first point in time at which a prediction is computed. This parameter supports:
+            ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+            If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+            If an ``int``, it is either the index position of the first prediction point for `series` with a
+            `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+            the index position with `start_format="position"`.
+            If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+            If ``None``, the first prediction point will automatically be set to:
 
             - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
               predictable point is earlier than the first trainable point.
@@ -628,6 +633,13 @@ def historical_forecasts(
             Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
             Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
             (default behavior with ``None``) and start at the first trainable/predictable point.
+        start_format
+            Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a
+            `pd.RangeIndex`.
+            If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+            from `(-len(series), len(series) - 1)`.
+            If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+            an error if the value is not in `series`' index. Default: ``'value'``
         forecast_horizon
             The forecast horizon for the predictions.
         stride
@@ -798,6 +810,7 @@ def retrain_func(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,
@@ -876,6 +889,7 @@ def retrain_func(
                 forecast_horizon=forecast_horizon,
                 overlap_end=overlap_end,
                 start=start,
+                start_format=start_format,
                 show_warnings=show_warnings,
             )
 
@@ -1030,6 +1044,7 @@ def backtest(
         num_samples: int = 1,
         train_length: Optional[int] = None,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["position", "value"] = "value",
         forecast_horizon: int = 1,
         stride: int = 1,
         retrain: Union[bool, int, Callable[..., bool]] = True,
@@ -1085,25 +1100,31 @@ def backtest(
             steps available, all steps up until prediction time are used, as in default case. Needs to be at least
             `min_train_series_length`.
         start
-            Optionally, the first point in time at which a prediction is computed for a future time.
-            This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``.
-            If a ``float``, the parameter will be treated as the proportion of the time series
-            that should lie before the first prediction point.
-            If an ``int``, the parameter will be treated as an integer index to the time index of
-            `series` that will be used as first prediction time.
-            If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time
-            directly.
-            If ``None``, the first prediction time will automatically be set to:
-                 - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
-                 predictable point is earlier than the first trainable point.
-
-                 - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
-                 or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
-
-                 - the first trainable point (given `train_length`) otherwise
+            Optionally, the first point in time at which a prediction is computed. This parameter supports:
+            ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+            If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+            If an ``int``, it is either the index position of the first prediction point for `series` with a
+            `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+            the index position with `start_format="position"`.
+            If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+            If ``None``, the first prediction point will automatically be set to:
+
+            - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
+              predictable point is earlier than the first trainable point.
+            - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
+              or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
+            - the first trainable point (given `train_length`) otherwise
+
             Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
             Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
             (default behavior with ``None``) and start at the first trainable/predictable point.
+        start_format
+            Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a
+            `pd.RangeIndex`.
+            If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+            from `(-len(series), len(series) - 1)`.
+            If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+            an error if the value is not in `series`' index. Default: ``'value'``
         forecast_horizon
             The forecast horizon for the point predictions.
         stride
@@ -1160,6 +1181,7 @@ def backtest(
                 num_samples=num_samples,
                 train_length=train_length,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 retrain=retrain,
@@ -1210,6 +1232,7 @@ def gridsearch(
         forecast_horizon: Optional[int] = None,
         stride: int = 1,
         start: Union[pd.Timestamp, float, int] = 0.5,
+        start_format: Literal["position", "value"] = "value",
         last_points_only: bool = False,
         show_warnings: bool = True,
         val_series: Optional[TimeSeries] = None,
@@ -1275,17 +1298,38 @@ def gridsearch(
         forecast_horizon
             The integer value of the forecasting horizon. Activates expanding window mode.
         stride
-            The number of time steps between two consecutive predictions. Only used in expanding window mode.
+            Only used in expanding window mode. The number of time steps between two consecutive predictions.
         start
-            The ``int``, ``float`` or ``pandas.Timestamp`` that represents the starting point in the time index
-            of `series` from which predictions will be made to evaluate the model.
-            For a detailed description of how the different data types are interpreted, please see the documentation
-            for `ForecastingModel.backtest`. Only used in expanding window mode.
+            Only used in expanding window mode. Optionally, the first point in time at which a prediction is computed.
+            This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``.
+            If a ``float``, it is the proportion of the time series that should lie before the first prediction point.
+            If an ``int``, it is either the index position of the first prediction point for `series` with a
+            `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to
+            the index position with `start_format="position"`.
+            If a ``pandas.Timestamp``, it is the time stamp of the first prediction point.
+            If ``None``, the first prediction point will automatically be set to:
+
+            - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first
+              predictable point is earlier than the first trainable point.
+            - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`),
+              or `retrain` is a Callable and the first trainable point is earlier than the first predictable point.
+            - the first trainable point (given `train_length`) otherwise
+
+            Note: Raises a ValueError if `start` yields a time outside the time index of `series`.
+            Note: If `start` is outside the possible historical forecasting times, will ignore the parameter
+            (default behavior with ``None``) and start at the first trainable/predictable point.
+        start_format
+            Only used in expanding window mode. Defines the `start` format. Only effective when `start` is an integer
+            and `series` is indexed with a `pd.RangeIndex`.
+            If set to 'position', `start` corresponds to the index position of the first predicted point and can range
+            from `(-len(series), len(series) - 1)`.
+            If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise
+            an error if the value is not in `series`' index. Default: ``'value'``
         last_points_only
-            Whether to use the whole forecasts or only the last point of each forecast to compute the error. Only used
-            in expanding window mode.
+            Only used in expanding window mode. Whether to use the whole forecasts or only the last point of each
+            forecast to compute the error.
         show_warnings
-            Whether to show warnings related to the `start` parameter. Only used in expanding window mode.
+            Only used in expanding window mode. Whether to show warnings related to the `start` parameter.
         val_series
             The TimeSeries instance used for validation in split mode. If provided, this series must start right after
             the end of `series`; so that a proper comparison of the forecast can be made.
@@ -1386,6 +1430,7 @@ def _evaluate_combination(param_combination) -> float:
                     future_covariates=future_covariates,
                     num_samples=1,
                     start=start,
+                    start_format=start_format,
                     forecast_horizon=forecast_horizon,
                     stride=stride,
                     metric=metric,
@@ -1893,6 +1938,7 @@ def _optimized_historical_forecasts(
         future_covariates: Optional[Sequence[TimeSeries]] = None,
         num_samples: int = 1,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["position", "value"] = "value",
         forecast_horizon: int = 1,
         stride: int = 1,
         overlap_end: bool = False,

@@ -29,6 +29,11 @@
 from collections import OrderedDict
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 
+try:
+    from typing import Literal
+except ImportError:
+    from typing_extensions import Literal
+
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression
@@ -897,6 +902,7 @@ def _optimized_historical_forecasts(
         future_covariates: Optional[Sequence[TimeSeries]] = None,
         num_samples: int = 1,
         start: Optional[Union[pd.Timestamp, float, int]] = None,
+        start_format: Literal["position", "value"] = "value",
         forecast_horizon: int = 1,
         stride: int = 1,
         overlap_end: bool = False,
@@ -949,6 +955,7 @@ def _optimized_historical_forecasts(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,
@@ -963,6 +970,7 @@ def _optimized_historical_forecasts(
                 future_covariates=future_covariates,
                 num_samples=num_samples,
                 start=start,
+                start_format=start_format,
                 forecast_horizon=forecast_horizon,
                 stride=stride,
                 overlap_end=overlap_end,