Merge branch 'master' into support/wrapped-models-in-gridsearch

unit8co · Jan 26, 2024 · dbc31d5 · dbc31d5
2 parents 910bef1 + 20ee5ec
commit dbc31d5
Show file tree

Hide file tree

Showing 21 changed files with 356 additions and 96 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,6 +1,6 @@
 [bumpversion]
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)|dev
-current_version = 0.27.1
+current_version = 0.27.2
 
 [bumpversion:file:setup.py]
 

diff --git a/.gitignore b/.gitignore
@@ -12,7 +12,7 @@ build/
 dist/
 examples/.ipynb_checkpoints/
 runs/
-.coverage
+.coverage*
 htmlcov
 coverage.xml
 .darts

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,7 +6,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 ## [Unreleased](https://github.com/unit8co/darts/tree/master)
 
-[Full Changelog](https://github.com/unit8co/darts/compare/0.27.1...master)
+[Full Changelog](https://github.com/unit8co/darts/compare/0.27.2...master)
 
 ### For users of the library:
 **Improved**
@@ -15,6 +15,17 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 ### For developers of the library:
 
+## [0.27.2](https://github.com/unit8co/darts/tree/0.27.2) (2024-01-21)
+### For users of the library:
+**Improved**
+- Added `darts.utils.statistics.plot_ccf` that can be used to plot the cross correlation between a time series (e.g. target series) and the lagged values of another time series (e.g. covariates series). [#2122](https://github.com/unit8co/darts/pull/2122) by [Dennis Bader](https://github.com/dennisbader).
+- Improvements to `TimeSeries`: Improved the time series frequency inference when using slices or pandas DatetimeIndex as keys for `__getitem__`. [#2152](https://github.com/unit8co/darts/pull/2152) by [DavidKleindienst](https://github.com/DavidKleindienst).
+
+**Fixed**
+- Fixed a bug when using a `TorchForecastingModel` with `use_reversible_instance_norm=True` and predicting with `n > output_chunk_length`. The input was normalized multiple times. [#2160](https://github.com/unit8co/darts/pull/2160) by [FourierMourier](https://github.com/FourierMourier).
+
+### For developers of the library:
+
 ## [0.27.1](https://github.com/unit8co/darts/tree/0.27.1) (2023-12-10)
 ### For users of the library:
 **Improved**

diff --git a/conda_recipe/darts/meta.yaml b/conda_recipe/darts/meta.yaml
@@ -2,7 +2,7 @@
 
 package:
   name: "darts"
-  version: "0.27.1"
+  version: "0.27.2"
 
 source:
   # root folder, not the package

diff --git a/darts/__init__.py b/darts/__init__.py
@@ -10,7 +10,7 @@
 
 from .timeseries import TimeSeries, concatenate
 
-__version__ = "0.27.1"
+__version__ = "0.27.2"
 
 colors = cycler(
     color=["black", "003DFD", "b512b8", "11a9ba", "0d780f", "f77f07", "ba0f0f"]

diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py
@@ -93,7 +93,15 @@ def __init__(
         .. [1] https://en.wikipedia.org/wiki/Mixed-data_sampling
         .. [2] https://pandas.pydata.org/docs/user_guide/timeseries.html#dateoffset-objects
         """
-        self._low_freq = low_freq
+        if pd.tseries.frequencies.get_period_alias(low_freq) is None:
+            raise_log(
+                ValueError(
+                    f"Cannot infer period alias for `low_freq={low_freq}`. "
+                    f"Is it a valid pandas offset/frequency alias?"
+                ),
+                logger=logger,
+            )
+        self._low_freq = pd.tseries.frequencies.to_offset(low_freq).freqstr
         self._strip = strip
         self._drop_static_covariates = drop_static_covariates
         self._sep = "_midas_"

diff --git a/darts/datasets/__init__.py b/darts/datasets/__init__.py
@@ -18,6 +18,8 @@
 
 from .dataset_loaders import DatasetLoaderCSV, DatasetLoaderMetadata
 
+pd_above_v22 = pd.__version__ >= "2.2"
+
 """
     Overall usage of this package:
     from darts.datasets import AirPassengersDataset
@@ -886,6 +888,12 @@ def pre_process_dataset(dataset_path):
             df.index.name = "Timestamp"
             df.to_csv(self._get_path_dataset())
 
+        # the expected hash depends on the pandas version (pandas v2.2.0 introduced some changes)
+        hash_expected = (
+            "485d81e9902cc0ccb1f86d7e01fb37cd"
+            if pd_above_v22
+            else "a019125b7f9c1afeacb0ae60ce7455ef"
+        )
         # hash value for dataset with weather data
         super().__init__(
             metadata=DatasetLoaderMetadata(
@@ -895,7 +903,7 @@ def pre_process_dataset(dataset_path):
                     "ewz_stromabgabe_netzebenen_stadt_zuerich/"
                     "download/ewz_stromabgabe_netzebenen_stadt_zuerich.csv"
                 ),
-                hash="a019125b7f9c1afeacb0ae60ce7455ef",
+                hash=hash_expected,
                 header_time="Timestamp",
                 freq="15min",
                 pre_process_csv_fn=pre_process_dataset,

diff --git a/darts/models/forecasting/pl_forecasting_module.py b/darts/models/forecasting/pl_forecasting_module.py
@@ -50,7 +50,8 @@ def forward_wrapper(self, *args, **kwargs):
 
         # x is input batch tuple which by definition has the past features in the first element starting with the
         # first n target features
-        x: Tuple = args[0][0]
+        # assuming `args[0][0]` is a torch.Tensor, we clone it to prevent target re-normalization
+        x: Tuple = args[0][0].clone()
         # apply reversible instance normalization
         x[:, :, : self.n_targets] = self.rin(x[:, :, : self.n_targets])
         # run the forward pass

diff --git a/darts/models/forecasting/prophet_model.py b/darts/models/forecasting/prophet_model.py
@@ -594,6 +594,7 @@ def _freq_to_days(freq: str) -> float:
         freq = "".join(re.split("[^a-zA-Z-]*", freq)).split("-")[0]
 
         seconds_per_day = 86400
+        days = 0
         if freq in ["A", "BA", "Y", "BY", "RE"] or freq.startswith(
             ("A", "BA", "Y", "BY", "RE")
         ):  # year
@@ -612,23 +613,28 @@ def _freq_to_days(freq: str) -> float:
             days = 1 * 7 / 5
         elif freq in ["D"]:  # day
             days = 1.0
-        elif freq in ["H", "BH", "CBH"]:  # hour
-            days = 1 / 24
-        elif freq in ["T", "min"]:  # minute
-            days = 1 / (24 * 60)
-        elif freq in ["S"]:  # second
-            days = 1 / seconds_per_day
-        elif freq in ["L", "ms"]:  # millisecond
-            days = 1 / (seconds_per_day * 10**3)
-        elif freq in ["U", "us"]:  # microsecond
-            days = 1 / (seconds_per_day * 10**6)
-        elif freq in ["N"]:  # nanosecond
-            days = 1 / (seconds_per_day * 10**9)
         else:
-            raise ValueError(
-                "freq {} not understood. Please report if you think this is in error.".format(
-                    freq
-                )
+            # all freqs higher than "D" are lower case in pandas >= 2.2.0
+            freq_lower = freq.lower()
+            if freq_lower in ["h", "bh", "cbh"]:  # hour
+                days = 1 / 24
+            elif freq_lower in ["t", "min"]:  # minute
+                days = 1 / (24 * 60)
+            elif freq_lower in ["s"]:  # second
+                days = 1 / seconds_per_day
+            elif freq_lower in ["l", "ms"]:  # millisecond
+                days = 1 / (seconds_per_day * 10**3)
+            elif freq_lower in ["u", "us"]:  # microsecond
+                days = 1 / (seconds_per_day * 10**6)
+            elif freq_lower in ["n"]:  # nanosecond
+                days = 1 / (seconds_per_day * 10**9)
+
+        if not days:
+            raise_log(
+                ValueError(
+                    f"freq {freq} not understood. Please report if you think this is in error."
+                ),
+                logger=logger,
             )
         return freq_times * days
 

diff --git a/darts/tests/dataprocessing/transformers/test_midas.py b/darts/tests/dataprocessing/transformers/test_midas.py
@@ -7,6 +7,12 @@
 from darts.models import LinearRegressionModel
 from darts.utils.timeseries_generation import generate_index, linear_timeseries
 
+# TODO: remove this once bumping min python version from 3.8 to 3.9 (pandas v2.2.0 not available for p38)
+pd_above_v22 = pd.__version__ >= "2.2"
+freq_quarter_end = "QE" if pd_above_v22 else "Q"
+freq_month_end = "ME" if pd_above_v22 else "M"
+freq_minute = "min" if pd_above_v22 else "T"
+
 
 class TestMIDAS:
     monthly_ts = linear_timeseries(
@@ -57,7 +63,7 @@ def test_complete_monthly_to_quarterly(self):
         assert self.monthly_ts == inversed_quarterly_ts_midas
 
         # to quarter end
-        midas_2 = MIDAS(low_freq="Q")
+        midas_2 = MIDAS(low_freq=freq_quarter_end)
         quarterly_ts_midas = midas_2.fit_transform(self.monthly_ts)
         assert quarterly_ts_midas == self.quarterly_with_quarter_end_index_ts
 
@@ -332,23 +338,28 @@ def test_from_second_to_minute(self):
             columns=[f"values_midas_{i}" for i in range(60)],
         )
 
-        midas = MIDAS(low_freq="T")
+        midas = MIDAS(low_freq=freq_minute)
         minute_ts_midas = midas.fit_transform(second_ts)
         assert minute_ts_midas == minute_ts
         second_ts_midas = midas.inverse_transform(minute_ts_midas)
         assert second_ts_midas == second_ts
 
+    def test_error_with_invalid_freq(self):
+        with pytest.raises(ValueError) as err:
+            _ = MIDAS(low_freq="MEE")
+        assert str(err.value).startswith("Cannot infer period alias for")
+
     def test_error_when_from_low_to_high(self):
         """
         Tests if the transformer raises an error when the user asks for a transform in the wrong direction.
         """
         # wrong direction : low to high freq
-        midas_1 = MIDAS(low_freq="M")
+        midas_1 = MIDAS(low_freq=freq_month_end)
         with pytest.raises(ValueError):
             midas_1.fit_transform(self.quarterly_ts)
 
         # transform to same index requested
-        midas_2 = MIDAS(low_freq="Q")
+        midas_2 = MIDAS(low_freq=freq_quarter_end)
         with pytest.raises(ValueError):
             midas_2.fit_transform(self.quarterly_ts)
 
@@ -365,7 +376,7 @@ def test_error_when_frequency_not_suitable_for_midas(self):
             times=daily_times, values=daily_values, columns=["values"]
         )
 
-        midas = MIDAS(low_freq="M")
+        midas = MIDAS(low_freq=freq_month_end)
         with pytest.raises(ValueError) as msg:
             midas.fit_transform(daily_ts)
         assert str(msg.value).startswith(
@@ -403,7 +414,7 @@ def test_inverse_transform_prediction(self):
         assert pred_monthly.time_index.equals(monthly_test_ts.time_index)
 
         # "Q" = QuarterEnd, the 2 "hidden" months must be retrieved
-        midas_quarterly = MIDAS(low_freq="Q")
+        midas_quarterly = MIDAS(low_freq=freq_quarter_end)
         quarterly_train_ts = midas_quarterly.fit_transform(monthly_train_ts)
         quarterly_test_ts = midas_quarterly.transform(monthly_test_ts)
 
@@ -439,12 +450,12 @@ def test_multiple_ts(self):
 
         ts_to_transform = [self.monthly_ts, quarterly_univariate_ts]
         # ==> with stripping: not enough months, first series will be empty
-        midas_yearly = MIDAS(low_freq="AS", strip=True)
+        midas_yearly = MIDAS(low_freq="YS", strip=True)
 
         list_yearly_ts = midas_yearly.fit_transform(ts_to_transform)
         assert len(list_yearly_ts) == 2
         assert len(list_yearly_ts[0]) == 0
-        assert list_yearly_ts[0].freq == "AS"
+        assert list_yearly_ts[0].freq == "YS"
         assert list_yearly_ts[0].n_components == 12
 
         # 4 quarters in a year
@@ -456,13 +467,13 @@ def test_multiple_ts(self):
         inverse_transformed = midas_yearly.inverse_transform(list_yearly_ts)
         assert len(inverse_transformed) == 2
         assert len(inverse_transformed[0]) == 0
-        assert inverse_transformed[0].freq == "M"
+        assert inverse_transformed[0].freq == freq_month_end
         assert inverse_transformed[0].n_components == 1
 
         assert ts_to_transform[1:] == inverse_transformed[1:]
 
         # ==> without stripping: first series will be partially empty
-        midas_yearly = MIDAS(low_freq="AS", strip=False)
+        midas_yearly = MIDAS(low_freq="YS", strip=False)
         list_yearly_ts = midas_yearly.fit_transform(ts_to_transform)
         # 12 months in a year, original ts contains only 9 values, the missing data are nan
         np.testing.assert_array_almost_equal(

diff --git a/darts/tests/models/forecasting/test_torch_forecasting_model.py b/darts/tests/models/forecasting/test_torch_forecasting_model.py
@@ -707,17 +707,14 @@ def test_load_weights_params_check(self, tmpdir_fn):
             ckpt_path = os.path.join(tmpdir_fn, f"{model_name}.pt")
             # barebone model
             model = DLinearModel(
-                input_chunk_length=4,
-                output_chunk_length=1,
-                n_epochs=1,
+                input_chunk_length=4, output_chunk_length=1, n_epochs=1, **tfm_kwargs
             )
             model.fit(self.series[:10])
             model.save(ckpt_path)
 
             # identical model
             loading_model = DLinearModel(
-                input_chunk_length=4,
-                output_chunk_length=1,
+                input_chunk_length=4, output_chunk_length=1, **tfm_kwargs
             )
             loading_model.load_weights(ckpt_path)
 
@@ -726,21 +723,26 @@ def test_load_weights_params_check(self, tmpdir_fn):
                 input_chunk_length=4,
                 output_chunk_length=1,
                 optimizer_cls=torch.optim.AdamW,
+                **tfm_kwargs,
             )
             loading_model.load_weights(ckpt_path)
 
+            model_summary_kwargs = {
+                "pl_trainer_kwargs": dict(
+                    {"enable_model_sumamry": False}, **tfm_kwargs["pl_trainer_kwargs"]
+                )
+            }
             # different pl_trainer_kwargs
             loading_model = DLinearModel(
                 input_chunk_length=4,
                 output_chunk_length=1,
-                pl_trainer_kwargs={"enable_model_summary": False},
+                **model_summary_kwargs,
             )
             loading_model.load_weights(ckpt_path)
 
             # different input_chunk_length (tfm parameter)
             loading_model = DLinearModel(
-                input_chunk_length=4 + 1,
-                output_chunk_length=1,
+                input_chunk_length=4 + 1, output_chunk_length=1, **tfm_kwargs
             )
             with pytest.raises(ValueError) as error_msg:
                 loading_model.load_weights(ckpt_path)
@@ -754,6 +756,7 @@ def test_load_weights_params_check(self, tmpdir_fn):
                 input_chunk_length=4,
                 output_chunk_length=1,
                 kernel_size=10,
+                **tfm_kwargs,
             )
             with pytest.raises(ValueError) as error_msg:
                 loading_model.load_weights(ckpt_path)