Merge branch 'master' into fix/quantile_detector
madtoinou committed Feb 16, 2023
2 parents 92e8567 + 8c1bb2a commit d4dd59e
Showing 15 changed files with 42 additions and 30 deletions.
3 changes: 3 additions & 0 deletions darts/ad/scorers/kmeans_scorer.py
@@ -110,6 +110,9 @@ def __init__(

self.kmeans_kwargs = kwargs
self.kmeans_kwargs["n_clusters"] = k
# stop warning about default value of "n_init" changing from 10 to "auto" in sklearn 1.4
if "n_init" not in self.kmeans_kwargs:
self.kmeans_kwargs["n_init"] = "auto"

super().__init__(
univariate_scorer=(not component_wise), window=window, diff_fn=diff_fn
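The three added lines above pin n_init so that scikit-learn's KMeans stops emitting a FutureWarning about its default changing from 10 to "auto" in version 1.4. A minimal standalone sketch of the same pattern, illustrative only and not part of the commit (the helper name build_kmeans is made up here):

from sklearn.cluster import KMeans

def build_kmeans(k: int, **kwargs) -> KMeans:
    # Mirror the scorer's behaviour: always set n_clusters, and pin n_init
    # unless the caller already chose a value, which silences the FutureWarning.
    kwargs["n_clusters"] = k
    kwargs.setdefault("n_init", "auto")  # "auto" needs scikit-learn >= 1.2; use 10 on older versions
    return KMeans(**kwargs)

model = build_kmeans(k=5)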
2 changes: 1 addition & 1 deletion darts/datasets/__init__.py
@@ -611,7 +611,7 @@ def pre_proces_fn(extracted_dir, dataset_path):

output_dict[locationID] = count_series
output_df = pd.DataFrame(output_dict)
output_df.to_csv(dataset_path, line_terminator="\n")
output_df.to_csv(dataset_path, lineterminator="\n")

super().__init__(
metadata=DatasetLoaderMetadata(
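Context for the one-line change above: pandas renamed DataFrame.to_csv's line_terminator keyword to lineterminator in the 1.5 series, and the old spelling was later dropped. A minimal sketch, assuming pandas >= 1.5 (the file name is just an example):

import pandas as pd

output_df = pd.DataFrame({"count": [1, 2, 3]})
# New keyword spelling; on pandas < 1.5 the old line_terminator would be needed instead.
output_df.to_csv("counts.csv", lineterminator="\n")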
6 changes: 3 additions & 3 deletions darts/tests/ad/test_anomaly_model.py
@@ -581,7 +581,7 @@ def test_univariate_ForecastingAnomalyModel(self):
NormScorer(),
Difference(),
WassersteinScorer(),
KMeansScorer(),
KMeansScorer(k=5),
KMeansScorer(window=10),
PyODScorer(model=KNN()),
PyODScorer(model=KNN(), window=10),
@@ -830,8 +830,8 @@ def test_univariate_covariate_ForecastingAnomalyModel(self):
NormScorer(),
Difference(),
WassersteinScorer(),
KMeansScorer(),
KMeansScorer(window=10),
KMeansScorer(k=4),
KMeansScorer(k=7, window=10),
PyODScorer(model=KNN()),
PyODScorer(model=KNN(), window=10),
WassersteinScorer(window=15),
4 changes: 2 additions & 2 deletions darts/tests/ad/test_scorers.py
@@ -1257,14 +1257,14 @@ def test_multivariate_componentwise_kmeans(self):
)

# test scorer with component_wise=False
scorer_w10_cwfalse = KMeansScorer(window=10, component_wise=False)
scorer_w10_cwfalse = KMeansScorer(window=10, component_wise=False, n_init=10)
scorer_w10_cwfalse.fit(mts_train_kmeans)
auc_roc_cwfalse = scorer_w10_cwfalse.eval_accuracy(
anomalies_common_kmeans, mts_test_kmeans, metric="AUC_ROC"
)

# test scorer with component_wise=True
scorer_w10_cwtrue = KMeansScorer(window=10, component_wise=True)
scorer_w10_cwtrue = KMeansScorer(window=10, component_wise=True, n_init=10)
scorer_w10_cwtrue.fit(mts_train_kmeans)
auc_roc_cwtrue = scorer_w10_cwtrue.eval_accuracy(
anomalies_kmeans_per_width, mts_test_kmeans, metric="AUC_ROC"
2 changes: 2 additions & 0 deletions darts/tests/dataprocessing/dtw/test_dtw.py
@@ -1,5 +1,6 @@
import unittest

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

@@ -198,6 +199,7 @@ def test_plot(self):
align = dtw.dtw(self.series2, self.series1)
align.plot()
align.plot_alignment()
plt.close()

def test_multivariate(self):
n = 2
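The plt.close() calls added in this and the other plotting tests in the commit release the figures the plot methods create; otherwise matplotlib keeps them open for the whole test session and eventually warns about too many open figures. A hedged sketch of the pattern in isolation (the Agg backend line is an assumption about running the suite headless):

import matplotlib
matplotlib.use("Agg")  # non-interactive backend, typical for CI test runs
import matplotlib.pyplot as plt

plt.plot([0, 1, 2], [1, 0, 1])
plt.close()            # close the current figure
# plt.close("all")     # or drop every open figure at once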
@@ -329,7 +329,6 @@ def test_past_index_generator(self):
def test_past_index_generator_with_lags(self):
icl = self.input_chunk_length
ocl = self.output_chunk_length
freq = self.target_time.freq
target = self.target_time

def test_routine_train(
@@ -341,17 +340,17 @@ def test_routine_train(
lags_covariates=[min_lag, max_lag],
)
idx, _ = idxg.generate_train_idx(target, None)
self.assertEqual(idx[0], pd.Timestamp(start_expected, freq=freq))
self.assertEqual(idx[-1], pd.Timestamp(end_expected, freq=freq))
self.assertEqual(idx[0], pd.Timestamp(start_expected))
self.assertEqual(idx[-1], pd.Timestamp(end_expected))
# check case 0: we give covariates, index will always be the covariate time index
idx, _ = idxg.generate_train_idx(target, self.cov_time_train)
self.assertTrue(idx.equals(self.cov_time_train.time_index))
return idxg

def test_routine_inf(self, idxg, n, start_expected, end_expected):
idx, _ = idxg.generate_inference_idx(n, target, None)
self.assertEqual(idx[0], pd.Timestamp(start_expected, freq=freq))
self.assertEqual(idx[-1], pd.Timestamp(end_expected, freq=freq))
self.assertEqual(idx[0], pd.Timestamp(start_expected))
self.assertEqual(idx[-1], pd.Timestamp(end_expected))
# check case 0: we give covariates, index will always be the covariate time index
idx, _ = idxg.generate_inference_idx(n, target, self.cov_time_inf_short)
self.assertTrue(idx.equals(self.cov_time_inf_short.time_index))
@@ -436,7 +435,6 @@ def test_future_index_generator(self):
def test_future_index_generator_with_lags(self):
icl = self.input_chunk_length
ocl = self.output_chunk_length
freq = self.target_time.freq
target = self.target_time

def test_routine_train(
@@ -448,17 +446,17 @@ def test_routine_train(
lags_covariates=[min_lag, max_lag],
)
idx, _ = idxg.generate_train_idx(target, None)
self.assertEqual(idx[0], pd.Timestamp(start_expected, freq=freq))
self.assertEqual(idx[-1], pd.Timestamp(end_expected, freq=freq))
self.assertEqual(idx[0], pd.Timestamp(start_expected))
self.assertEqual(idx[-1], pd.Timestamp(end_expected))
# check case 0: we give covariates, index will always be the covariate time index
idx, _ = idxg.generate_train_idx(target, self.cov_time_train)
self.assertTrue(idx.equals(self.cov_time_train.time_index))
return idxg

def test_routine_inf(self, idxg, n, start_expected, end_expected):
idx, _ = idxg.generate_inference_idx(n, target, None)
self.assertTrue(idx[0], pd.Timestamp(start_expected, freq=freq))
self.assertTrue(idx[-1], pd.Timestamp(end_expected, freq=freq))
self.assertTrue(idx[0], pd.Timestamp(start_expected))
self.assertTrue(idx[-1], pd.Timestamp(end_expected))
# check case 0: we give covariates, index will always be the covariate time index
idx, _ = idxg.generate_inference_idx(n, target, self.cov_time_inf_short)
self.assertTrue(idx.equals(self.cov_time_inf_short.time_index))
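The repeated edits in this file drop the freq argument from pd.Timestamp: pandas deprecated that argument (it was removed in 2.0), and it never influenced equality checks like the assertions above, so the comparisons behave the same; frequency information belongs to the index instead. A small illustrative sketch:

import pandas as pd

# Equality between timestamps does not depend on any frequency attribute.
assert pd.Timestamp("2000-01-01") == pd.Timestamp("2000-01-01")

# Frequency lives on the index, not on individual timestamps.
idx = pd.date_range("2000-01-01", periods=3, freq="D")
assert idx.freqstr == "D"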
16 changes: 8 additions & 8 deletions darts/tests/dataprocessing/encoders/test_encoders.py
@@ -829,28 +829,28 @@ def test_integer_positional_encoder(self):
)
pc1, fc1 = encs.encode_train(ts)
self.assertTrue(
pc1.start_time() == pd.Timestamp("1999-11-01", freq=ts.freq)
and pc1.end_time() == pd.Timestamp("2001-01-01", freq=ts.freq)
pc1.start_time() == pd.Timestamp("1999-11-01")
and pc1.end_time() == pd.Timestamp("2001-01-01")
and (pc1.univariate_values() == np.arange(-25, -10)).all()
and pc1[ts.start_time()].univariate_values()[0] == -23
)
self.assertTrue(
fc1.start_time() == pd.Timestamp("2001-03-01", freq=ts.freq)
and fc1.end_time() == pd.Timestamp("2002-03-01", freq=ts.freq)
fc1.start_time() == pd.Timestamp("2001-03-01")
and fc1.end_time() == pd.Timestamp("2002-03-01")
and (fc1.univariate_values() == np.arange(-9, 4)).all()
and fc1[ts.end_time()].univariate_values()[0] == 0
)

n = 2
pc2, fc2 = encs.encode_inference(n=n, target=ts)
self.assertTrue(
pc2.start_time() == pd.Timestamp("2000-11-01", freq=ts.freq)
and pc2.end_time() == pd.Timestamp("2001-07-01", freq=ts.freq)
pc2.start_time() == pd.Timestamp("2000-11-01")
and pc2.end_time() == pd.Timestamp("2001-07-01")
and (pc2.univariate_values() == np.arange(-13, -4)).all()
)
self.assertTrue(
fc2.start_time() == pd.Timestamp("2002-03-01", freq=ts.freq)
and fc2.end_time() == pd.Timestamp("2002-09-01", freq=ts.freq)
fc2.start_time() == pd.Timestamp("2002-03-01")
and fc2.end_time() == pd.Timestamp("2002-09-01")
and (fc2.univariate_values() == np.arange(3, 10)).all()
)

3 changes: 3 additions & 0 deletions darts/tests/explainability/test_shap_explainer.py
@@ -1,5 +1,6 @@
from datetime import date, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
@@ -500,6 +501,7 @@ def test_plot(self):
"power",
)
self.assertTrue(isinstance(fplot, shap.plots._force.BaseVisualizer))
plt.close()

# no component name -> multivariate error
with self.assertRaises(ValueError):
@@ -552,3 +554,4 @@ def test_plot(self):
target_component="power",
)
self.assertTrue(isinstance(fplot, shap.plots._force.BaseVisualizer))
plt.close()
7 changes: 4 additions & 3 deletions darts/tests/models/filtering/test_filters.py
@@ -173,9 +173,9 @@ def test_gaussian_process(self):
noise = TimeSeries.from_values(np.random.normal(0, 0.4, len(testing_signal)))
testing_signal_with_noise = testing_signal + noise

kernel = ExpSineSquared()
kernel = ExpSineSquared(length_scale_bounds=(1e-3, 1e3))
gpf = GaussianProcessFilter(
kernel=kernel, alpha=0.2, n_restarts_optimizer=100, random_state=42
kernel=kernel, alpha=0.2, n_restarts_optimizer=10, random_state=42
)
filtered_ts = gpf.filter(testing_signal_with_noise, num_samples=1)

@@ -205,7 +205,8 @@ def test_gaussian_process_multivariate(self):
def test_gaussian_process_missing_values(self):
ts = TimeSeries.from_values(np.ones(6))

gpf = GaussianProcessFilter(RBF())
kernel = RBF(length_scale_bounds=(1e-3, 1e10))
gpf = GaussianProcessFilter(kernel=kernel)
filtered_values = gpf.filter(ts).values()
np.testing.assert_allclose(filtered_values, np.ones_like(filtered_values))

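Two things change in the Gaussian process tests above: the kernels get explicit length_scale_bounds, presumably to keep scikit-learn from raising a ConvergenceWarning (which fires when an optimized hyperparameter lands on a bound), and n_restarts_optimizer drops from 100 to 10 to keep the test fast. A minimal sketch of the kernel setup, illustrative only and with made-up data:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared

# Explicit bounds replace the kernel's defaults; scikit-learn warns when an
# optimized hyperparameter ends up sitting on one of these bounds.
kernel = ExpSineSquared(length_scale_bounds=(1e-3, 1e3))
gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.2, n_restarts_optimizer=10, random_state=42)

x = np.linspace(0, 10, 50).reshape(-1, 1)
y = np.sin(x).ravel()
gpr.fit(x, y)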
1 change: 1 addition & 0 deletions darts/tests/models/forecasting/test_dlinear_nlinear.py
@@ -96,6 +96,7 @@ def test_logtensorboard(self):
n_epochs=1,
log_tensorboard=True,
work_dir=self.temp_work_dir,
pl_trainer_kwargs={"log_every_n_steps": 1},
)
model.fit(ts)
model.predict(n=2)
1 change: 1 addition & 0 deletions darts/tests/models/forecasting/test_nbeats_nhits.py
@@ -182,6 +182,7 @@ def test_logtensorboard(self):
log_tensorboard=True,
work_dir=self.temp_work_dir,
generic_architecture=architecture,
pl_trainer_kwargs={"log_every_n_steps": 1},
)
model.fit(ts)
model.predict(n=2)
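Both TensorBoard tests above forward log_every_n_steps=1 to the underlying PyTorch Lightning Trainer; Lightning otherwise warns when an epoch has fewer training batches than its default logging interval of 50, which is exactly what these tiny one-epoch fixtures produce. A hedged sketch of the same idea (the model choice and chunk lengths are illustrative, not taken from the tests):

from darts.models import DLinearModel

model = DLinearModel(
    input_chunk_length=4,
    output_chunk_length=2,
    n_epochs=1,
    log_tensorboard=True,
    # Forwarded to pytorch_lightning.Trainer; logging every step avoids the
    # "logging interval larger than number of batches" warning on tiny datasets.
    pl_trainer_kwargs={"log_every_n_steps": 1},
)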
2 changes: 1 addition & 1 deletion darts/tests/models/forecasting/test_regression_models.py
@@ -1939,7 +1939,7 @@ def generate_expected_times(ts, n_predict=0) -> dict:
freq = ts[0].freq

def to_ts(dt):
return pd.Timestamp(dt, freq=freq)
return pd.Timestamp(dt)

def train_start_end(start_base, end_base):
start = to_ts(start_base) - int(not multi_model) * (ocl - 1) * freq
2 changes: 1 addition & 1 deletion darts/tests/test_timeseries_static_covariates.py
@@ -592,7 +592,7 @@ def test_ts_methods_with_static_covariates(self):

ts_stoch = ts.from_times_and_values(
times=ts.time_index,
values=np.ones((10, 1, 3)),
values=np.random.randint(low=0, high=10, size=(10, 1, 3)),
static_covariates=static_covs,
)
assert ts_stoch.static_covariates.index.equals(ts_stoch.components)
3 changes: 3 additions & 0 deletions darts/tests/utils/test_statistics.py
@@ -1,3 +1,4 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

@@ -203,4 +204,6 @@ class PlotTestCase(DartsBaseTestClass):

def test_statistics_plot(self):
plot_residuals_analysis(self.series)
plt.close()
plot_pacf(self.series)
plt.close()
2 changes: 1 addition & 1 deletion darts/utils/timeseries_generation.py
@@ -570,7 +570,7 @@ def holidays_timeseries(

time_index = _extend_time_index_until(time_index, until, add_length)
scope = range(time_index[0].year, (time_index[-1] + pd.Timedelta(days=1)).year)
country_holidays = holidays.CountryHoliday(
country_holidays = holidays.country_holidays(
country_code, prov=prov, state=state, years=scope
)
index_series = pd.Series(time_index, index=time_index)
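The last change swaps the deprecated holidays.CountryHoliday class for the holidays.country_holidays() factory that the holidays package now recommends; the returned object is used the same way as before. A minimal sketch, illustrative only:

from datetime import date

import holidays

us_holidays = holidays.country_holidays("US", years=range(2020, 2023))
print(date(2022, 7, 4) in us_holidays)  # True: Independence Day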
