Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/comp lags feat order #2272

Merged
merged 27 commits into from
Apr 12, 2024
Merged
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ddfb341
fix: reorder lagged features per lags when they are provided componen…
madtoinou Mar 6, 2024
3be57e9
fix: parametrize lagged_features_names test
madtoinou Mar 6, 2024
7a9c8cf
feat: added tests for lagged_features_names when lags are component-s…
madtoinou Mar 6, 2024
ec2f577
fix: create_lagged_name is not affected by lags order different than …
madtoinou Mar 6, 2024
838de67
fix: improve comment
madtoinou Mar 6, 2024
5a7f829
feat: tests verify that list and dict lags yield the same result
madtoinou Mar 6, 2024
80ef121
fix: remove staticmethod for the tests to pass on python 3.9
madtoinou Mar 11, 2024
a2b867d
feat: properly reorder features during autoregression, added correspo…
madtoinou Mar 12, 2024
38096b8
Merge branch 'master' into fix/comp_lags_feat_order
madtoinou Mar 12, 2024
80e4ed6
Merge branch 'master' into fix/comp_lags_feat_order
madtoinou Apr 4, 2024
d15c970
update changelog
madtoinou Apr 4, 2024
00ec6a1
fix: addressing review comments
madtoinou Apr 4, 2024
a27cd0d
Merge branch 'master' into fix/comp_lags_feat_order
madtoinou Apr 4, 2024
617b6ed
fix: moved autoregression lags extraction to tabularization
madtoinou Apr 4, 2024
dd2bee1
Merge branch 'fix/comp_lags_feat_order' of https://github.com/unit8co…
madtoinou Apr 4, 2024
2717183
fix: refactor tests to reduce code duplication
madtoinou Apr 5, 2024
73f4f64
fix: address review comment
madtoinou Apr 5, 2024
e461b4b
Merge branch 'master' into fix/comp_lags_feat_order
dennisbader Apr 8, 2024
8465b5e
Merge branch 'master' into fix/comp_lags_feat_order
dennisbader Apr 8, 2024
ca50e1f
Merge branch 'master' into fix/comp_lags_feat_order
dennisbader Apr 9, 2024
c9e61d0
fix: remove usage of strict argument in zip, not supported in Python 3.9
madtoinou Apr 10, 2024
c6a6c74
further refactor lagged data extraction for autoregression
dennisbader Apr 10, 2024
9467190
allow coverage diffs for codecov upload
dennisbader Apr 11, 2024
677a606
use codecov v3
dennisbader Apr 11, 2024
bedf5a8
precompute lagged and ordered feature indices
dennisbader Apr 11, 2024
071372f
Merge branch 'master' into fix/comp_lags_feat_order
dennisbader Apr 11, 2024
be8c706
Merge branch 'master' into fix/comp_lags_feat_order
dennisbader Apr 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@
from darts.utils.timeseries_generation import linear_timeseries


def helper_create_multivariate_linear_timeseries(
    n_components: int, components_names: Sequence[str] = None, **kwargs
) -> TimeSeries:
    """
    Build a multivariate series out of `n_components` calls to `linear_timeseries`.

    Component number `i` is shifted by `+ i`, so that the columns can be told apart
    by their values. If `components_names` is `None` or holds fewer than
    `n_components` entries, the default names `lin_ts_0`, `lin_ts_1`, ... are used
    instead; when more names than components are given, only the first
    `n_components` are used. All remaining keyword arguments are forwarded
    unchanged to `linear_timeseries` (`start_value`, `end_value`, `start`, `end`,
    `length`, etc).
    """
    has_enough_names = (
        components_names is not None and len(components_names) >= n_components
    )
    if not has_enough_names:
        components_names = [f"lin_ts_{idx}" for idx in range(n_components)]
    # Each component is offset by its own index, i.e. 1 larger than the previous one:
    components = [
        linear_timeseries(column_name=components_names[idx], **kwargs) + idx
        for idx in range(n_components)
    ]
    return darts_concatenate(components, axis=1)


class TestCreateLaggedTrainingData:
"""
Tests the `create_lagged_training_data` function defined in `darts.utils.data.tabularization`. There are broadly
Expand All @@ -40,27 +59,6 @@ class TestCreateLaggedTrainingData:
# Helper Functions for Generated Test Cases
#

@staticmethod
def create_multivariate_linear_timeseries(
    n_components: int, components_names: Sequence[str] = None, **kwargs
) -> TimeSeries:
    """
    Return the column-wise concatenation of `n_components` linear series.

    Every component is produced by `linear_timeseries` with the extra keyword
    arguments passed through as-is (`start_value`, `end_value`, `start`, `end`,
    `length`, etc), and then has its own index added to all of its values so that
    the components differ from one another. The names `lin_ts_<index>` replace
    `components_names` whenever it is `None` or shorter than `n_components`.
    """
    if components_names is None or len(components_names) < n_components:
        components_names = [f"lin_ts_{pos}" for pos in range(n_components)]

    def shifted_component(pos):
        # The component at position `pos` is the base linear series plus `pos`.
        return linear_timeseries(column_name=components_names[pos], **kwargs) + pos

    return darts_concatenate(
        [shifted_component(pos) for pos in range(n_components)], axis=1
    )

@staticmethod
def get_feature_times(
target: TimeSeries,
Expand Down Expand Up @@ -419,7 +417,7 @@ def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc):
# Generated Test Cases
#

target_with_no_cov = create_multivariate_linear_timeseries(
target_with_no_cov = helper_create_multivariate_linear_timeseries(
n_components=1,
components_names=["no_static"],
start_value=0,
Expand All @@ -429,7 +427,7 @@ def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc):
freq=2,
)
n_comp = 2
target_with_static_cov = create_multivariate_linear_timeseries(
target_with_static_cov = helper_create_multivariate_linear_timeseries(
n_components=n_comp,
components_names=["static_0", "static_1"],
start_value=0,
Expand All @@ -455,7 +453,7 @@ def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc):
) # leads to sharing target component names
)

past = create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3,
components_names=["past_0", "past_1", "past_2"],
start_value=10,
Expand All @@ -464,7 +462,7 @@ def convert_lags_to_dict(ts_tg, ts_pc, ts_fc, lags_tg, lags_pc, lags_fc):
length=10,
freq=2,
)
future = create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4,
components_names=["future_0", "future_1", "future_2", "future_3"],
start_value=20,
Expand Down Expand Up @@ -508,23 +506,23 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
# different start times, different lengths, and different values, but
# they're all of the same frequency:
if series_type == "integer":
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2,
start_value=0,
end_value=10,
start=2,
length=self.min_n_ts,
freq=2,
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3,
start_value=10,
end_value=20,
start=4,
length=self.min_n_ts + 1,
freq=2,
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4,
start_value=20,
end_value=30,
Expand All @@ -533,23 +531,23 @@ def test_lagged_training_data_equal_freq(self, series_type: str):
freq=2,
)
else:
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2,
start_value=0,
end_value=10,
start=pd.Timestamp("1/2/2000"),
length=self.min_n_ts,
freq="2d",
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3,
start_value=10,
end_value=20,
start=pd.Timestamp("1/4/2000"),
length=self.min_n_ts + 1,
freq="2d",
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4,
start_value=20,
end_value=30,
Expand Down Expand Up @@ -682,33 +680,33 @@ def test_lagged_training_data_unequal_freq(self, series_type):
# different start times, different lengths, different values, and different
# frequencies:
if series_type == "integer":
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3
)
else:
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2,
start_value=0,
end_value=10,
start=pd.Timestamp("1/1/2000"),
length=20,
freq="d",
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3,
start_value=10,
end_value=20,
start=pd.Timestamp("1/2/2000"),
length=10,
freq="2d",
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4,
start_value=20,
end_value=30,
Expand Down Expand Up @@ -808,33 +806,33 @@ def test_lagged_training_data_method_consistency(self, series_type):
# different start times, different lengths, different values, and of
# different frequencies:
if series_type == "integer":
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2, start_value=0, end_value=10, start=2, length=20, freq=1
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3, start_value=10, end_value=20, start=4, length=10, freq=2
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4, start_value=20, end_value=30, start=6, length=7, freq=3
)
else:
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2,
start_value=0,
end_value=10,
start=pd.Timestamp("1/2/2000"),
end=pd.Timestamp("1/18/2000"),
freq="2d",
)
past = self.create_multivariate_linear_timeseries(
past = helper_create_multivariate_linear_timeseries(
n_components=3,
start_value=10,
end_value=20,
start=pd.Timestamp("1/4/2000"),
end=pd.Timestamp("1/20/2000"),
freq="2d",
)
future = self.create_multivariate_linear_timeseries(
future = helper_create_multivariate_linear_timeseries(
n_components=4,
start_value=20,
end_value=30,
Expand Down Expand Up @@ -1605,15 +1603,15 @@ def test_lagged_training_data_comp_wise_lags(self, config):
start_fc = pd.Timestamp("2000-01-17")

# length = max lag - min lag + 1 = -1 + 4 + 1 = 4
target = self.create_multivariate_linear_timeseries(
target = helper_create_multivariate_linear_timeseries(
n_components=2,
components_names=["target_0", "target_1"],
length=4 + output_chunk_shift + output_chunk_length,
start=start_tg,
)
# length = max lag - min lag + 1 = -3 + 3 + 1 = 1
past = (
self.create_multivariate_linear_timeseries(
helper_create_multivariate_linear_timeseries(
n_components=2,
components_names=["past_0", "past_1"],
length=1,
Expand All @@ -1623,7 +1621,7 @@ def test_lagged_training_data_comp_wise_lags(self, config):
)
# length = max lag - min lag + 1 = 1 + 2 + 1 = 4
future = (
self.create_multivariate_linear_timeseries(
helper_create_multivariate_linear_timeseries(
n_components=2,
components_names=["future_0", "future_1"],
length=4 + output_chunk_shift + output_chunk_length,
Expand Down