From 53d34c72596742d13bf72d6f8e52057b280e4e2b Mon Sep 17 00:00:00 2001 From: charavelg Date: Tue, 16 Sep 2025 18:22:33 +0200 Subject: [PATCH 01/10] Add frechet metric --- docs/gen_modules/tslearn.metrics.rst | 4 + tslearn/metrics/__init__.py | 62 ++- tslearn/metrics/frechet.py | 681 +++++++++++++++++++++++++++ tslearn/neighbors/neighbors.py | 64 ++- tslearn/tests/test_metrics.py | 109 +++++ tslearn/tests/test_neighbors.py | 26 +- 6 files changed, 901 insertions(+), 45 deletions(-) create mode 100644 tslearn/metrics/frechet.py diff --git a/docs/gen_modules/tslearn.metrics.rst b/docs/gen_modules/tslearn.metrics.rst index f688e3820..8a183e092 100644 --- a/docs/gen_modules/tslearn.metrics.rst +++ b/docs/gen_modules/tslearn.metrics.rst @@ -36,3 +36,7 @@ tslearn.metrics sigma_gak gamma_soft_dtw SoftDTWLossPyTorch + frechet + frechet_path + frechet_path_from_metric + cdist_frechet diff --git a/tslearn/metrics/__init__.py b/tslearn/metrics/__init__.py index 08d59ed2c..b98487cdd 100644 --- a/tslearn/metrics/__init__.py +++ b/tslearn/metrics/__init__.py @@ -23,34 +23,52 @@ SoftDTW) from .soft_dtw_loss_pytorch import SoftDTWLossPyTorch from .cycc import cdist_normalized_cc, y_shifted_sbd_vec +from .frechet import frechet, frechet_path, frechet_path_from_metric, cdist_frechet __author__ = 'Romain Tavenard romain.tavenard[at]univ-rennes2.fr' -TSLEARN_VALID_METRICS = ["ctw", "dtw", "gak", "sax", "softdtw", "lcss"] -VARIABLE_LENGTH_METRICS = ["ctw", "dtw", "gak", "sax", "softdtw", "lcss"] +TSLEARN_VALID_METRICS = ["ctw", "dtw", "gak", "sax", "softdtw", "lcss", "frechet"] +VARIABLE_LENGTH_METRICS = ["ctw", "dtw", "gak", "sax", "softdtw", "lcss", "frechet"] __all__ = [ - "TSLEARN_VALID_METRICS", "VARIABLE_LENGTH_METRICS", - - "dtw", "dtw_limited_warping_length", - "dtw_path_limited_warping_length", "subsequence_path", - "subsequence_cost_matrix", - "dtw_path", "dtw_path_from_metric", - "dtw_subsequence_path", "cdist_dtw", + "TSLEARN_VALID_METRICS", + "VARIABLE_LENGTH_METRICS", 
"GLOBAL_CONSTRAINT_CODE", - "lb_envelope", "lb_keogh", - "sakoe_chiba_mask", "itakura_mask", - "lcss", "lcss_path", "lcss_path_from_metric", - - "ctw_path", "ctw", "cdist_ctw", - + "dtw", + "dtw_limited_warping_length", + "dtw_path_limited_warping_length", + "subsequence_path", + "subsequence_cost_matrix", + "dtw_path", + "dtw_path_from_metric", + "dtw_subsequence_path", + "cdist_dtw", + "lb_envelope", + "lb_keogh", + "sakoe_chiba_mask", + "itakura_mask", + "lcss", + "lcss_path", + "lcss_path_from_metric", + "ctw_path", + "ctw", + "cdist_ctw", "cdist_sax", - - "cdist_soft_dtw", "cdist_gak", - "cdist_soft_dtw_normalized", "gak", "soft_dtw", "soft_dtw_alignment", - "sigma_gak", "gamma_soft_dtw", "SquaredEuclidean", "SoftDTW", - + "cdist_soft_dtw", + "cdist_gak", + "cdist_soft_dtw_normalized", + "gak", + "soft_dtw", + "soft_dtw_alignment", + "sigma_gak", + "gamma_soft_dtw", + "SquaredEuclidean", + "SoftDTW", "SoftDTWLossPyTorch", - - "cdist_normalized_cc", "y_shifted_sbd_vec" + "cdist_normalized_cc", + "y_shifted_sbd_vec", + "frechet", + "frechet_path", + "frechet_path_from_metric", + "cdist_frechet" ] diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py new file mode 100644 index 000000000..16c0bf7f3 --- /dev/null +++ b/tslearn/metrics/frechet.py @@ -0,0 +1,681 @@ + +import numpy + +from numba import njit + +from tslearn.backend import instantiate_backend +from tslearn.metrics.dtw_variants import ( + compute_mask, + GLOBAL_CONSTRAINT_CODE, + _njit_return_path, + _return_path +) +from tslearn.metrics.utils import _cdist_generic +from tslearn.utils import to_time_series + + +def frechet( + s1, + s2, + global_constraint=None, + sakoe_chiba_radius=None, + itakura_max_slope=None, + be=None +): + r"""Compute Frechet similarity measure between + (possibly multidimensional) time series and return it. + + Frechet similarity score is computed as the maximum distance between + aligned time series, i.e., if :math:`\pi` is an optimal alignment path: + + .. 
math:: + + Frechet(X, Y) = \max_{(i, j) \in \pi} \|X_{i} - Y_{j}\| + + Note that this formula is still valid for the multivariate case. + + It is not required that both time series share the same size, but they must + be the same dimension. + + Parameters + ---------- + s1 : array-like, shape=(sz1, d) or (sz1,) + A time series. If shape is (sz1,), the time series is assumed to be univariate. + + s2 : array-like, shape=(sz2, d) or (sz2,) + Another time series. If shape is (sz2,), the time series is assumed to be univariate. + + global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) + Global constraint to restrict admissible paths for Frechet distance. + + sakoe_chiba_radius : int or None (default: None) + Radius to be used for Sakoe-Chiba band global constraint. + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + it controls how far in time we can go in order to match a given + point from one time series to a point in another time series. + If None and `global_constraint` is set to "sakoe_chiba", a radius of + 1 is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + itakura_max_slope : float or None (default: None) + Maximum slope for the Itakura parallelogram constraint. + If None and `global_constraint` is set to "itakura", a maximum slope + of 2. is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + be : Backend object or string or None + Backend. 
If `be` is an instance of the class `NumPyBackend` or the string `"numpy"`, + the NumPy backend is used. + If `be` is an instance of the class `PyTorchBackend` or the string `"pytorch"`, + the PyTorch backend is used. + If `be` is `None`, the backend is determined by the input arrays. + See our :ref:`dedicated user-guide page ` for more information. + + Returns + ------- + float + Similarity score + + Examples + -------- + >>> frechet([1, 2, 3], [1., 2., 2., 3.]) + np.float64(0.0) + >>> frechet([1, 2, 3], [1., 2., 2., 3., 4.]) + np.float64(1.0) + + The PyTorch backend can be used to compute gradients: + + >>> import torch + >>> s1 = torch.tensor([[1.0], [2.0], [3.0]], requires_grad=True) + >>> s2 = torch.tensor([[3.0], [4.0], [-3.0]]) + >>> sim = frechet(s1, s2, be="pytorch") + >>> print(sim) + tensor(6., grad_fn=) + >>> sim.backward() + >>> print(s1.grad) + tensor([[0.], + [0.], + [1.]]) + + >>> s1_2d = torch.tensor([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]], requires_grad=True) + >>> s2_2d = torch.tensor([[3.0, 3.0], [4.0, 4.0], [-3.0, -3.0]]) + >>> sim = frechet(s1_2d, s2_2d, be="pytorch") + >>> print(sim) + tensor(8.4853, grad_fn=) + >>> sim.backward() + >>> print(s1_2d.grad) + tensor([[0.0000, 0.0000], + [0.0000, 0.0000], + [0.7071, 0.7071]]) + + See Also + -------- + frechet_path : Get both the matching path and the similarity score + frechet_path_from_metric : Compute similarity score and path + using a user-defined distance metric + cdist_frechet : Cross similarity matrix between time series datasets + + References + ---------- + .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + spoken word recognition," IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. 26(1), pp. 43--49, 1978. 
+ + """ # noqa: E501 + return _frechet( + s1, + s2, + global_constraint, + sakoe_chiba_radius, + itakura_max_slope, + be + )[1] + + +def _frechet( + s1, + s2, + global_constraint=None, + sakoe_chiba_radius=None, + itakura_max_slope=None, + be=None, + return_path=False, + metric="euclidean", + **kwds +): + backend = instantiate_backend(be, s1, s2) + + if metric != "precomputed": + + s1 = to_time_series(s1, remove_nans=True, be=backend) + s2 = to_time_series(s2, remove_nans=True, be=backend) + + if s1.shape[0] == 0 or s2.shape[0] == 0: + raise ValueError( + "One of the input time series contains only nans or has zero length." + ) + + if s1.shape[1] != s2.shape[1]: + raise ValueError("All input time series must have the same feature size.") + + else: + s1 = backend.array(s1) + s2 = s1 + + mask = compute_mask( + s1, + s2, + GLOBAL_CONSTRAINT_CODE[global_constraint], + sakoe_chiba_radius=sakoe_chiba_radius, + itakura_max_slope=itakura_max_slope + ) + + distance_matrix = None + if metric == "precomputed": + distance_matrix = s1 + elif metric not in ["euclidean", "sqeuclidean"] and backend.is_numpy: + distance_matrix = backend.pairwise_distances(s1, s2, metric=metric, **kwds) + + if distance_matrix is not None: + if backend.is_numpy: + acc_matrix = _njit_frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask) + else: + acc_matrix = _frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask, backend) + else: + if backend.is_numpy: + acc_matrix = _njit_frechet_accumulated_matrix(s1, s2, mask, squared=metric=="sqeuclidean") + else: + acc_matrix = _frechet_accumulated_matrix(s1, s2, mask, backend, metric, **kwds) + + path = None + if return_path: + if backend.is_numpy: + path = _njit_return_path(acc_matrix) + else: + path = _return_path(acc_matrix, be=backend) + + return path, acc_matrix[-1, -1] + + +@njit +def _njit_frechet_accumulated_matrix(s1, s2, mask, squared=True): + l1, l2 = s1.shape[0], s2.shape[0] + + acc_matrix = numpy.full((l1 + 1, l2 + 
1), numpy.inf) + acc_matrix[0, 0] = 0 + + for i in range(l1): + for j in range(l2): + if mask[i, j]: + local_distance = numpy.linalg.norm(s1[i] - s2[j]) + if squared: + local_distance = local_distance ** 2 + acc_matrix[i + 1, j + 1] = max( + local_distance, + min(acc_matrix[i, j + 1], + acc_matrix[i + 1, j], + acc_matrix[i, j]) + ) + + return acc_matrix[1:, 1:] + + +def _frechet_accumulated_matrix(s1, s2, mask, backend, metric, **kwds): + l1, l2 = s1.shape[0], s2.shape[0] + + acc_matrix = backend.full((l1 + 1, l2 + 1), numpy.inf) + acc_matrix[0, 0] = 0 + + for i in range(l1): + for j in range(l2): + if mask[i, j]: + local_distance = ( + backend.pairwise_distances(s1[i].reshape(1, -1), + s2[j].reshape(1, -1), + metric=metric, + **kwds) + ) + acc_matrix[i + 1, j + 1] = max( + local_distance, + min(acc_matrix[i, j + 1], + acc_matrix[i + 1, j], + acc_matrix[i, j]) + ) + return acc_matrix[1:, 1:] + + +@njit +def _njit_frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask): + l1, l2 = distance_matrix.shape[0], distance_matrix.shape[1] + + acc_matrix = numpy.full((l1 + 1, l2 + 1), numpy.inf) + acc_matrix[0, 0] = 0 + + for i in range(l1): + for j in range(l2): + if mask[i, j]: + acc_matrix[i + 1, j + 1] = max( + distance_matrix[i, j], + min(acc_matrix[i, j + 1], + acc_matrix[i + 1, j], + acc_matrix[i, j]) + ) + + return acc_matrix[1:, 1:] + + +def _frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask, backend): + l1, l2 = distance_matrix.shape[0], distance_matrix.shape[1] + + acc_matrix = backend.full((l1 + 1, l2 + 1), numpy.inf) + acc_matrix[0, 0] = 0 + + for i in range(l1): + for j in range(l2): + if mask[i, j]: + acc_matrix[i + 1, j + 1] = max( + distance_matrix[i, j], + min(acc_matrix[i, j + 1], + acc_matrix[i + 1, j], + acc_matrix[i, j]) + ) + return acc_matrix[1:, 1:] + +def frechet_path( + s1, + s2, + global_constraint=None, + sakoe_chiba_radius=None, + itakura_max_slope=None, + be=None, +): + r"""Compute Frechet similarity measure 
between + (possibly multidimensional) time series and an optimal alignment path. + + Frechet distance is computed as the maximum distance between aligned time series, + i.e., if :math:`\pi` is the optimal alignment path: + + .. math:: + + Frechet(X, Y) = \max_{(i, j) \in \pi} \|X_{i} - Y_{j}\| + + It is not required that both time series share the same size, but they must + be the same dimension. + + Parameters + ---------- + s1 : array-like, shape=(sz1, d) or (sz1,) + A time series. If shape is (sz1,), the time series is assumed to be univariate. + s2 : array-like, shape=(sz2, d) or (sz2,) + Another time series. If shape is (sz2,), the time series is assumed to be univariate. + global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) + Global constraint to restrict admissible paths for Frechet distance. + sakoe_chiba_radius : int or None (default: None) + Radius to be used for Sakoe-Chiba band global constraint. + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + it controls how far in time we can go in order to match a given + point from one time series to a point in another time series. + If None and `global_constraint` is set to "sakoe_chiba", a radius of + 1 is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + itakura_max_slope : float or None (default: None) + Maximum slope for the Itakura parallelogram constraint. + If None and `global_constraint` is set to "itakura", a maximum slope + of 2. is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two.
In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + be : Backend object or string or None + Backend. If `be` is an instance of the class `NumPyBackend` or the string `"numpy"`, + the NumPy backend is used. + If `be` is an instance of the class `PyTorchBackend` or the string `"pytorch"`, + the PyTorch backend is used. + If `be` is `None`, the backend is determined by the input arrays. + See our :ref:`dedicated user-guide page ` for more information. + + Returns + ------- + list of integer pairs + Matching path represented as a list of index pairs. In each pair, the + first index corresponds to s1 and the second one corresponds to s2. + + float + Similarity score + + Examples + -------- + >>> path, dist = frechet_path([1, 2, 3], [1., 2., 2., 3.]) + >>> path + [(0, 0), (1, 1), (1, 2), (2, 3)] + >>> float(dist) + 0.0 + >>> float(frechet_path([1, 2, 3], [1., 2., 2., 3., 4.])[1]) + 1.0 + + See Also + -------- + frechet : Get only the similarity score + frechet_path_from_metric : Compute similarity score and path + using a user-defined distance metric + cdist_frechet : Cross similarity matrix between time series datasets + + References + ---------- + .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + spoken word recognition," IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. 26(1), pp. 43--49, 1978. + + """ # noqa: E501 + return _frechet( + s1, + s2, + global_constraint, + sakoe_chiba_radius, + itakura_max_slope, + be, + return_path=True + ) + +def frechet_path_from_metric( + s1, + s2=None, + metric="precomputed", + global_constraint=None, + sakoe_chiba_radius=None, + itakura_max_slope=None, + be=None, + **kwds +): + r"""Compute Frechet similarity measure and an optimal alignment path + between (possibly multidimensional) time series using a distance metric + defined by the user. 
+ + It is not required that both time series share the same size, but they must + be the same dimension. + + When using Pytorch backend only "precomputed", "euclidean", "sqeuclidean" + and callable metrics are available. + Otherwise, valid values for metric are the same as for scikit-learn + `pairwise_distances`_ function i.e. a string (e.g. "euclidean", + "sqeuclidean", "hamming") or a function that is used to compute the + pairwise distances. See `scikit`_ and `scipy`_ documentations for more + information about the available metrics. + + Parameters + ---------- + s1 : array-like, shape=(sz1, d) or (sz1,) if metric!="precomputed", (sz1, sz2) otherwise + A time series or an array of pairwise distances between samples. + If shape is (sz1,), the time series is assumed to be univariate. + + s2 : array-like, shape=(sz2, d) or (sz2,), optional (default: None) + A second time series, only used if metric != "precomputed". + If shape is (sz2,), the time series is assumed to be univariate. + + metric : string or callable (default: "precomputed") + If metric is "precomputed", `s1` is assumed to be a distance matrix. + + Otherwise, function used to compute the pairwise distances between each + points of `s1` and `s2`. + If metric is a string, it must be one of the options compatible + with sklearn.metrics.pairwise_distances. + Alternatively, if metric is a callable function, it is called on pairs + of rows of `s1` and `s2`. The callable should take two 1 dimensional + arrays as input and return a value indicating the distance between + them. + + global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) + Global constraint to restrict admissible paths for Frechet. + + sakoe_chiba_radius : int or None (default: None) + Radius to be used for Sakoe-Chiba band global constraint. 
+ The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + it controls how far in time we can go in order to match a given + point from one time series to a point in another time series. + If None and `global_constraint` is set to "sakoe_chiba", a radius of + 1 is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + itakura_max_slope : float or None (default: None) + Maximum slope for the Itakura parallelogram constraint. + If None and `global_constraint` is set to "itakura", a maximum slope + of 2. is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + be : Backend object or string or None + Backend. If `be` is an instance of the class `NumPyBackend` or the string `"numpy"`, + the NumPy backend is used. + If `be` is an instance of the class `PyTorchBackend` or the string `"pytorch"`, + the PyTorch backend is used. + If `be` is `None`, the backend is determined by the input arrays. + See our :ref:`dedicated user-guide page <backend>` for more information. + + **kwds + Additional arguments to pass to sklearn pairwise_distances to compute + the pairwise distances. + + Returns + ------- + list of integer pairs + Matching path represented as a list of index pairs. In each pair, the + first index corresponds to s1 and the second one corresponds to s2. + + float + Similarity score (maximum of the metric along the aligned time series).
+ + Examples + -------- + Let's create 2 numpy arrays to align: + + >>> import numpy as np + >>> rng = np.random.RandomState(0) + >>> s1, s2 = rng.rand(5, 2), rng.rand(6, 2) + + The alignment can be done by passing a string indicating the metric to pass + to scikit-learn pairwise_distances: + + >>> x, y = frechet_path_from_metric(s1, s2, + ... metric="sqeuclidean") # doctest: +ELLIPSIS + >>> x, float(y) + ([(0, 0), (1, 0), (2, 1), (2, 2), (2, 3), (3, 4), (4, 5)], 0.4365...) + + Or by defining a custom distance function: + + >>> sqeuclidean = lambda x, y: np.sum((x-y)**2) + >>> x, y = frechet_path_from_metric(s1, s2, metric=sqeuclidean) # doctest: +ELLIPSIS + >>> x, float(y) + ([(0, 0), (1, 0), (2, 1), (2, 2), (2, 3), (3, 4), (4, 5)], 0.4365...) + + Or by using a precomputed distance matrix as input: + + >>> from sklearn.metrics.pairwise import pairwise_distances + >>> dist_matrix = pairwise_distances(s1, s2, metric="sqeuclidean") + >>> x, y = frechet_path_from_metric(dist_matrix, + ... metric="precomputed") # doctest: +ELLIPSIS + >>> x, float(y) + ([(0, 0), (1, 0), (2, 1), (2, 2), (2, 3), (3, 4), (4, 5)], 0.4365...) + + Notes + ----- + By using a squared euclidean distance metric as shown above, the output + path is the same as the one obtained by using frechet_path but the similarity + score is the maximum squared distance instead of the maximum euclidean distance. + + See Also + -------- + frechet : Get only the similarity score + frechet_path : Get both the matching path and the similarity score + cdist_frechet : Cross similarity matrix between time series datasets + + References + ---------- + .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + spoken word recognition," IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. 26(1), pp. 43--49, 1978. + + .. _pairwise_distances: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html + + ..
_scikit: https://scikit-learn.org/stable/modules/metrics.html + + .. _scipy: https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html + + """ # noqa: E501 + return _frechet( + s1, + s2, + global_constraint, + sakoe_chiba_radius, + itakura_max_slope, + be, + return_path=True, + metric=metric, + **kwds + ) + +def cdist_frechet( + dataset1, + dataset2=None, + global_constraint=None, + sakoe_chiba_radius=None, + itakura_max_slope=None, + n_jobs=None, + verbose=0, + be=None, +): + r"""Compute cross-similarity matrix using Frechet + similarity measure. + + Frechet is computed as the maximum distance between aligned time series, + i.e., if :math:`\pi` is an optimal alignment path: + + .. math:: + + Frechet(X, Y) = \max_{(i, j) \in \pi} \|X_{i} - Y_{j}\| + + Note that this formula is still valid for the multivariate case. + + It is not required that time series share the same size, but they + must be the same dimension. + + Parameters + ---------- + dataset1 : array-like, shape=(n_ts1, sz1, d) or (n_ts1, sz1) or (sz1,) + A dataset of time series. + If shape is (n_ts1, sz1), the dataset is composed of univariate time series. + If shape is (sz1,), the dataset is composed of a unique univariate time series. + + dataset2 : None or array-like, shape=(n_ts2, sz2, d) or (n_ts2, sz2) or (sz2,) (default: None) + Another dataset of time series. If `None`, self-similarity of + `dataset1` is returned. + If shape is (n_ts2, sz2), the dataset is composed of univariate time series. + If shape is (sz2,), the dataset is composed of a unique univariate time series. + + global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) + Global constraint to restrict admissible paths for Frechet. + + sakoe_chiba_radius : int or None (default: None) + Radius to be used for Sakoe-Chiba band global constraint. 
+ The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + it controls how far in time we can go in order to match a given + point from one time series to a point in another time series. + If None and `global_constraint` is set to "sakoe_chiba", a radius of + 1 is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + itakura_max_slope : float or None (default: None) + Maximum slope for the Itakura parallelogram constraint. + If None and `global_constraint` is set to "itakura", a maximum slope + of 2. is used. + If both `sakoe_chiba_radius` and `itakura_max_slope` are set, + `global_constraint` is used to infer which constraint to use among the + two. In this case, if `global_constraint` corresponds to no global + constraint, a `RuntimeWarning` is raised and no global constraint is + used. + + n_jobs : int or None, optional (default=None) + The number of jobs to run in parallel. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors. See scikit-learn's + `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__ + for more details. + + verbose : int, optional (default=0) + The verbosity level: if non zero, progress messages are printed. + Above 50, the output is sent to stdout. + The frequency of the messages increases with the verbosity level. + If it is more than 10, all iterations are reported. + `Glossary <https://joblib.readthedocs.io/en/latest/parallel.html#parallel-reference-documentation>`__ + for more details. + + be : Backend object or string or None + Backend. If `be` is an instance of the class `NumPyBackend` or the string `"numpy"`, + the NumPy backend is used. + If `be` is an instance of the class `PyTorchBackend` or the string `"pytorch"`, + the PyTorch backend is used. + If `be` is `None`, the backend is determined by the input arrays.
+ See our :ref:`dedicated user-guide page ` for more information. + + Returns + ------- + cdist : array-like, shape=(n_ts1, n_ts2) + Cross-similarity matrix. + + Examples + -------- + >>> cdist_frechet([[1, 2, 2, 3], [1., 2., 3., 4.]]) + array([[0., 1.], + [1., 0.]]) + >>> cdist_frechet([[1, 2, 2, 3], [1., 2., 3., 4.]], [[1, 2, 3], [2, 3, 4, 5]]) + array([[0., 2.], + [1., 1.]]) + + See Also + -------- + frechet : Get only the similarity score + frechet_path : Get both the matching path and the similarity score + frechet_path_from_metric : Compute Frechet similarity score and path + using a user-defined distance metric + + References + ---------- + .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + spoken word recognition," IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. 26(1), pp. 43--49, 1978. + """ # noqa: E501 + return _cdist_generic( + dist_fun=frechet, + dataset1=dataset1, + dataset2=dataset2, + n_jobs=n_jobs, + verbose=verbose, + compute_diagonal=False, + global_constraint=global_constraint, + sakoe_chiba_radius=sakoe_chiba_radius, + itakura_max_slope=itakura_max_slope, + be=be, + ) diff --git a/tslearn/neighbors/neighbors.py b/tslearn/neighbors/neighbors.py index 7eee8c946..8fbc6dce2 100644 --- a/tslearn/neighbors/neighbors.py +++ b/tslearn/neighbors/neighbors.py @@ -1,16 +1,28 @@ import numpy + +from scipy.spatial.distance import cdist as scipy_cdist + from sklearn import neighbors from sklearn.neighbors import (KNeighborsClassifier, NearestNeighbors, KNeighborsRegressor) from sklearn.utils.validation import check_is_fitted -from scipy.spatial.distance import cdist as scipy_cdist -from tslearn.metrics import cdist_dtw, cdist_ctw, cdist_soft_dtw, \ - cdist_sax, TSLEARN_VALID_METRICS -from tslearn.piecewise import SymbolicAggregateApproximation -from tslearn.utils import (to_time_series_dataset, to_sklearn_dataset, - check_array, check_dims) from tslearn.bases import BaseModelPackage +from tslearn.metrics import 
( + cdist_dtw, + cdist_ctw, + cdist_soft_dtw, + cdist_sax, + cdist_frechet, + TSLEARN_VALID_METRICS +) +from tslearn.piecewise import SymbolicAggregateApproximation +from tslearn.utils import ( + to_time_series_dataset, + to_sklearn_dataset, + check_array, + check_dims +) neighbors.VALID_METRICS['brute'].extend(['dtw', 'softdtw', 'sax', 'ctw']) @@ -60,17 +72,30 @@ def _precompute_cross_dist(self, X, other_X=None): X = to_time_series_dataset(X) if self._ts_metric == "dtw": - X_ = cdist_dtw(X, other_X, n_jobs=self.n_jobs, - **metric_params) + X_ = cdist_dtw( + X, + other_X, + n_jobs=self.n_jobs, + **metric_params) elif self._ts_metric == "ctw": X_ = cdist_ctw(X, other_X, **metric_params) elif self._ts_metric == "softdtw": X_ = cdist_soft_dtw(X, other_X, **metric_params) elif self._ts_metric == "sax": X = self._sax_preprocess(X, **metric_params) - X_ = cdist_sax(X, self._sax.breakpoints_avg_, - self._sax._X_fit_dims_[1], other_X, - n_jobs=self.n_jobs) + X_ = cdist_sax( + X, + self._sax.breakpoints_avg_, + self._sax._X_fit_dims_[1], + other_X, + n_jobs=self.n_jobs) + elif self._ts_metric == "frechet": + X_ = cdist_frechet( + X, + other_X, + n_jobs=self.n_jobs, + **metric_params + ) else: raise ValueError("Invalid metric recorded: %s" % self._ts_metric) @@ -331,12 +356,25 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): X = check_dims(X, X_fit_dims=self._ts_fit.shape, extend=True, check_n_features_only=True) if self._ts_metric == "dtw": - X_ = cdist_dtw(X, self._ts_fit, n_jobs=self.n_jobs, - verbose=self.verbose, **metric_params) + X_ = cdist_dtw( + X, + self._ts_fit, + n_jobs=self.n_jobs, + verbose=self.verbose, + **metric_params + ) elif self._ts_metric == "ctw": X_ = cdist_ctw(X, self._ts_fit, **metric_params) elif self._ts_metric == "softdtw": X_ = cdist_soft_dtw(X, self._ts_fit, **metric_params) + elif self._ts_metric == "frechet": + X_ = cdist_frechet( + X, + self._ts_fit, + n_jobs=self.n_jobs, + verbose=self.verbose, + **metric_params + ) 
else: raise ValueError("Invalid metric recorded: %s" % self._ts_metric) diff --git a/tslearn/tests/test_metrics.py b/tslearn/tests/test_metrics.py index 20d93c41f..cc745c72f 100644 --- a/tslearn/tests/test_metrics.py +++ b/tslearn/tests/test_metrics.py @@ -806,3 +806,112 @@ def soft_dtw_loss_function(x, y, dist_func, gamma): axis=2, ) np.testing.assert_allclose(batch_ts_2.grad, expected_grad_ts2, rtol=5e-5) + + +def test_frechet(): + + np.random.seed(42) + s1 = np.random.randn(10, 4) + s2 = np.random.randn(10, 4) + + for be in backends: + for array_type in array_types: + + path, dist = tslearn.metrics.frechet_path( + cast([1., 2, 3], array_type), + cast([1.0, 2.0, 2.0, 3.0], array_type), + be=be + ) + if be is not None: + assert be.belongs_to_backend(dist) + else: + assert instantiate_backend(array_type).belongs_to_backend(dist) + + np.testing.assert_allclose(dist, [0.]) + assert isinstance(path, list) + assert path == [(0, 0), (1, 1), (1, 2), (2, 3)] + + path, dist = tslearn.metrics.frechet_path([1., 2, 3], [1.0, 0., 2.0, 4.0], be=be) + np.testing.assert_allclose(dist, [1.]) + assert path == [(0, 0), (0, 1), (1, 2), (2, 3)] + + path, dist = tslearn.metrics.frechet_path([1., 2, 3], [-2.0, 5.0, 7.0], be=be) + np.testing.assert_allclose(dist, [4.]) + assert path == [(0, 0), (1, 1), (2, 2)] + + path, dist = tslearn.metrics.frechet_path( + [1, 1, 1, 2, 3], + [1, 2, 2, 2, 3], + global_constraint="sakoe_chiba", + sakoe_chiba_radius=1, + be=be + ) + np.testing.assert_allclose(dist, 1) + assert path == [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)] + + path, dist = tslearn.metrics.frechet_path(s1, s2, be=be) + np.testing.assert_allclose(dist, [3.092], rtol=1e-03) + assert path == [ + (0, 0), (0, 1), (1, 2), (1, 3), (2, 4), (3, 5), + (4, 5), (5, 5), (6, 6), (6, 7), (7, 8), (8, 9), (9, 9) + ] + + dist = tslearn.metrics.frechet(s1, s2, be=be) + np.testing.assert_allclose(dist, [3.092], rtol=1e-03) + + path, dist = tslearn.metrics.frechet_path_from_metric(s1, s2, metric="euclidean", 
be=be) + np.testing.assert_allclose(dist, [3.092], rtol=1e-03) + + dist_matrix = cast(cdist(s1, s2, metric="sqeuclidean"), array_type) + path, dist = tslearn.metrics.frechet_path_from_metric(dist_matrix, be=be) + np.testing.assert_allclose(dist, [9.560], rtol=1e-03) + + path, dist = tslearn.metrics.frechet_path_from_metric(s1, s2, metric="sqeuclidean", be=be) + np.testing.assert_allclose(dist, [9.560], rtol=1e-03) + + path, dist = tslearn.metrics.frechet_path( + [[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3], [2, 2], [3, 3]], + be=be + ) + np.testing.assert_allclose(dist, [1.414], rtol=1e-03) + assert path == [(0, 0), (1, 1), (1, 2), (1, 3), (2, 4)] + + path, dist = tslearn.metrics.frechet_path_from_metric( + [[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3], [2, 2], [3, 3]], + metric="euclidean", + be=be + ) + np.testing.assert_allclose(dist, [1.414], rtol=1e-03) + assert path == [(0, 0), (1, 1), (1, 2), (1, 3), (2, 4)] + + path, dist = tslearn.metrics.frechet_path_from_metric( + [[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3], [2, 2], [3, 3]], + metric="sqeuclidean", + be=be + ) + np.testing.assert_allclose(dist, [2], rtol=1e-07) + assert path == [(0, 0), (1, 1), (1, 2), (1, 3), (2, 4)] + + path, dist = tslearn.metrics.frechet_path_from_metric( + [[1, 1], [2, 2], [3, 3]], + [[1, 1], [2, 2], [3, 3], [2, 2], [3, 3]], + metric=lambda x, y: sum((x - y) ** 2), + be=be + ) + np.testing.assert_allclose(dist, [2], rtol=1e-07) + assert path == [(0, 0), (1, 1), (1, 2), (1, 3), (2, 4)] + + dists = tslearn.metrics.cdist_frechet( + [[1, 2, 2, 3], [1.0, 2.0, 3.0, 4.0]], + [[1, 2, 3], [2, 3, 4, 5]], + be=be or instantiate_backend(array_type) + ) + np.testing.assert_allclose(dists, [[0.0, 2], [1.0, 1]], atol=1e-5) + if be is not None: + assert be.belongs_to_backend(dists) + else: + assert instantiate_backend(array_type).belongs_to_backend(dists) + diff --git a/tslearn/tests/test_neighbors.py b/tslearn/tests/test_neighbors.py index 300a20464..87948e8e7 100644 --- 
a/tslearn/tests/test_neighbors.py +++ b/tslearn/tests/test_neighbors.py @@ -34,30 +34,36 @@ def test_constrained_paths(): y_pred_softdtw = model_softdtw.fit(X, y).predict(X) model_dtw = KNeighborsTimeSeriesClassifier( - n_neighbors=3, - metric="dtw" + n_neighbors=3, + metric="dtw" ) y_pred_dtw = model_dtw.fit(X, y).predict(X) np.testing.assert_equal(y_pred_dtw, y_pred_softdtw) model_ctw = KNeighborsTimeSeriesClassifier( - n_neighbors=3, - metric="ctw" + n_neighbors=3, + metric="ctw" ) # Just testing that things run, nothing smart here :( model_ctw.fit(X, y).predict(X) model_sax = KNeighborsTimeSeriesClassifier( - n_neighbors=3, - metric="sax", - metric_params={ - "alphabet_size_avg": 6, - "n_segments": 10 - } + n_neighbors=3, + metric="sax", + metric_params={ + "alphabet_size_avg": 6, + "n_segments": 10 + } ) model_sax.fit(X, y) + model_frechet = KNeighborsTimeSeriesClassifier( + n_neighbors=3, + metric="frechet" + ) + model_frechet.fit(X, y) + # The MINDIST of SAX is a lower bound of the euclidean distance euc_dist, _ = model_euc.kneighbors(X, n_neighbors=5) sax_dist, _ = model_sax.kneighbors(X, n_neighbors=5) From 8e0eebc1624ad04c3780b240a1a1e7cb7ca41964 Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 08:47:02 +0200 Subject: [PATCH 02/10] Fix mask handling --- tslearn/metrics/frechet.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py index 16c0bf7f3..9d7436dd6 100644 --- a/tslearn/metrics/frechet.py +++ b/tslearn/metrics/frechet.py @@ -1,3 +1,4 @@ +"""Frechet metric toolbox.""" import numpy @@ -214,7 +215,7 @@ def _njit_frechet_accumulated_matrix(s1, s2, mask, squared=True): for i in range(l1): for j in range(l2): - if mask[i, j]: + if numpy.isfinite(mask[i, j]): local_distance = numpy.linalg.norm(s1[i] - s2[j]) if squared: local_distance = local_distance ** 2 @@ -236,7 +237,7 @@ def _frechet_accumulated_matrix(s1, s2, mask, backend, metric, **kwds): for i in 
range(l1): for j in range(l2): - if mask[i, j]: + if backend.isfinite(mask[i, j]): local_distance = ( backend.pairwise_distances(s1[i].reshape(1, -1), s2[j].reshape(1, -1), @@ -261,7 +262,7 @@ def _njit_frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask) for i in range(l1): for j in range(l2): - if mask[i, j]: + if numpy.isfinite(mask[i, j]): acc_matrix[i + 1, j + 1] = max( distance_matrix[i, j], min(acc_matrix[i, j + 1], @@ -280,7 +281,7 @@ def _frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask, back for i in range(l1): for j in range(l2): - if mask[i, j]: + if backend.isfinite(mask[i, j]): acc_matrix[i + 1, j + 1] = max( distance_matrix[i, j], min(acc_matrix[i, j + 1], From d77f4860de28c43cc791e95bac5fc77dfa8ee4c6 Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 09:47:35 +0200 Subject: [PATCH 03/10] Fix documentation --- tslearn/metrics/frechet.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py index 9d7436dd6..83bd16af4 100644 --- a/tslearn/metrics/frechet.py +++ b/tslearn/metrics/frechet.py @@ -121,7 +121,7 @@ def frechet( -------- frechet_path : Get both the matching path and the similarity score frechet_path_from_metric : Compute similarity score and path - using a user-defined distance metric + using a user-defined distance metric cdist_frechet : Cross similarity matrix between time series datasets References @@ -290,6 +290,7 @@ def _frechet_accumulated_matrix_from_distance_matrix(distance_matrix, mask, back ) return acc_matrix[1:, 1:] + def frechet_path( s1, s2, @@ -371,7 +372,7 @@ def frechet_path( -------- frechet : Get only the similarity score frechet_path_from_metric : Compute similarity score and path - using a user-defined distance metric + using a user-defined distance metric cdist_frechet : Cross similarity matrix between time series datasets References @@ -391,6 +392,7 @@ def frechet_path( return_path=True ) + 
def frechet_path_from_metric( s1, s2=None, @@ -554,6 +556,7 @@ def frechet_path_from_metric( **kwds ) + def cdist_frechet( dataset1, dataset2=None, @@ -660,7 +663,7 @@ def cdist_frechet( frechet : Get only the similarity score frechet_path : Get both the matching path and the similarity score frechet_path_from_metric : Compute Frechet similarity score and path - using a user-defined distance metric + using a user-defined distance metric References ---------- From 97ddb27b5dadba2d1961070384e870ad8885473d Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 10:56:11 +0200 Subject: [PATCH 04/10] Update tests --- tslearn/neighbors/neighbors.py | 1 + tslearn/tests/test_metrics.py | 12 ++++++++ tslearn/tests/test_neighbors.py | 54 +++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/tslearn/neighbors/neighbors.py b/tslearn/neighbors/neighbors.py index 8fbc6dce2..e2f8983a6 100644 --- a/tslearn/neighbors/neighbors.py +++ b/tslearn/neighbors/neighbors.py @@ -207,6 +207,7 @@ def __sklearn_tags__(self): tags.input_tags.sparse = False return tags + class KNeighborsTimeSeries(KNeighborsTimeSeriesMixin, NearestNeighbors, BaseModelPackage): """Unsupervised learner for implementing neighbor searches for Time Series. 
diff --git a/tslearn/tests/test_metrics.py b/tslearn/tests/test_metrics.py index cc745c72f..e7eb51cd9 100644 --- a/tslearn/tests/test_metrics.py +++ b/tslearn/tests/test_metrics.py @@ -810,6 +810,18 @@ def soft_dtw_loss_function(x, y, dist_func, gamma): def test_frechet(): + with pytest.raises(ValueError): + tslearn.metrics.frechet( + [], + np.random.randn(3, 2) + ) + + with pytest.raises(ValueError): + tslearn.metrics.frechet( + np.random.randn(3, 1), + np.random.randn(3, 2) + ) + np.random.seed(42) s1 = np.random.randn(10, 4) s2 = np.random.randn(10, 4) diff --git a/tslearn/tests/test_neighbors.py b/tslearn/tests/test_neighbors.py index 87948e8e7..a79ccfa83 100644 --- a/tslearn/tests/test_neighbors.py +++ b/tslearn/tests/test_neighbors.py @@ -1,10 +1,46 @@ import numpy as np -from tslearn.neighbors import KNeighborsTimeSeriesClassifier + +import pytest + +from tslearn.neighbors import KNeighborsTimeSeries, KNeighborsTimeSeriesClassifier __author__ = 'Romain Tavenard romain.tavenard[at]univ-rennes2.fr' -def test_constrained_paths(): +def test_k_neighbors_timeseries(): + n, sz, d = 15, 10, 3 + rng = np.random.RandomState(0) + X = rng.randn(n, sz, d) + + model = KNeighborsTimeSeries() + np.testing.assert_equal( + model.fit(X).kneighbors(X, return_distance=False)[0], + [0, 13, 7, 12, 3] + ) + + model = KNeighborsTimeSeries(metric='ctw') + np.testing.assert_equal( + model.fit(X).kneighbors(X, return_distance=False)[0], + [0, 13, 7, 12, 3] + ) + + model = KNeighborsTimeSeries(metric='softdtw') + np.testing.assert_equal( + model.fit(X).kneighbors(X, return_distance=False)[0], + [0, 13, 12, 7, 3] + ) + + model = KNeighborsTimeSeries(metric='frechet') + np.testing.assert_equal( + model.fit(X).kneighbors(X, return_distance=False)[0], + [0, 3, 13, 5, 1] + ) + + with pytest.raises(ValueError): + KNeighborsTimeSeries(metric='invalid').fit(X) + + +def test_k_neighbors_classifier(): n, sz, d = 15, 10, 3 rng = np.random.RandomState(0) X = rng.randn(n, sz, d) @@ -58,11 +94,23 @@ 
def test_constrained_paths(): ) model_sax.fit(X, y) + model_frechet = KNeighborsTimeSeriesClassifier( + n_neighbors=1, + metric="frechet" + ) + np.testing.assert_equal(model_frechet.fit(X, y).predict(X), y) + model_frechet = KNeighborsTimeSeriesClassifier( n_neighbors=3, metric="frechet" ) - model_frechet.fit(X, y) + np.testing.assert_equal( + model_frechet.fit(X, y).kneighbors(X, return_distance=False)[0], + [0, 3, 13] + ) + + with pytest.raises(ValueError): + KNeighborsTimeSeriesClassifier(metric='invalid').fit(X, y) # The MINDIST of SAX is a lower bound of the euclidean distance euc_dist, _ = model_euc.kneighbors(X, n_neighbors=5) From 346c5ab12870ff1a412e2fa226c05f7dc252d9f9 Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 11:01:56 +0200 Subject: [PATCH 05/10] Linting --- tslearn/metrics/frechet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py index 83bd16af4..6d8a26005 100644 --- a/tslearn/metrics/frechet.py +++ b/tslearn/metrics/frechet.py @@ -123,7 +123,7 @@ def frechet( frechet_path_from_metric : Compute similarity score and path using a user-defined distance metric cdist_frechet : Cross similarity matrix between time series datasets - + References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for @@ -403,8 +403,8 @@ def frechet_path_from_metric( be=None, **kwds ): - r"""Compute Frechet similarity measure and an optimal alignment path - between (possibly multidimensional) time series using a distance metric + r"""Compute Frechet similarity measure and an optimal alignment path + between (possibly multidimensional) time series using a distance metric defined by the user. It is not required that both time series share the same size, but they must @@ -431,7 +431,7 @@ def frechet_path_from_metric( metric : string or callable (default: "precomputed") If metric is "precomputed", `s1` is assumed to be a distance matrix. 
- Otherwise, function used to compute the pairwise distances between each + Otherwise, function used to compute the pairwise distances between each points of `s1` and `s2`. If metric is a string, it must be one of the options compatible with sklearn.metrics.pairwise_distances. From 921c76ced2a0da8c8ed3cef6b756712d735c2fa6 Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 13:01:54 +0200 Subject: [PATCH 06/10] Fix doctests --- tslearn/metrics/frechet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py index 6d8a26005..f370d973e 100644 --- a/tslearn/metrics/frechet.py +++ b/tslearn/metrics/frechet.py @@ -87,10 +87,10 @@ def frechet( Examples -------- - >>> frechet([1, 2, 3], [1., 2., 2., 3.]) - np.float64(0.0) - >>> frechet([1, 2, 3], [1., 2., 2., 3., 4.]) - np.float64(1.0) + >>> float(frechet([1, 2, 3], [1., 2., 2., 3.])) + 0.0 + >>> float(frechet([1, 2, 3], [1., 2., 2., 3., 4.])) + 1.0 The PyTorch backend can be used to compute gradients: From d5983c9fa810155ab3e5991c54a447dcdc51b3e7 Mon Sep 17 00:00:00 2001 From: charavelg Date: Wed, 17 Sep 2025 15:02:56 +0200 Subject: [PATCH 07/10] Fix doctests without torch --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 533f687a9..8b407fa1e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -299,7 +299,7 @@ jobs: set -xe python -m pip install pytest-azurepipelines pip list - python -m pytest -v tslearn/ --doctest-modules -k 'not test_ and not tslearn.metrics.softdtw_variants.soft_dtw and not tslearn.metrics.softdtw_variants.cdist_soft_dtw and not tslearn.metrics.dtw_variants.dtw or tslearn.metrics.dtw_variants.dtw_ or test_all_estimators' + python -m pytest -v tslearn/ --doctest-modules -k 'not test_ and not tslearn.metrics.softdtw_variants.soft_dtw and not tslearn.metrics.frechet.frechet and not 
tslearn.metrics.softdtw_variants.cdist_soft_dtw and not tslearn.metrics.dtw_variants.dtw or tslearn.metrics.dtw_variants.dtw_ or test_all_estimators' displayName: 'Doctest' - job: 'codecov' # must be a separate job to only disable Numbas's JIT here From e5b9fe7df9521e6539a242f47c3a661f3e8d6c3a Mon Sep 17 00:00:00 2001 From: charavelg Date: Thu, 25 Sep 2025 18:14:59 +0200 Subject: [PATCH 08/10] Clean up --- tslearn/metrics/frechet.py | 36 +++++++++++++++++++++------------- tslearn/neighbors/neighbors.py | 11 +++-------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tslearn/metrics/frechet.py b/tslearn/metrics/frechet.py index f370d973e..da93f995f 100644 --- a/tslearn/metrics/frechet.py +++ b/tslearn/metrics/frechet.py @@ -23,7 +23,7 @@ def frechet( itakura_max_slope=None, be=None ): - r"""Compute Frechet similarity measure between + r"""Compute Frechet similarity [1]_ measure between (possibly multidimensional) time series and return it. Frechet similarity score is computed as the maximum distance between @@ -51,7 +51,7 @@ def frechet( sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. - The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [2]_, it controls how far in time we can go in order to match a given point from one time series to a point in another time series. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -126,7 +126,9 @@ def frechet( References ---------- - .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + .. [1] FRÉCHET, M. "Sur quelques points du calcul fonctionnel. + Rendiconti del Circolo Mathematico di Palermo", 22, 1–74, 1906. + .. [2] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 
43--49, 1978. @@ -299,7 +301,7 @@ def frechet_path( itakura_max_slope=None, be=None, ): - r"""Compute Frechet similarity measure between + r"""Compute Frechet similarity measure [1]_ between (possibly multidimensional) time series and an optimal alignment path. Frechet distance is computed as the maximium distance between aligned time series, @@ -322,7 +324,7 @@ def frechet_path( Global constraint to restrict admissible paths for Frechet distance. sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. - The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [2]_, it controls how far in time we can go in order to match a given point from one time series to a point in another time series. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -377,7 +379,9 @@ def frechet_path( References ---------- - .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + .. [1] FRÉCHET, M. "Sur quelques points du calcul fonctionnel. + Rendiconti del Circolo Mathematico di Palermo", 22, 1–74, 1906. + .. [2] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. @@ -403,7 +407,7 @@ def frechet_path_from_metric( be=None, **kwds ): - r"""Compute Frechet similarity measure and an optimal alignment path + r"""Compute Frechet similarity measure and an optimal alignment path [1]_ between (possibly multidimensional) time series using a distance metric defined by the user. @@ -445,7 +449,7 @@ def frechet_path_from_metric( sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. 
- The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [2]_, it controls how far in time we can go in order to match a given point from one time series to a point in another time series. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -533,9 +537,11 @@ def frechet_path_from_metric( References ---------- - .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for - spoken word recognition," IEEE Transactions on Acoustics, Speech and - Signal Processing, vol. 26(1), pp. 43--49, 1978. + .. [1] FRÉCHET, M. "Sur quelques points du calcul fonctionnel. + Rendiconti del Circolo Mathematico di Palermo", 22, 1–74, 1906. + .. [2] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for + spoken word recognition," IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. 26(1), pp. 43--49, 1978. .. _pairwise_distances: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html @@ -568,7 +574,7 @@ def cdist_frechet( be=None, ): r"""Compute cross-similarity matrix using Frechet - similarity measure. + similarity measure [1]_. Frechet is computed as the maximum distance between aligned time series, i.e., if :math:`\pi` is an optimal alignment path: @@ -600,7 +606,7 @@ def cdist_frechet( sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. - The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [1]_, + The Sakoe-Chiba radius corresponds to the parameter :math:`\delta` mentioned in [2]_, it controls how far in time we can go in order to match a given point from one time series to a point in another time series. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -667,7 +673,9 @@ def cdist_frechet( References ---------- - .. [1] H. Sakoe, S. 
Chiba, "Dynamic programming algorithm optimization for + .. [1] FRÉCHET, M. "Sur quelques points du calcul fonctionnel. + Rendiconti del Circolo Mathematico di Palermo", 22, 1–74, 1906. + .. [2] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. """ # noqa: E501 diff --git a/tslearn/neighbors/neighbors.py b/tslearn/neighbors/neighbors.py index e2f8983a6..412caaa63 100644 --- a/tslearn/neighbors/neighbors.py +++ b/tslearn/neighbors/neighbors.py @@ -2,7 +2,6 @@ from scipy.spatial.distance import cdist as scipy_cdist -from sklearn import neighbors from sklearn.neighbors import (KNeighborsClassifier, NearestNeighbors, KNeighborsRegressor) from sklearn.utils.validation import check_is_fitted @@ -24,8 +23,6 @@ check_dims ) -neighbors.VALID_METRICS['brute'].extend(['dtw', 'softdtw', 'sax', 'ctw']) - class KNeighborsTimeSeriesMixin(): """Mixin for k-neighbors searches on Time Series.""" @@ -147,9 +144,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): else: fit_X = self._X_fit - if (self.metric in TSLEARN_VALID_METRICS or - self.metric in [cdist_dtw, cdist_ctw, - cdist_soft_dtw, cdist_sax]): + if self.metric in TSLEARN_VALID_METRICS: full_dist_matrix = self._precompute_cross_dist(X, other_X=fit_X) elif self.metric in ["euclidean", "sqeuclidean", "cityblock"]: @@ -160,8 +155,8 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True): else: raise ValueError("Unrecognized time series metric string: %s " "(should be one of 'dtw', 'softdtw', " - "'sax', 'euclidean', 'sqeuclidean' " - "or 'cityblock')" % self.metric) + "'frechet', 'sax', 'euclidean', 'sqeuclidean'" + " or 'cityblock')" % self.metric) # Code similar to sklearn (sklearn/neighbors/base.py), to make sure # that TimeSeriesKNeighbor~(metric='euclidean') has the same results as From 45a06bc79b1a104ef08db578ef11190287ae5ae5 Mon Sep 17 00:00:00 2001 
From: charavelg Date: Mon, 29 Sep 2025 11:53:38 +0200 Subject: [PATCH 09/10] Add example --- docs/_static/custom.css | 35 ++-------------- docs/conf.py | 2 + docs/examples/metrics/plot_frechet.py | 59 +++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 32 deletions(-) create mode 100644 docs/examples/metrics/plot_frechet.py diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 41ce56b42..4523ed549 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,33 +1,4 @@ -code { - color: #055781; -} - -table.longtable tbody tr:nth-child(even) { - background-color: #FDFDFD; - -} -table.longtable tbody tr:nth-child(odd) { - background-color: #F0F7FA; -} - -table.longtable tbody tr { - border-style: solid none solid none; - border-width: 1px 0 1px 0; - border-color: #ddd; -} - -table.longtable tbody td { - border-color: #ddd; -} - -.alert-info { - background-color: #d2edf6; - border-color: #d2edf6; - color: #555555; -} - -@media screen and (min-width: 768px) and (max-width: 992px) { - .navbar-form .form-control { - display: none; - } +.sphx-glr-thumbcontainer{ + text-align: center; + justify-content: center; } \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index cab0110f5..db8059794 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -83,6 +83,7 @@ def matplotlib_svg_scraper(*args, **kwargs): 'subsection_order': ["examples/metrics", "examples/neighbors", "examples/clustering", "examples/classification", "examples/autodiff", "examples/misc"].index, + 'within_subsection_order': "FileNameSortKey", 'image_scrapers': (matplotlib_svg_scraper,), } @@ -237,6 +238,7 @@ def matplotlib_svg_scraper(*args, **kwargs): # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ['_static'] +html_css_files = ['custom.css'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/docs/examples/metrics/plot_frechet.py b/docs/examples/metrics/plot_frechet.py new file mode 100644 index 000000000..308b4adc9 --- /dev/null +++ b/docs/examples/metrics/plot_frechet.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +Frechet +====================== + +This example illustrates the use of Frechet distance between time +series and plots the matches obtained by the method [1]_ compared to DTW. + +The Frechet distance is plotted in red: + +.. math:: + + Frechet(X, Y) = \max_{(i, j) \in \pi} \|X_{i} - Y_{j}\| + +.. [1] FRÉCHET, M. "Sur quelques points du calcul fonctionnel. + Rendiconti del Circolo Mathematico di Palermo", 22, 1–74, 1906. +""" + +# License: BSD 3 clause + +import matplotlib.pyplot as plt +import numpy as np + +from tslearn.metrics import frechet_path, dtw_path + +np.random.seed(42) + +nb_points = 100 +angle1 = 0.25*np.linspace(0, 4*np.pi, nb_points) +s1 = np.sin(angle1) + 0.1 * np.random.rand(nb_points) + 1 +angle2 = np.linspace(0, 2 * np.pi, nb_points) +s2 = 0.5 * np.sin(angle2) + 0.1 * np.random.rand(nb_points) + +path_dtw, _ = dtw_path(s1, s2) +path_frechet, distance_frechet = frechet_path(s1, s2) + +plt.figure(figsize=(8, 4)) +ax = plt.subplot(1, 2, 1) +ax.plot(s1) +ax.plot(s2) +for (i, j) in path_frechet: + is_max = np.linalg.norm(s1[i] - s2[j]) == distance_frechet + ax.plot( + [i, j], + [s1[i], s2[j]], + 'rd:' if is_max else 'k--', + alpha=1 if is_max else 0.1 + ) +ax.set_title("Frechet") + +ax = plt.subplot(1, 2, 2) +ax.plot(s1) +ax.plot(s2) +for (i, j) in path_dtw: + ax.plot([i, j],[s1[i], s2[j]], 'k--', alpha=0.1) +ax.set_title("DTW") + +plt.tight_layout() +plt.show() From f4017963e02b29989b086d304352d8cf19d751a8 Mon Sep 17 00:00:00 2001 From: charavelg Date: Mon, 29 Sep 2025 12:10:16 +0200 Subject: [PATCH 10/10] 
update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a1ccb9c7d..3cb885be7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Changelogs for this project are recorded in this file since v0.2.0. * `per_timeseries` and `per_feature` options for min-max and mean-variance scalers ([#536](https://github.com/tslearn-team/tslearn/issues/536)) * `TimeSeriesImputer`class: missing value imputer for time series ([#564](https://github.com/tslearn-team/tslearn/issues/564)) +* Frechet metrics and KNeighbors integration ([#402](https://github.com/tslearn-team/tslearn/issues/402)) ## [v0.6.4]