diff --git a/.all-contributorsrc b/.all-contributorsrc
index ceaf4768928..eaac5917150 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -2405,6 +2405,15 @@
"bug",
"code"
]
+ },
+ {
+ "login": "pirnerjonas",
+ "name": "Jonas Pirner",
+ "avatar_url": "https://avatars.githubusercontent.com/u/48887249?v=4",
+ "profile": "https://github.com/pirnerjonas",
+ "contributions": [
+ "doc"
+ ]
}
]
}
diff --git a/.github/actions/test-base/action.yml b/.github/actions/test-base/action.yml
index 8c131fd4b75..78b802be054 100644
--- a/.github/actions/test-base/action.yml
+++ b/.github/actions/test-base/action.yml
@@ -4,6 +4,12 @@ inputs:
python-version-identifier:
description: python version to run tests
required: true
+ sub-sample-estimators:
+ description: test only subset of estimators
+ required: true
+ test-affected-estimators:
+ description: test only modified estimators
+ required: true
runs:
using: composite
steps:
@@ -17,7 +23,7 @@ runs:
run: python3 -m pip install .[tests]
shell: bash
- name: unit test step
- run: python3 -m pytest sktime/base
+ run: python3 -m pytest sktime/base --matrixdesign ${{ inputs.sub-sample-estimators }} --only_changed_modules ${{ inputs.test-affected-estimators }}
shell: bash
- name: test coverage step
uses: codecov/codecov-action@v3
diff --git a/.github/actions/test-component/action.yml b/.github/actions/test-component/action.yml
index 22446c1eee8..6e089e5b737 100644
--- a/.github/actions/test-component/action.yml
+++ b/.github/actions/test-component/action.yml
@@ -7,6 +7,12 @@ inputs:
python-version-identifier:
description: python version to run tests
required: true
+ sub-sample-estimators:
+ description: test only subset of estimators
+ required: true
+ test-affected-estimators:
+ description: test only modified estimators
+ required: true
runs:
using: composite
steps:
@@ -20,7 +26,7 @@ runs:
run: python3 -m pip install .[${{ inputs.sktime-component-identifier }},tests]
shell: bash
- name: unit test step
- run: python3 -m pytest sktime/${{ inputs.sktime-component-identifier }}
+ run: python3 -m pytest sktime/${{ inputs.sktime-component-identifier }} --matrixdesign ${{ inputs.sub-sample-estimators }} --only_changed_modules ${{ inputs.test-affected-estimators }}
shell: bash
- name: test coverage step
uses: codecov/codecov-action@v3
diff --git a/.github/workflows/test_all.yml b/.github/workflows/test_all.yml
index 5f3c0c33390..371cea14e22 100644
--- a/.github/workflows/test_all.yml
+++ b/.github/workflows/test_all.yml
@@ -24,6 +24,8 @@ jobs:
uses: ./.github/actions/test-base
with:
python-version-identifier: ${{ matrix.python-version }}
+ sub-sample-estimators: "False"
+ test-affected-estimators: "False"
test_components:
name: test individual components
strategy:
@@ -55,3 +57,5 @@ jobs:
with:
sktime-component-identifier: ${{ matrix.sktime-component }}
python-version-identifier: ${{ matrix.python-version }}
+ sub-sample-estimators: "False"
+ test-affected-estimators: "False"
diff --git a/.github/workflows/test_base.yml b/.github/workflows/test_base.yml
index 70ff10eaa8d..1f577607faa 100644
--- a/.github/workflows/test_base.yml
+++ b/.github/workflows/test_base.yml
@@ -42,3 +42,5 @@ jobs:
uses: ./.github/actions/test-base
with:
python-version-identifier: ${{ matrix.python-version }}
+ sub-sample-estimators: "True"
+ test-affected-estimators: "True"
diff --git a/.github/workflows/test_components.yml b/.github/workflows/test_components.yml
index 1642085db4f..5d56d7a096a 100644
--- a/.github/workflows/test_components.yml
+++ b/.github/workflows/test_components.yml
@@ -59,3 +59,5 @@ jobs:
with:
sktime-component-identifier: ${{ matrix.sktime-component }}
python-version-identifier: ${{ matrix.python-version }}
+ sub-sample-estimators: "True"
+ test-affected-estimators: "True"
diff --git a/build_tools/docker/py37.dockerfile b/build_tools/docker/py37.dockerfile
deleted file mode 100644
index 230d23c029a..00000000000
--- a/build_tools/docker/py37.dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM python:3.7.16-bullseye
-
-WORKDIR /usr/src/sktime
-
-COPY . .
-
-RUN python -m pip install -U pip
-RUN python -m pip install .[all_extras,dev,binder]
diff --git a/docs/source/api_reference/alignment.rst b/docs/source/api_reference/alignment.rst
index d40a87b29cb..5306ed44cb4 100644
--- a/docs/source/api_reference/alignment.rst
+++ b/docs/source/api_reference/alignment.rst
@@ -38,6 +38,14 @@ Dynamic time warping
AlignerDtwNumba
+.. currentmodule:: sktime.alignment.lucky
+
+.. autosummary::
+ :toctree: auto_generated/
+ :template: class.rst
+
+ AlignerLuckyDtw
+
Edit distance based aligners
----------------------------
diff --git a/docs/source/api_reference/dists_kernels.rst b/docs/source/api_reference/dists_kernels.rst
index ed8bf29802f..c6201165bd2 100644
--- a/docs/source/api_reference/dists_kernels.rst
+++ b/docs/source/api_reference/dists_kernels.rst
@@ -113,6 +113,7 @@ Dynamic Time Warping Distances
:template: class.rst
DtwDist
+ DtwPythonDist
DtwDistTslearn
SoftDtwDistTslearn
@@ -124,6 +125,14 @@ Dynamic Time Warping Distances
CtwDistTslearn
+.. currentmodule:: sktime.dists_kernels.lucky
+
+.. autosummary::
+ :toctree: auto_generated/
+ :template: class.rst
+
+ LuckyDtwDist
+
Time warping distances can also be obtained by composing ``DistFromAligner`` with
a time warping aligner, see docstring of ``DistFromAligner``:
diff --git a/docs/source/developer_guide/continuous_integration.rst b/docs/source/developer_guide/continuous_integration.rst
index 16b72bf7dfb..8b930bb8426 100644
--- a/docs/source/developer_guide/continuous_integration.rst
+++ b/docs/source/developer_guide/continuous_integration.rst
@@ -157,8 +157,6 @@ with the image of name ``PYTHON_VERSION`` based on the following python versions
+----------------+----------------+
| Python version | PYTHON_VERSION |
+================+================+
-| 3.7.16 | py37 |
-+----------------+----------------+
| 3.8.16 | py38 |
+----------------+----------------+
| 3.9.16 | py39 |
@@ -171,8 +169,8 @@ with the image of name ``PYTHON_VERSION`` based on the following python versions
The dockerized tests can be also executed via `make <https://www.gnu.org/software/make/>`_,
via the command ``make dockertest PYTHON_VERSION=<python version>``.
The ``PYTHON_VERSION`` argument specifies the python version and is the same string as in the table above.
-For example, to execute the tests in the Python version ``3.7.16``,
-use ``make dockertest PYTHON_VERSION=py37``.
+For example, to execute the tests in the Python version ``3.8.16``,
+use ``make dockertest PYTHON_VERSION=py38``.
Continuous integration
diff --git a/examples/02_classification.ipynb b/examples/02_classification.ipynb
index 8106380adab..0851ebda101 100644
--- a/examples/02_classification.ipynb
+++ b/examples/02_classification.ipynb
@@ -1061,7 +1061,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Evaluation is simila to `sklearn` classifiers - we split a dataset and evaluate performance on the test set.\n",
+ "Evaluation is similar to `sklearn` classifiers - we split a dataset and evaluate performance on the test set.\n",
"\n",
"This includes as additional steps:\n",
"\n",
diff --git a/pyproject.toml b/pyproject.toml
index dc991353821..a6348d26a3b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,7 +80,7 @@ forecasting = [
"statsforecast>=0.5.2,<1.7",
"statsmodels>=0.12.1,<0.15",
"tbats>=1.1,<1.2",
- "arch>=5.6.0,<6.2.0",
+ "arch>=5.6.0,<6.3.0",
]
networks = [
"keras-self-attention>=0.51,<0.52",
@@ -139,7 +139,7 @@ all_extras = [
"seaborn>=0.11.0",
"seasonal",
"skpro>=2.0.0,<2.1.0",
- "statsforecast>=0.5.2,<1.6.0; python_version < '3.11'",
+ "statsforecast>=0.5.2,<1.7.0",
"statsmodels>=0.12.1",
"stumpy>=1.5.1; python_version < '3.11'",
"tbats>=1.1.0",
@@ -147,7 +147,7 @@ all_extras = [
"tsfresh>=0.17.0",
"tslearn>=0.5.2,<0.6.0; python_version < '3.11'",
"xarray",
- "arch>=5.6.0,<6.2.0",
+ "arch>=5.6.0,<6.3.0",
]
all_extras_pandas2 = [
@@ -175,7 +175,7 @@ all_extras_pandas2 = [
"seaborn>=0.11.0",
"seasonal",
"skpro>=2.0.0,<2.1.0",
- "statsforecast>=0.5.2,<1.6.0; python_version < '3.11'",
+ "statsforecast>=0.5.2,<1.7.0",
"statsmodels>=0.12.1",
"stumpy>=1.5.1; python_version < '3.11'",
"tbats>=1.1.0",
@@ -183,7 +183,7 @@ all_extras_pandas2 = [
"tsfresh>=0.17.0",
"tslearn>=0.5.2,<0.6.0; python_version < '3.11'",
"xarray",
- "arch>=5.6.0,<6.2.0",
+ "arch>=5.6.0,<6.3.0",
]
cython_extras = [
diff --git a/sktime/alignment/base.py b/sktime/alignment/base.py
index 0ca7b98d932..111866835ee 100644
--- a/sktime/alignment/base.py
+++ b/sktime/alignment/base.py
@@ -337,4 +337,17 @@ def _get_distance_matrix(self):
distmat: an (n x n) np.array of floats, where n is length of X passed to fit
[i,j]-th entry is alignment distance between X[i] and X[j] passed to fit
"""
- raise NotImplementedError
+ # the default implementation assumes
+ # that the aligner can only align two sequences
+ if self.get_tag("capability:multiple-alignment", False):
+ raise NotImplementedError
+
+ import numpy as np
+
+ dist = self.get_distance()
+
+ distmat = np.zeros((2, 2), dtype="float")
+ distmat[0, 1] = dist
+ distmat[1, 0] = dist
+
+ return distmat
diff --git a/sktime/alignment/dtw_numba.py b/sktime/alignment/dtw_numba.py
index 70648c6e36b..b466cec72e0 100644
--- a/sktime/alignment/dtw_numba.py
+++ b/sktime/alignment/dtw_numba.py
@@ -110,8 +110,8 @@ class AlignerDtwNumba(BaseAligner):
>>> from sktime.utils._testing.series import _make_series
>>> from sktime.alignment.dtw_numba import AlignerDtwNumba
>>>
- >>> X0 = _make_series() # doctest: +SKIP
- >>> X1 = _make_series() # doctest: +SKIP
+ >>> X0 = _make_series(return_mtype="pd.DataFrame") # doctest: +SKIP
+ >>> X1 = _make_series(return_mtype="pd.DataFrame") # doctest: +SKIP
>>> d = AlignerDtwNumba(weighted=True, derivative=True) # doctest: +SKIP
>>> align = d.fit([X0, X1]).get_alignment() # doctest: +SKIP
"""
diff --git a/sktime/alignment/dtw_python.py b/sktime/alignment/dtw_python.py
index e868e3b628b..7334c2b73de 100644
--- a/sktime/alignment/dtw_python.py
+++ b/sktime/alignment/dtw_python.py
@@ -9,7 +9,6 @@
import pandas as pd
from sktime.alignment.base import BaseAligner
-from sktime.utils.validation._dependencies import _check_soft_dependencies
class AlignerDTW(BaseAligner):
@@ -18,30 +17,31 @@ class AlignerDTW(BaseAligner):
Behaviour: computes the full alignment between X[0] and X[1]
assumes pairwise alignment (only two series) and univariate
if multivariate series are passed:
- alignment is computed on univariate series with variable_to_align;
- if this is not set, defaults to the first variable of X[0]
+ alignment is computed on univariate series with variable_to_align;
+ if this is not set, defaults to the first variable of X[0]
raises an error if variable_to_align is not present in X[0] or X[1]
Parameters
----------
dist_method : str, optional, default = "euclidean"
distance function to use, a distance on real n-space
- one of the functions in `scipy.spatial.distance.cdist`
+ one of the functions in `scipy.spatial.distance.cdist`
step_pattern : str, optional, or dtw_python stepPattern object, optional
step pattern to use in time warping
one of: 'symmetric1', 'symmetric2' (default), 'asymmetric',
- and dozens of other more non-standard step patterns;
- list can be displayed by calling help(stepPattern) in dtw
+ and dozens of other more non-standard step patterns;
+ list can be displayed by calling help(stepPattern) in dtw
window_type : string, the chosen windowing function
"none", "itakura", "sakoechiba", or "slantedband"
- "none" (default) - no windowing
- "sakoechiba" - a band around main diagonal
- "slantedband" - a band around slanted diagonal
- "itakura" - Itakura parallelogram
- open_begin, open_end : boolean, optional, default=False
+ "none" (default) - no windowing
+ "sakoechiba" - a band around main diagonal
+ "slantedband" - a band around slanted diagonal
+ "itakura" - Itakura parallelogram
+ open_begin : boolean, optional, default=False
+ open_end : boolean, optional, default=False
whether to perform open-ended alignments
- open_begin = whether alignment open ended at start (low index)
- open_end = whether alignment open ended at end (high index)
+ open_begin = whether alignment open ended at start (low index)
+ open_end = whether alignment open ended at end (high index)
variable_to_align : string, default = first variable in X[0] as passed to fit
which variable to use for univariate alignment
"""
@@ -203,32 +203,30 @@ def get_test_params(cls, parameter_set="default"):
class AlignerDTWfromDist(BaseAligner):
"""Aligner interface for dtw-python using pairwise transformer.
- uses transformer for computation of distance matrix passed to alignment
+ Uses transformer for computation of distance matrix passed to alignment.
- Components
+ Parameters
----------
dist_trafo: estimator following the pairwise transformer template
i.e., instance of concrete class implementing template BasePairwiseTransformer
-
- Parameters
- ----------
step_pattern : str, optional, default = "symmetric2",
- or dtw_python stepPattern object, optional
+ or dtw_python stepPattern object, optional
step pattern to use in time warping
one of: 'symmetric1', 'symmetric2' (default), 'asymmetric',
- and dozens of other more non-standard step patterns;
- list can be displayed by calling help(stepPattern) in dtw
+ and dozens of other more non-standard step patterns;
+ list can be displayed by calling help(stepPattern) in dtw
window_type: str optional, default = "none"
the chosen windowing function
"none", "itakura", "sakoechiba", or "slantedband"
- "none" (default) - no windowing
- "sakoechiba" - a band around main diagonal
- "slantedband" - a band around slanted diagonal
- "itakura" - Itakura parallelogram
- open_begin, open_end: boolean, optional, default=False
+ "none" (default) - no windowing
+ "sakoechiba" - a band around main diagonal
+ "slantedband" - a band around slanted diagonal
+ "itakura" - Itakura parallelogram
+ open_begin : boolean, optional, default=False
+ open_end : boolean, optional, default=False
whether to perform open-ended alignments
- open_begin = whether alignment open ended at start (low index)
- open_end = whether alignment open ended at end (high index)
+ open_begin = whether alignment open ended at start (low index)
+ open_end = whether alignment open ended at end (high index)
"""
_tags = {
@@ -236,6 +234,7 @@ class AlignerDTWfromDist(BaseAligner):
"capability:distance": True, # does compute/return overall distance?
"capability:distance-matrix": True, # does compute/return distance matrix?
"python_dependencies": "dtw-python",
+ "python_dependencies_alias": {"dtw-python": "dtw"},
}
def __init__(
@@ -246,16 +245,6 @@ def __init__(
open_begin=False,
open_end=False,
):
- """Construct instance."""
- # added manually since dtw-python has an import alias
- # default check from super.__init__ does not allow aliases
- _check_soft_dependencies(
- "dtw-python",
- package_import_alias={"dtw-python": "dtw"},
- severity="error",
- obj=self,
- suppress_import_stdout=True,
- )
super().__init__()
self.dist_trafo = dist_trafo
diff --git a/sktime/alignment/lucky.py b/sktime/alignment/lucky.py
new file mode 100644
index 00000000000..5fb25afcd14
--- /dev/null
+++ b/sktime/alignment/lucky.py
@@ -0,0 +1,162 @@
+# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
+"""Lucky sequence alignment."""
+
+import numpy as np
+import pandas as pd
+
+from sktime.alignment.base import BaseAligner
+
+
+class AlignerLuckyDtw(BaseAligner):
+    """Alignment path based on lucky dynamic time warping distance.
+
+    This aligner returns the alignment path produced by the lucky time warping
+    distance [1]_. Uses Euclidean distance for multivariate data.
+
+    Based on code by Krisztian A Buza's research group.
+
+    Parameters
+    ----------
+    window : int, optional (default=None)
+        Maximum distance between indices of aligned series, aka warping window.
+        If None, defaults to max(len(ts1), len(ts2)), i.e., no warping window.
+        Widened to abs(len(ts1) - len(ts2)) if smaller, so that a path exists.
+
+    References
+    ----------
+    .. [1] Stephan Spiegel, Brijnesh-Johannes Jain, and Sahin Albayrak.
+        Fast time series classification under lucky time warping distance.
+        Proceedings of the 29th Annual ACM Symposium on Applied Computing. 2014.
+    """
+
+    _tags = {
+        "capability:multiple-alignment": False,  # can align more than two sequences?
+        "capability:distance": True,  # does compute/return overall distance?
+        "capability:distance-matrix": True,  # does compute/return distance matrix?
+        "alignment_type": "full",  # does the aligner produce full or partial alignment
+    }
+
+    def __init__(self, window=None):
+        self.window = window
+
+        super().__init__()
+
+    def _fit(self, X, Z=None):
+        """Fit alignment given series/sequences to align.
+
+        Walks greedily from the first to the last index pair of both series,
+        taking the cheapest admissible step (diagonal, in i, or in j) each time.
+
+        Parameters
+        ----------
+        X: list of pd.DataFrame (sequence) of length n - panel of series to align
+        Z: pd.DataFrame with n rows, optional; metadata, row correspond to indices of X
+
+        Returns
+        -------
+        self : reference to self
+
+        Writes to self:
+            align_i_, align_j_ : list of int, iloc indices along the alignment path
+            dist_ : float, sum of squared Euclidean distances along the path
+        """
+        ts1, ts2 = X
+        ts1 = ts1.values
+        ts2 = ts2.values
+
+        len_ts1 = len(ts1)
+        len_ts2 = len(ts2)
+
+        window = self.window
+        if window is None:
+            window = max(len_ts1, len_ts2)
+        # a window narrower than the length difference admits no path to the end
+        # point, the loop below would then never terminate - widen it if needed
+        window = max(window, abs(len_ts1 - len_ts2))
+
+        def vec_dist(x):
+            return np.linalg.norm(x) ** 2
+
+        # the path always starts by matching the first elements of both series
+        d = vec_dist(ts1[0] - ts2[0])
+
+        i = 0
+        j = 0
+        align_i = [i]
+        align_j = [j]
+
+        while i + 1 < len_ts1 or j + 1 < len_ts2:
+            # admissible moves as (cost, new_i, new_j); on equal cost the order
+            # diagonal, step in i, step in j decides, as min returns the first
+            moves = []
+            if i + 1 < len_ts1 and j + 1 < len_ts2:
+                moves.append((vec_dist(ts1[i + 1] - ts2[j + 1]), i + 1, j + 1))
+            if i + 1 < len_ts1 and abs(i + 1 - j) <= window:
+                moves.append((vec_dist(ts1[i + 1] - ts2[j]), i + 1, j))
+            if j + 1 < len_ts2 and abs(j + 1 - i) <= window:
+                moves.append((vec_dist(ts1[i] - ts2[j + 1]), i, j + 1))
+
+            d_best, i, j = min(moves, key=lambda move: move[0])
+            d = d + d_best
+            align_i.append(i)
+            align_j.append(j)
+
+        self.align_i_ = align_i
+        self.align_j_ = align_j
+        self.dist_ = d
+
+        return self
+
+    def _get_alignment(self):
+        """Return alignment for sequences/series passed in fit (iloc indices).
+
+        Behaviour: returns an alignment for sequences in X passed to fit
+            model should be in fitted state, fitted model parameters read from self
+
+        Returns
+        -------
+        pd.DataFrame in alignment format, with columns 'ind'+str(i) for integer i
+            cols contain iloc index of X[i] mapped to alignment coordinate for alignment
+        """
+        align = pd.DataFrame({"ind0": self.align_i_, "ind1": self.align_j_})
+        return align
+
+    def _get_distance(self):
+        """Return overall distance of alignment.
+
+        core logic
+
+        Behaviour: returns overall distance corresponding to alignment
+            not all aligners will return or implement this (optional)
+        Accesses in self:
+            Fitted model attributes ending in "_".
+
+        Returns
+        -------
+        distance: float - overall distance between all elements of X passed to fit
+        """
+        return self.dist_
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            There are currently no reserved values for aligners.
+
+        Returns
+        -------
+        params : dict or list of dict, default = {}
+            Parameters to create testing instances of the class
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`
+        """
+        params0 = {}
+        params1 = {"window": 3}
+
+        return [params0, params1]
diff --git a/sktime/dists_kernels/__init__.py b/sktime/dists_kernels/__init__.py
index 5f384da981c..30a588a16e0 100644
--- a/sktime/dists_kernels/__init__.py
+++ b/sktime/dists_kernels/__init__.py
@@ -9,6 +9,7 @@
from sktime.dists_kernels.dtw import DtwDist
from sktime.dists_kernels.dummy import ConstantPwTrafoPanel
from sktime.dists_kernels.edit_dist import EditDist
+from sktime.dists_kernels.lucky import LuckyDtwDist
from sktime.dists_kernels.scipy_dist import ScipyDist
from sktime.dists_kernels.signature_kernel import SignatureKernel
@@ -17,6 +18,7 @@
"BasePairwiseTransformerPanel",
"AggrDist",
"DtwDist",
+ "LuckyDtwDist",
"EditDist",
"FlatDist",
"ScipyDist",
diff --git a/sktime/dists_kernels/base/_delegate.py b/sktime/dists_kernels/base/_delegate.py
new file mode 100644
index 00000000000..d69527f09c0
--- /dev/null
+++ b/sktime/dists_kernels/base/_delegate.py
@@ -0,0 +1,64 @@
+"""Delegator mixin that delegates all methods to wrapped transformer.
+
+Useful for building estimators where all but one or a few methods are delegated.
+For that purpose, inherit from this estimator and then override only the methods that
+are not delegated.
+"""
+# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
+
+__author__ = ["fkiraly"]
+__all__ = ["_DelegatedPairwiseTransformerPanel"]
+
+from sktime.dists_kernels.base import BasePairwiseTransformerPanel
+
+
+class _DelegatedPairwiseTransformerPanel(BasePairwiseTransformerPanel):
+ """Delegator mixin that delegates all methods to wrapped transformer.
+
+ Delegates inner transformer methods to a wrapped estimator.
+ Wrapped estimator is value of attribute with name self._delegate_name.
+ By default, this is "estimator_", i.e., delegates to self.estimator_.
+ To override delegation, override _delegate_name attribute in child class.
+
+ Delegates the following inner underscore methods:
+ _transform
+
+ Does NOT delegate get_params, set_params.
+ get_params, set_params will hence use one additional nesting level by default.
+
+ Does NOT delegate or copy tags, this should be done in a child class if required.
+ """
+
+ # attribute for _DelegatedPairwiseTransformerPanel, which then delegates
+ # all non-overridden methods to those of getattr(self, _delegate_name)
+ # see further details in _DelegatedPairwiseTransformerPanel docstring
+ _delegate_name = "estimator_"
+
+ def _get_delegate(self):
+ return getattr(self, self._delegate_name)  # wrapped estimator, looked up by name
+
+ def _transform(self, X, X2=None):
+ """Compute distance/kernel matrix.
+
+ Forwards to the public ``transform`` of the wrapped estimator.
+
+ Behaviour: returns pairwise distance/kernel matrix
+ between samples in X and X2 (equal to X if not passed)
+
+ Parameters
+ ----------
+ X : guaranteed to be Series or Panel of mtype X_inner_mtype, n instances
+ if X_inner_mtype is list, _transform must support all types in it
+ Data to be transformed
+ X2 : guaranteed to be Series or Panel of mtype X_inner_mtype, m instances
+ if X_inner_mtype is list, _transform must support all types in it
+ Data to be transformed
+ default X2 = X
+
+ Returns
+ -------
+ distmat: np.array of shape [n, m]
+ (i,j)-th entry contains distance/kernel between X[i] and X2[j]
+ """
+ estimator = self._get_delegate()
+ return estimator.transform(X, X2=X2)
diff --git a/sktime/dists_kernels/dtw/__init__.py b/sktime/dists_kernels/dtw/__init__.py
index 549da5cc4fd..bc1fa6b4672 100644
--- a/sktime/dists_kernels/dtw/__init__.py
+++ b/sktime/dists_kernels/dtw/__init__.py
@@ -1,7 +1,8 @@
# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
"""Dynamic time warping distances."""
-__all__ = ["DtwDist", "DtwDistTslearn", "SoftDtwDistTslearn"]
+__all__ = ["DtwDist", "DtwPythonDist", "DtwDistTslearn", "SoftDtwDistTslearn"]
-from sktime.dists_kernels.dtw._dtw_python import DtwDist
+from sktime.dists_kernels.dtw._dtw_python import DtwPythonDist
+from sktime.dists_kernels.dtw._dtw_sktime import DtwDist
from sktime.dists_kernels.dtw._dtw_tslearn import DtwDistTslearn, SoftDtwDistTslearn
diff --git a/sktime/dists_kernels/dtw/_dtw_python.py b/sktime/dists_kernels/dtw/_dtw_python.py
index 67d03860286..adc448fe74b 100644
--- a/sktime/dists_kernels/dtw/_dtw_python.py
+++ b/sktime/dists_kernels/dtw/_dtw_python.py
@@ -1,202 +1,86 @@
-"""BaseEstimator interface to sktime dtw distances in distances module."""
+"""BaseEstimator interface to dynamic time warping distances in dtw_python."""
__author__ = ["fkiraly"]
-from typing import Union
+from sktime.dists_kernels.base._delegate import _DelegatedPairwiseTransformerPanel
-import numpy as np
-from sktime.distances import pairwise_distance
-from sktime.dists_kernels.base import BasePairwiseTransformerPanel
+class DtwPythonDist(_DelegatedPairwiseTransformerPanel):
+ r"""Interface to dynamic time warping distances in the dtw-python package.
-
-class DtwDist(BasePairwiseTransformerPanel):
- r"""Interface to sktime native dtw distances, with derivative or weighting.
-
- Interface to simple dynamic time warping (DTW) distance,
- and the following weighted/derivative versions:
-
- * WDTW - weighted dynamic tyme warping - ``weighted=True, derivative=False`
- * DDTW - derivative dynamic time warping - ``weighted=False, derivative=True``
- * WDDTW - weighted derivative dynamic time
- warping - ``weighted=True, derivative=True``
-
- `sktime` interface to the efficient `numba` implementations
- provided by ``pairwise_distance`` in `sktime.distances`.
-
- This estimator provides performant implementation of time warping distances for:
- * time series of equal length
- * the Euclidean pairwise distance
-
- For unequal length time series, use ``sktime.dists_kernels.DistFromAligner``
- with a time warping aligner such as ``sktime.aligners.AlignerDTW``.
- To use arbitrary pairwise distances, use ``sktime.aligners.AlignerDTWfromDist``.
- (for derivative DTW, pipeline an alignment distance with ``Differencer``)
-
- Note that the more flexible options above may be less performant.
-
- The algorithms are also available as alignment estimators
- ``sktime.alignmnent.dtw_numba``, producing alignments aka alignment paths.
-
- DTW was originally proposed in [1]_, DTW computes the distance between two
- time series by considering their alignments during the calculation.
- This is done by measuring
- the pointwise distance (normally using Euclidean) between all elements of the two
- time series and then using dynamic programming to find the warping path
- that minimises the total pointwise distance between realigned series.
-
- DDTW is an adaptation of DTW originally proposed in [2]_. DDTW attempts to
- improve on dtw by better account for the 'shape' of the time series.
- This is done by considering y axis data points as higher level features of 'shape'.
- To do this the first derivative of the sequence is taken, and then using this
- derived sequence a dtw computation is done.
-
- WDTW was first proposed in [3]_, it adds a multiplicative weight penalty based on
- the warping distance. This means that time series with lower phase difference have
- a smaller weight imposed (i.e less penalty imposed) and time series with larger
- phase difference have a larger weight imposed (i.e. larger penalty imposed).
-
- WDDTW was first proposed in [3]_ as an extension of DDTW. By adding a weight
- to the derivative it means the alignment isn't only considering the shape of the
- time series, but also the phase.
+ Computes the dynamic time warping distance between series, using
+ the dtw-python package.
Parameters
----------
- weighted : bool, optional, default=False
- whether a weighted version of the distance is computed
- False = unmodified distance, i.e., dtw distance or derivative dtw distance
- True = weighted distance, i.e., weighted dtw or derivative weighted dtw
- derivative : bool, optional, default=False
- whether the distance or the derivative distance is computed
- False = unmodified distance, i.e., dtw distance or weighted dtw distance
- True = derivative distance, i.e., derivative dtw distance or derivative wdtw
- window: int, defaults = None
- Sakoe-Chiba window radius
- one of three mutually exclusive ways to specify bounding matrix
- if ``None``, does not use Sakoe-Chiba window
- if ``int``, uses Sakoe-Chiba lower bounding window with radius ``window``.
- If ``window`` is passed, ``itakura_max_slope`` will be ignored.
- itakura_max_slope: float, between 0. and 1., default = None
- Itakura parallelogram slope
- one of three mutually exclusive ways to specify bounding matrix
- if ``None``, does not use Itakura parallelogram lower bounding
- if ``float``, uses Itakura parallelogram lower bounding,
- with slope gradient ``itakura_max_slope``
- bounding_matrix: optional, 2D np.ndarray, default=None
- one of three mutually exclusive ways to specify bounding matrix
- must be of shape ``(len(X), len(X2))``, ``len`` meaning number time points,
- where ``X``, ``X2`` are the two time series passed in transform
- Custom bounding matrix to use.
- If provided, then ``window`` and ``itakura_max_slope`` are ignored.
- The matrix should be structured so that indexes considered in
- bound should be the value 0. and indexes outside the bounding matrix should
- be infinity.
- g: float, optional, default = 0. Used only if ``weighted=True``.
- Constant that controls the curvature (slope) of the function;
- that is, ``g`` controls the level of penalisation for the points
- with larger phase difference.
-
- References
- ----------
- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for
- spoken word recognition," IEEE Transactions on Acoustics, Speech and
- Signal Processing, vol. 26(1), pp. 43--49, 1978.
- .. [2] Keogh, Eamonn & Pazzani, Michael. (2002). Derivative Dynamic Time Warping.
- First SIAM International Conference on Data Mining.
- 1. 10.1137/1.9781611972719.1.
- .. [3] Young-Seon Jeong, Myong K. Jeong, Olufemi A. Omitaomu, Weighted dynamic time
- warping for time series classification, Pattern Recognition, Volume 44, Issue 9,
- 2011, Pages 2231-2240, ISSN 0031-3203, https://doi.org/10.1016/j.patcog.2010.09.022.
-
- Examples
- --------
- >>> from sktime.datasets import load_unit_test
- >>> from sktime.dists_kernels.dtw import DtwDist
- >>>
- >>> X, _ = load_unit_test(return_type="pd-multiindex") # doctest: +SKIP
- >>> d = DtwDist(weighted=True, derivative=True) # doctest: +SKIP
- >>> distmat = d.transform(X) # doctest: +SKIP
-
- distances are also callable, this does the same:
-
- >>> distmat = d(X) # doctest: +SKIP
+ dist: str, or estimator following sktime BasePairwiseTransformer API
+ distance to use, a distance on real n-space, default = "euclidean"
+ if str, must be name of one of the functions in `scipy.spatial.distance.cdist`
+ if estimator, must follow sktime BasePairwiseTransformer API
+ step_pattern : str, optional, default = "symmetric2",
+ or dtw_python stepPattern object, optional
+ step pattern to use in time warping
+ one of: 'symmetric1', 'symmetric2' (default), 'asymmetric',
+ and dozens of other more non-standard step patterns;
+ list can be displayed by calling help(stepPattern) in dtw
+ window_type: str optional, default = "none"
+ the chosen windowing function
+ "none", "itakura", "sakoechiba", or "slantedband"
+ "none" (default) - no windowing
+ "sakoechiba" - a band around main diagonal
+ "slantedband" - a band around slanted diagonal
+ "itakura" - Itakura parallelogram
+ open_begin : boolean, optional, default=False
+ open_end : boolean, optional, default=False
+ whether to perform open-ended alignments
+ open_begin = whether alignment open ended at start (low index)
+ open_end = whether alignment open ended at end (high index)
"""
_tags = {
+ "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance"
"symmetric": True, # all the distances are symmetric
- "X_inner_mtype": "numpy3D",
- "python_dependencies": "numba",
+ "capability:multivariate": True, # can estimator handle multivariate data?
+ "capability:unequal_length": True, # can dist handle unequal length panels?
+ "X_inner_mtype": "df-list",
+ "python_dependencies": "dtw-python",
+ "python_dependencies_alias": {"dtw-python": "dtw"},
}
def __init__(
self,
- weighted: bool = False,
- derivative: bool = False,
- window: Union[int, None] = None,
- itakura_max_slope: Union[float, None] = None,
- bounding_matrix: np.ndarray = None,
- g: float = 0.0,
+ dist="euclidean",
+ step_pattern="symmetric2",
+ window_type="none",
+ open_begin=False,
+ open_end=False,
):
- self.weighted = weighted
- self.derivative = derivative
- self.window = window
- self.itakura_max_slope = itakura_max_slope
- self.bounding_matrix = bounding_matrix
- self.g = g
-
- if not weighted and not derivative:
- metric_key = "dtw"
- elif not weighted and derivative:
- metric_key = "ddtw"
- elif weighted and not derivative:
- metric_key = "wdtw"
- elif weighted and derivative:
- metric_key = "wddtw"
-
- self.metric_key = metric_key
-
- kwargs = {
- "window": window,
- "itakura_max_slope": itakura_max_slope,
- "bounding_matrix": bounding_matrix,
- }
-
- # g is used only for weighted dtw
- if weighted:
- kwargs["g"] = g
-
- self.kwargs = kwargs
+ self.dist = dist
+ self.step_pattern = step_pattern
+ self.window_type = window_type
+ self.open_begin = open_begin
+ self.open_end = open_end
super().__init__()
- def _transform(self, X, X2=None):
- """Compute distance/kernel matrix.
-
- Core logic
-
- Behaviour: returns pairwise distance/kernel matrix
- between samples in X and X2
- if X2 is not passed, is equal to X
- if X/X2 is a pd.DataFrame and contains non-numeric columns,
- these are removed before computation
-
- Parameters
- ----------
- X: 3D np.array of shape [num_instances, num_vars, num_time_points]
- X2: 3D np.array of shape [num_instances, num_vars, num_time_points], optional
- default X2 = X
+ params = {
+ "step_pattern": step_pattern,
+ "window_type": window_type,
+ "open_begin": open_begin,
+ "open_end": open_end,
+ }
- Returns
- -------
- distmat: np.array of shape [n, m]
- (i,j)-th entry contains distance/kernel between X[i] and X2[j]
- """
- metric_key = self.metric_key
- kwargs = self.kwargs
+ from sktime.alignment.dtw_python import AlignerDTW, AlignerDTWfromDist
+ from sktime.dists_kernels.compose_from_align import DistFromAligner
- distmat = pairwise_distance(X, X2, metric=metric_key, **kwargs)
+ if isinstance(dist, str):
+ params["dist_method"] = dist
+ delegate = DistFromAligner(AlignerDTW(**params))
+ else:
+ params["dist_trafo"] = dist
+ delegate = DistFromAligner(AlignerDTWfromDist(**params))
- return distmat
+ self.estimator_ = delegate
@classmethod
def get_test_params(cls, parameter_set="default"):
@@ -217,9 +101,11 @@ def get_test_params(cls, parameter_set="default"):
`MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
`create_test_instance` uses the first (or only) dictionary in `params`
"""
+ from sktime.dists_kernels import ScipyDist
+
params0 = {}
- params1 = {"weighted": True}
- params2 = {"derivative": True, "window": 0.2}
- params3 = {"weighted": True, "derivative": True, "g": 0.05}
+ params1 = {"dist": "cityblock"}
+ params2 = {"dist": ScipyDist()}
+ params3 = {"dist": ScipyDist("cityblock"), "step_pattern": "symmetric1"}
return [params0, params1, params2, params3]
diff --git a/sktime/dists_kernels/dtw/_dtw_sktime.py b/sktime/dists_kernels/dtw/_dtw_sktime.py
new file mode 100644
index 00000000000..67d03860286
--- /dev/null
+++ b/sktime/dists_kernels/dtw/_dtw_sktime.py
@@ -0,0 +1,225 @@
+"""BaseEstimator interface to sktime dtw distances in distances module."""
+
+__author__ = ["fkiraly"]
+
+from typing import Union
+
+import numpy as np
+
+from sktime.distances import pairwise_distance
+from sktime.dists_kernels.base import BasePairwiseTransformerPanel
+
+
+class DtwDist(BasePairwiseTransformerPanel):
+ r"""Interface to sktime native dtw distances, with derivative or weighting.
+
+ Interface to simple dynamic time warping (DTW) distance,
+ and the following weighted/derivative versions:
+
+ * WDTW - weighted dynamic time warping - ``weighted=True, derivative=False``
+ * DDTW - derivative dynamic time warping - ``weighted=False, derivative=True``
+ * WDDTW - weighted derivative dynamic time
+ warping - ``weighted=True, derivative=True``
+
+ `sktime` interface to the efficient `numba` implementations
+ provided by ``pairwise_distance`` in `sktime.distances`.
+
+ This estimator provides performant implementation of time warping distances for:
+ * time series of equal length
+ * the Euclidean pairwise distance
+
+ For unequal length time series, use ``sktime.dists_kernels.DistFromAligner``
+ with a time warping aligner such as ``sktime.alignment.dtw_python.AlignerDTW``.
+ For arbitrary pairwise distances, use ``AlignerDTWfromDist`` from the same module.
+ (for derivative DTW, pipeline an alignment distance with ``Differencer``)
+
+ Note that the more flexible options above may be less performant.
+
+ The algorithms are also available as alignment estimators
+ ``sktime.alignment.dtw_numba``, producing alignments aka alignment paths.
+
+ DTW was originally proposed in [1]_, DTW computes the distance between two
+ time series by considering their alignments during the calculation.
+ This is done by measuring
+ the pointwise distance (normally using Euclidean) between all elements of the two
+ time series and then using dynamic programming to find the warping path
+ that minimises the total pointwise distance between realigned series.
+
+ DDTW is an adaptation of DTW originally proposed in [2]_. DDTW attempts to
+ improve on dtw by better accounting for the 'shape' of the time series.
+ This is done by considering y axis data points as higher level features of 'shape'.
+ To do this the first derivative of the sequence is taken, and then using this
+ derived sequence a dtw computation is done.
+
+ WDTW was first proposed in [3]_, it adds a multiplicative weight penalty based on
+ the warping distance. This means that time series with lower phase difference have
+ a smaller weight imposed (i.e less penalty imposed) and time series with larger
+ phase difference have a larger weight imposed (i.e. larger penalty imposed).
+
+ WDDTW was first proposed in [3]_ as an extension of DDTW. By adding a weight
+ to the derivative it means the alignment isn't only considering the shape of the
+ time series, but also the phase.
+
+ Parameters
+ ----------
+ weighted : bool, optional, default=False
+ whether a weighted version of the distance is computed
+ False = unmodified distance, i.e., dtw distance or derivative dtw distance
+ True = weighted distance, i.e., weighted dtw or derivative weighted dtw
+ derivative : bool, optional, default=False
+ whether the distance or the derivative distance is computed
+ False = unmodified distance, i.e., dtw distance or weighted dtw distance
+ True = derivative distance, i.e., derivative dtw distance or derivative wdtw
+ window: int, defaults = None
+ Sakoe-Chiba window radius
+ one of three mutually exclusive ways to specify bounding matrix
+ if ``None``, does not use Sakoe-Chiba window
+ if ``int``, uses Sakoe-Chiba lower bounding window with radius ``window``.
+ If ``window`` is passed, ``itakura_max_slope`` will be ignored.
+ itakura_max_slope: float, between 0. and 1., default = None
+ Itakura parallelogram slope
+ one of three mutually exclusive ways to specify bounding matrix
+ if ``None``, does not use Itakura parallelogram lower bounding
+ if ``float``, uses Itakura parallelogram lower bounding,
+ with slope gradient ``itakura_max_slope``
+ bounding_matrix: optional, 2D np.ndarray, default=None
+ one of three mutually exclusive ways to specify bounding matrix
+ must be of shape ``(len(X), len(X2))``, ``len`` meaning number time points,
+ where ``X``, ``X2`` are the two time series passed in transform
+ Custom bounding matrix to use.
+ If provided, then ``window`` and ``itakura_max_slope`` are ignored.
+ The matrix should be structured so that indexes considered in
+ bound should be the value 0. and indexes outside the bounding matrix should
+ be infinity.
+ g: float, optional, default = 0. Used only if ``weighted=True``.
+ Constant that controls the curvature (slope) of the function;
+ that is, ``g`` controls the level of penalisation for the points
+ with larger phase difference.
+
+ References
+ ----------
+ .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for
+ spoken word recognition," IEEE Transactions on Acoustics, Speech and
+ Signal Processing, vol. 26(1), pp. 43--49, 1978.
+ .. [2] Keogh, Eamonn & Pazzani, Michael. (2002). Derivative Dynamic Time Warping.
+ First SIAM International Conference on Data Mining.
+ 1. 10.1137/1.9781611972719.1.
+ .. [3] Young-Seon Jeong, Myong K. Jeong, Olufemi A. Omitaomu, Weighted dynamic time
+ warping for time series classification, Pattern Recognition, Volume 44, Issue 9,
+ 2011, Pages 2231-2240, ISSN 0031-3203, https://doi.org/10.1016/j.patcog.2010.09.022.
+
+ Examples
+ --------
+ >>> from sktime.datasets import load_unit_test
+ >>> from sktime.dists_kernels.dtw import DtwDist
+ >>>
+ >>> X, _ = load_unit_test(return_type="pd-multiindex") # doctest: +SKIP
+ >>> d = DtwDist(weighted=True, derivative=True) # doctest: +SKIP
+ >>> distmat = d.transform(X) # doctest: +SKIP
+
+ distances are also callable, this does the same:
+
+ >>> distmat = d(X) # doctest: +SKIP
+ """
+
+ _tags = {
+ "symmetric": True, # all the distances are symmetric
+ "X_inner_mtype": "numpy3D",
+ "python_dependencies": "numba",
+ }
+
+ def __init__(
+ self,
+ weighted: bool = False,
+ derivative: bool = False,
+ window: Union[int, None] = None,
+ itakura_max_slope: Union[float, None] = None,
+ bounding_matrix: np.ndarray = None,
+ g: float = 0.0,
+ ):
+ self.weighted = weighted
+ self.derivative = derivative
+ self.window = window
+ self.itakura_max_slope = itakura_max_slope
+ self.bounding_matrix = bounding_matrix
+ self.g = g
+
+ if not weighted and not derivative:
+ metric_key = "dtw"
+ elif not weighted and derivative:
+ metric_key = "ddtw"
+ elif weighted and not derivative:
+ metric_key = "wdtw"
+ elif weighted and derivative:
+ metric_key = "wddtw"
+
+ self.metric_key = metric_key
+
+ kwargs = {
+ "window": window,
+ "itakura_max_slope": itakura_max_slope,
+ "bounding_matrix": bounding_matrix,
+ }
+
+ # g is used only for weighted dtw
+ if weighted:
+ kwargs["g"] = g
+
+ self.kwargs = kwargs
+
+ super().__init__()
+
+ def _transform(self, X, X2=None):
+ """Compute distance/kernel matrix.
+
+ Core logic
+
+ Behaviour: returns pairwise distance/kernel matrix
+ between samples in X and X2
+ if X2 is not passed, is equal to X
+ if X/X2 is a pd.DataFrame and contains non-numeric columns,
+ these are removed before computation
+
+ Parameters
+ ----------
+ X: 3D np.array of shape [num_instances, num_vars, num_time_points]
+ X2: 3D np.array of shape [num_instances, num_vars, num_time_points], optional
+ default X2 = X
+
+ Returns
+ -------
+ distmat: np.array of shape [n, m]
+ (i,j)-th entry contains distance/kernel between X[i] and X2[j]
+ """
+ metric_key = self.metric_key
+ kwargs = self.kwargs
+
+ distmat = pairwise_distance(X, X2, metric=metric_key, **kwargs)
+
+ return distmat
+
+ @classmethod
+ def get_test_params(cls, parameter_set="default"):
+ """Return testing parameter settings for the estimator.
+
+ Parameters
+ ----------
+ parameter_set : str, default="default"
+ Name of the set of test parameters to return, for use in tests. If no
+ special parameters are defined for a value, will return `"default"` set.
+ There are currently no reserved values for distance/kernel transformers.
+
+ Returns
+ -------
+ params : dict or list of dict, default = {}
+ Parameters to create testing instances of the class
+ Each dict are parameters to construct an "interesting" test instance, i.e.,
+ `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+ `create_test_instance` uses the first (or only) dictionary in `params`
+ """
+ params0 = {}
+ params1 = {"weighted": True}
+ params2 = {"derivative": True, "window": 0.2}
+ params3 = {"weighted": True, "derivative": True, "g": 0.05}
+
+ return [params0, params1, params2, params3]
diff --git a/sktime/dists_kernels/lucky.py b/sktime/dists_kernels/lucky.py
new file mode 100644
index 00000000000..00ed51605f9
--- /dev/null
+++ b/sktime/dists_kernels/lucky.py
@@ -0,0 +1,68 @@
+# copyright: sktime developers, BSD-3-Clause License (see LICENSE file)
+"""Lucky dynamic time warping distance."""
+
+from sktime.dists_kernels.base._delegate import _DelegatedPairwiseTransformerPanel
+
+
+class LuckyDtwDist(_DelegatedPairwiseTransformerPanel):
+ """Lucky dynamic time warping distance.
+
+ Implements lucky dynamic time warping distance [1]_.
+ Uses Euclidean distance for multivariate data.
+
+ Based on code by Krisztian A Buza's research group.
+
+ Parameters
+ ----------
+ window: int, optional (default=None)
+ Maximum distance between indices of aligned series, aka warping window.
+ If None, defaults to max(len(ts1), len(ts2)), i.e., no warping window.
+
+ References
+ ----------
+ .. [1] Stephan Spiegel, Brijnesh-Johannes Jain, and Sahin Albayrak.
+ Fast time series classification under lucky time warping distance.
+ Proceedings of the 29th Annual ACM Symposium on Applied Computing. 2014.
+ """
+
+ _tags = {
+ "symmetric": True, # is the transformer symmetric, i.e., t(x,y)=t(y,x) always?
+ "capability:missing_values": False, # can estimator handle missing data?
+ "capability:multivariate": True, # can estimator handle multivariate data?
+ "capability:unequal_length": True, # can dist handle unequal length panels?
+ "pwtrafo_type": "distance", # type of pw. transformer, "kernel" or "distance"
+ }
+
+ def __init__(self, window=None):
+ self.window = window
+
+ super().__init__()
+
+ from sktime.alignment.lucky import AlignerLuckyDtw
+ from sktime.dists_kernels.compose_from_align import DistFromAligner
+
+ self.estimator_ = DistFromAligner(AlignerLuckyDtw(window=window))
+
+ @classmethod
+ def get_test_params(cls, parameter_set="default"):
+ """Return testing parameter settings for the estimator.
+
+ Parameters
+ ----------
+ parameter_set : str, default="default"
+ Name of the set of test parameters to return, for use in tests. If no
+ special parameters are defined for a value, will return `"default"` set.
+ There are currently no reserved values for distance/kernel transformers.
+
+ Returns
+ -------
+ params : dict or list of dict, default = {}
+ Parameters to create testing instances of the class
+ Each dict are parameters to construct an "interesting" test instance, i.e.,
+ `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+ `create_test_instance` uses the first (or only) dictionary in `params`
+ """
+ params0 = {}
+ params1 = {"window": 4}
+
+ return [params0, params1]
diff --git a/sktime/forecasting/base/adapters/_generalised_statsforecast.py b/sktime/forecasting/base/adapters/_generalised_statsforecast.py
index 7ac8db8dbf2..3055b7a0449 100644
--- a/sktime/forecasting/base/adapters/_generalised_statsforecast.py
+++ b/sktime/forecasting/base/adapters/_generalised_statsforecast.py
@@ -43,30 +43,77 @@ def __init__(self):
def _get_statsforecast_class(self):
raise NotImplementedError("abstract method")
- def _get_statsforecast_params(self):
+ def _get_statsforecast_params(self) -> dict:
return self.get_params()
def _get_init_statsforecast_params(self):
+ """Return parameters in __init__ statsforecast forecaster.
+
+ Return a list of parameters in the __init__ method from
+ the statsforecast forecaster class used in the sktime adapter.
+ """
statsforecast_class = self._get_statsforecast_class()
return list(signature(statsforecast_class.__init__).parameters.keys())
- def _get_validated_statsforecast_params(self):
- sktime_params = self._get_statsforecast_params()
- sktime_default_params = self.get_param_defaults().keys()
- statsforecast_params = self._get_init_statsforecast_params()
-
- for sktime_param in sktime_params.keys():
- if sktime_param not in statsforecast_params:
- sktime_params.pop(sktime_param)
- if sktime_param not in sktime_default_params:
- warn(
- f"Keyword argument '{sktime_param}' will be omitted as it is"
- f" not found in the __init__ method "
- f"from {self._get_statsforecast_class()}. "
- f"Check your statsforecast version"
- f"to find out the right API parameters."
- )
- return sktime_params
+ def _get_statsforecast_default_params(self) -> dict:
+ """Get default parameters for the statsforecast forecaster.
+
+ This will in general be different from self.get_param_defaults(),
+ as the set or names of inner parameters can differ.
+
+ For parameters without defaults, will use the parameter
+ of self instead.
+ """
+ self_params = self.get_params(deep=False)
+ self_default_params = self.get_param_defaults()
+ self_params.update(self_default_params)
+ cls_with_defaults = type(self)(**self_params)
+ return cls_with_defaults._get_statsforecast_params()
+
+ def _get_validated_statsforecast_params(self) -> dict:
+ """Return parameter dict with only parameters accepted by statsforecast API.
+
+ Checks if the parameters passed to the statsforecast forecaster
+ are valid in the __init__ method of the aforementioned forecaster.
+ If the parameter is not there it will just not be passed. Furthermore
+ if the sktime user has set such a parameter to a non-default value,
+ a warning is raised stating that the parameter is not accepted
+ by the installed version of statsforecast.
+
+ """
+ params_sktime_to_statsforecast: dict = self._get_statsforecast_params()
+ params_sktime_to_statsforecast_default: dict = (
+ self._get_statsforecast_default_params()
+ )
+ statsforecast_init_params = set(self._get_init_statsforecast_params())
+
+ # Filter sktime_params to only include keys in statsforecast_params
+ filtered_sktime_params = {
+ key: value
+ for key, value in params_sktime_to_statsforecast.items()
+ if key in statsforecast_init_params
+ }
+
+ non_default_params = [
+ p
+ for p in params_sktime_to_statsforecast
+ if params_sktime_to_statsforecast[p]
+ != params_sktime_to_statsforecast_default[p]
+ ]
+ # Find non-default parameters not accepted by the statsforecast __init__
+ param_diff = set(non_default_params) - statsforecast_init_params
+
+ if param_diff:
+ params_str = ", ".join([f'"{param}"' for param in param_diff])
+ warning_message = (
+ f"Keyword arguments {params_str} "
+ f"will be omitted as they are not found in the __init__ method from "
+ f"{self._get_statsforecast_class()}. Check your statsforecast version "
+ f"to find out the right API parameters."
+ )
+ warn(warning_message)
+
+ return filtered_sktime_params
def _instantiate_model(self):
cls = self._get_statsforecast_class()
@@ -400,10 +447,17 @@ def __init__(self, estimator):
super().__init__()
self.estimator = estimator
+ self.prediction_intervals = None
def __repr__(self):
return "StatsForecastBackAdapter"
+ def new(self):
+ """Make new instance of back-adapter."""
+ _self = type(self).__new__(type(self))
+ _self.__dict__.update(self.__dict__)
+ return _self
+
def fit(self, y, X=None):
"""Fit to training data.
@@ -442,6 +496,12 @@ def predict(self, h, X=None, level=None):
mean = self.estimator.predict(fh=range(1, h + 1), X=X)[:, 0]
if level is None:
return {"mean": mean}
+ # if a level is passed, and if prediction_intervals has not been instantiated
+ # yet
+ elif self.prediction_intervals is None:
+ from statsforecast.utils import ConformalIntervals
+
+ self.prediction_intervals = ConformalIntervals(h=h)
level = sorted(level)
coverage = [round(1 - (_l / 100), 2) for _l in level]
diff --git a/sktime/forecasting/compose/_reduce.py b/sktime/forecasting/compose/_reduce.py
index 4ccc3fe9e30..1a3b5fa6795 100644
--- a/sktime/forecasting/compose/_reduce.py
+++ b/sktime/forecasting/compose/_reduce.py
@@ -2453,6 +2453,15 @@ class YfromX(BaseForecaster, _ReducerMixin):
>>> f.fit(y=y_train, X=X_train, fh=fh)
YfromX(...)
>>> y_pred = f.predict(X=X_test)
+
+ YfromX can also be used with skpro probabilistic regressors,
+ in this case the resulting forecaster will be capable of probabilistic forecasts:
+ >>> from skpro.regression.residual import ResidualDouble # doctest: +SKIP
+ >>> reg_proba = ResidualDouble(LinearRegression()) # doctest: +SKIP
+ >>> f = YfromX(reg_proba) # doctest: +SKIP
+ >>> f.fit(y=y_train, X=X_train, fh=fh) # doctest: +SKIP
+ YfromX(...)
+ >>> y_pred = f.predict_interval(X=X_test) # doctest: +SKIP
"""
_tags = {
@@ -2461,6 +2470,7 @@ class YfromX(BaseForecaster, _ReducerMixin):
"handles-missing-data": True,
"X_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"],
"y_inner_mtype": ["pd.DataFrame", "pd-multiindex", "pd_multiindex_hier"],
+ "capability:pred_int": True,
}
def __init__(self, estimator, pooling="local"):
@@ -2468,6 +2478,23 @@ def __init__(self, estimator, pooling="local"):
self.pooling = pooling
super().__init__()
+ # self._est_type encodes information what type of estimator is passed
+ if hasattr(estimator, "get_tags"):
+ _est_type = estimator.get_tag("object_type", "regressor", False)
+ else:
+ _est_type = "regressor"
+
+ if _est_type not in ["regressor", "regressor_proba"]:
+ raise TypeError(
+ "error in YfromX, estimator must be either an sklearn compatible "
+ "regressor, or an skpro probabilistic regressor."
+ )
+
+ # has probabilistic mode iff the estimator is of type regressor_proba
+ self.set_tags(**{"capability:pred_int": _est_type == "regressor_proba"})
+
+ self._est_type = _est_type
+
if pooling == "local":
mtypes = "pd.DataFrame"
elif pooling == "global":
@@ -2505,17 +2532,27 @@ def _fit(self, y, X, fh):
-------
self : reference to self
"""
+ _est_type = self._est_type
+
if X is None:
from sklearn.dummy import DummyRegressor
+ if _est_type == "regressor":
+ estimator = DummyRegressor()
+ else: # "regressor_proba"
+ from skpro.regression.residual import ResidualDouble
+
+ dummy = DummyRegressor()
+ estimator = ResidualDouble(dummy)
+
X = _coerce_col_str(y)
- estimator = DummyRegressor()
else:
X = _coerce_col_str(X)
estimator = clone(self.estimator)
- y = _coerce_col_str(y)
- y = y.values.flatten()
+ if _est_type == "regressor":
+ y = _coerce_col_str(y)
+ y = y.values.flatten()
estimator.fit(X, y)
self.estimator_ = estimator
@@ -2541,7 +2578,173 @@ def _predict(self, X=None, fh=None):
y_pred : pd.DataFrame, same type as y in _fit
Point predictions
"""
+ _est_type = self._est_type
+
+ fh_idx = self._get_expected_pred_idx(fh=fh)
+
+ X_idx = self._get_pred_X(X=X, fh_idx=fh_idx)
+ y_pred = self.estimator_.predict(X_idx)
+
+ if _est_type == "regressor":
+ y_cols = self._y.columns
+ y_pred = pd.DataFrame(y_pred, index=fh_idx, columns=y_cols)
+
+ return y_pred
+
+ def _predict_quantiles(self, fh, X, alpha):
+ """Compute/return prediction quantiles for a forecast.
+
+ private _predict_quantiles containing the core logic,
+ called from predict_quantiles and possibly predict_interval
+
+ State required:
+ Requires state to be "fitted".
+
+ Accesses in self:
+ Fitted model attributes ending in "_"
+ self.cutoff
+
+ Parameters
+ ----------
+ fh : guaranteed to be ForecastingHorizon
+ The forecasting horizon with the steps ahead to predict.
+ X : sktime time series object, optional (default=None)
+ guaranteed to be of an mtype in self.get_tag("X_inner_mtype")
+ Exogeneous time series for the forecast
+ alpha : list of float (guaranteed not None and floats in [0,1] interval)
+ A list of probabilities at which quantile forecasts are computed.
+
+ Returns
+ -------
+ quantiles : pd.DataFrame
+ Column has multi-index: first level is variable name from y in fit,
+ second level being the values of alpha passed to the function.
+ Row index is fh, with additional (upper) levels equal to instance levels,
+ from y seen in fit, if y_inner_mtype is Panel or Hierarchical.
+ Entries are quantile forecasts, for var in col index,
+ at quantile probability in second col index, for the row index.
+ """
+ fh_idx = self._get_expected_pred_idx(fh=fh)
+ X_idx = self._get_pred_X(X=X, fh_idx=fh_idx)
+ y_pred = self.estimator_.predict_quantiles(X_idx, alpha=alpha)
+ return y_pred
+
+ def _predict_interval(self, fh, X, coverage):
+ """Compute/return prediction intervals for a forecast.
+
+ private _predict_interval containing the core logic,
+ called from predict_interval and possibly predict_quantiles
+
+ State required:
+ Requires state to be "fitted".
+
+ Accesses in self:
+ Fitted model attributes ending in "_"
+ self.cutoff
+
+ Parameters
+ ----------
+ fh : guaranteed to be ForecastingHorizon
+ The forecasting horizon with the steps ahead to predict.
+ X : sktime time series object, optional (default=None)
+ guaranteed to be of an mtype in self.get_tag("X_inner_mtype")
+ Exogeneous time series for the forecast
+ coverage : list of float (guaranteed not None and floats in [0,1] interval)
+ nominal coverage(s) of predictive interval(s)
+
+ Returns
+ -------
+ pred_int : pd.DataFrame
+ Column has multi-index: first level is variable name from y in fit,
+ second level coverage fractions for which intervals were computed.
+ in the same order as in input `coverage`.
+ Third level is string "lower" or "upper", for lower/upper interval end.
+ Row index is fh, with additional (upper) levels equal to instance levels,
+ from y seen in fit, if y_inner_mtype is Panel or Hierarchical.
+ Entries are forecasts of lower/upper interval end,
+ for var in col index, at nominal coverage in second col index,
+ lower/upper depending on third col index, for the row index.
+ Upper/lower interval end forecasts are equivalent to
+ quantile forecasts at alpha = 0.5 - c/2, 0.5 + c/2 for c in coverage.
+ """
+ fh_idx = self._get_expected_pred_idx(fh=fh)
+ X_idx = self._get_pred_X(X=X, fh_idx=fh_idx)
+ y_pred = self.estimator_.predict_interval(X_idx, coverage=coverage)
+ return y_pred
+
+ def _predict_var(self, fh, X=None, cov=False):
+ """Forecast variance at future horizon.
+
+ private _predict_var containing the core logic, called from predict_var
+
+ Parameters
+ ----------
+ fh : guaranteed to be ForecastingHorizon or None, optional (default=None)
+ The forecasting horizon with the steps ahead to predict.
+ If not passed in _fit, guaranteed to be passed here
+ X : sktime time series object, optional (default=None)
+ guaranteed to be of an mtype in self.get_tag("X_inner_mtype")
+ Exogeneous time series for the forecast
+ cov : bool, optional (default=False)
+ if True, computes covariance matrix forecast.
+ if False, computes marginal variance forecasts.
+
+ Returns
+ -------
+ pred_var : pd.DataFrame, format dependent on `cov` variable
+ If cov=False:
+ Column names are exactly those of `y` passed in `fit`/`update`.
+ For nameless formats, column index will be a RangeIndex.
+ Row index is fh, with additional levels equal to instance levels,
+ from y seen in fit, if y_inner_mtype is Panel or Hierarchical.
+ Entries are variance forecasts, for var in col index.
+ A variance forecast for given variable and fh index is a predicted
+ variance for that variable and index, given observed data.
+ If cov=True:
+ Column index is a multiindex: 1st level is variable names (as above)
+ 2nd level is fh.
+ Row index is fh, with additional levels equal to instance levels,
+ from y seen in fit, if y_inner_mtype is Panel or Hierarchical.
+ Entries are (co-)variance forecasts, for var in col index, and
+ covariance between time index in row and col.
+ Note: no covariance forecasts are returned between different variables.
+ """
+ fh_idx = self._get_expected_pred_idx(fh=fh)
+ X_idx = self._get_pred_X(X=X, fh_idx=fh_idx)
+ y_pred = self.estimator_.predict_var(X_idx)
+ return y_pred
+
+ def _predict_proba(self, fh, X, marginal=True):
+ """Compute/return fully probabilistic forecasts.
+
+ private _predict_proba containing the core logic, called from predict_proba
+
+ Parameters
+ ----------
+ fh : int, list, np.array or ForecastingHorizon (not optional)
+ The forecasting horizon encoding the time stamps to forecast at.
+ if has not been passed in fit, must be passed, not optional
+ X : sktime time series object, optional (default=None)
+ Exogeneous time series for the forecast
+ Should be of same scitype (Series, Panel, or Hierarchical) as y in fit
+ if self.get_tag("X-y-must-have-same-index"),
+ X.index must contain fh.index and y.index both
+ marginal : bool, optional (default=True)
+ whether returned distribution is marginal by time index
+
+ Returns
+ -------
+ pred_dist : sktime BaseDistribution
+ predictive distribution
+ if marginal=True, will be marginal distribution by time point
+ if marginal=False and implemented by method, will be joint
+ """
fh_idx = self._get_expected_pred_idx(fh=fh)
+ X_idx = self._get_pred_X(X=X, fh_idx=fh_idx)
+ y_pred = self.estimator_.predict_proba(X_idx)
+ return y_pred
+
+ def _get_pred_X(self, X, fh_idx):
y_cols = self._y.columns
if X is not None and self._X is not None:
@@ -2556,11 +2759,7 @@ def _predict(self, X=None, fh=None):
X_pool = _coerce_col_str(X_pool)
X_idx = X_pool.loc[fh_idx]
-
- y_pred = self.estimator_.predict(X_idx)
- y_pred = pd.DataFrame(y_pred, index=fh_idx, columns=y_cols)
-
- return y_pred
+ return X_idx
@classmethod
def get_test_params(cls, parameter_set="default"):
@@ -2583,6 +2782,8 @@ def get_test_params(cls, parameter_set="default"):
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
+ from sktime.utils.validation._dependencies import _check_soft_dependencies
+
params1 = {
"estimator": LinearRegression(),
"pooling": "local",
@@ -2593,4 +2794,15 @@ def get_test_params(cls, parameter_set="default"):
"pooling": "global", # all internal mtypes are tested across scenarios
}
- return [params1, params2]
+ params = [params1, params2]
+
+ if _check_soft_dependencies("skpro", severity="none"):
+ from skpro.regression.residual import ResidualDouble
+
+ params3 = {
+ "estimator": ResidualDouble.create_test_instance(),
+ "pooling": "global",
+ }
+ params = params + [params3]
+
+ return params
diff --git a/sktime/forecasting/statsforecast.py b/sktime/forecasting/statsforecast.py
index e1a639f14dc..21493ab634d 100644
--- a/sktime/forecasting/statsforecast.py
+++ b/sktime/forecasting/statsforecast.py
@@ -595,6 +595,12 @@ class StatsForecastMSTL(_GeneralisedStatsForecastAdapter):
trend_forecaster : estimator, optional, default=StatsForecastAutoETS()
Sktime estimator used to make univariate forecasts. Multivariate estimators are
not supported.
+ stl_kwargs : dict, optional
+ Extra arguments to pass to [`statsmodels.tsa.seasonal.STL`]
+ (https://www.statsmodels.org/dev/generated/statsmodels.tsa.seasonal.STL.html#statsmodels.tsa.seasonal.STL).
+ The `period` and `seasonal` arguments are reserved.
+ pred_int_kwargs : dict, optional
+ Extra arguments to pass to [`statsforecast.utils.ConformalIntervals`].
References
----------
@@ -623,6 +629,8 @@ def __init__(
self,
season_length: Union[int, List[int]],
trend_forecaster=None,
+ stl_kwargs: Optional[Dict] = None,
+ pred_int_kwargs: Optional[Dict] = None,
):
super().__init__()
@@ -634,6 +642,8 @@ def __init__(
self._trend_forecaster = clone(trend_forecaster)
else:
self._trend_forecaster = StatsForecastAutoETS(model="ZZN")
+ self.stl_kwargs = stl_kwargs
+ self.pred_int_kwargs = pred_int_kwargs
# checks if trend_forecaster is already wrapped with
# StatsForecastBackAdapter
@@ -650,6 +660,14 @@ def __init__(
"forecaster."
)
+ # check if prediction interval kwargs are passed
+ if self.pred_int_kwargs:
+ from statsforecast.utils import ConformalIntervals
+
+ self._trend_forecaster.prediction_intervals = ConformalIntervals(
+ **self.pred_int_kwargs
+ )
+
def _get_statsforecast_class(self):
from statsforecast.models import MSTL
@@ -695,6 +713,12 @@ def get_test_params(cls, parameter_set="default"):
{
"season_length": 4,
},
+ {
+ "season_length": 4,
+ "pred_int_kwargs": {
+ "n_windows": 2,
+ },
+ },
]
except ModuleNotFoundError:
from sktime.forecasting.naive import NaiveForecaster
diff --git a/sktime/forecasting/tests/test_all_forecasters.py b/sktime/forecasting/tests/test_all_forecasters.py
index 8916dbd956f..8141ce25675 100644
--- a/sktime/forecasting/tests/test_all_forecasters.py
+++ b/sktime/forecasting/tests/test_all_forecasters.py
@@ -515,9 +515,10 @@ def test_predict_quantiles(self, estimator_instance, n_columns, fh_int_oos, alph
estimator_instance.predict_quantiles(fh=fh_int_oos, alpha=alpha)
def _check_predict_proba(self, pred_dist, y_train, fh_int):
- from sktime.proba.base import BaseDistribution
+ assert hasattr(pred_dist, "get_tag")
+ obj_type = pred_dist.get_tag("object_type", None, False)
+ assert obj_type == "distribution"
- assert isinstance(pred_dist, BaseDistribution)
pred_cols = pred_dist.columns
pred_index = pred_dist.index
diff --git a/sktime/tests/test_all_estimators.py b/sktime/tests/test_all_estimators.py
index 82f20ef0a32..166a7275e37 100644
--- a/sktime/tests/test_all_estimators.py
+++ b/sktime/tests/test_all_estimators.py
@@ -38,6 +38,7 @@
VALID_ESTIMATOR_TYPES,
VALID_TRANSFORMER_TYPES,
)
+from sktime.tests.test_switch import run_test_for_class
from sktime.utils._testing._conditional_fixtures import (
create_conditional_fixtures_and_names,
)
@@ -50,12 +51,10 @@
_list_required_methods,
)
from sktime.utils._testing.scenarios_getter import retrieve_scenarios
-from sktime.utils.git_diff import is_class_changed
from sktime.utils.random_state import set_random_state
from sktime.utils.sampling import random_partition
from sktime.utils.validation._dependencies import (
_check_dl_dependencies,
- _check_estimator_deps,
_check_soft_dependencies,
)
@@ -220,10 +219,11 @@ def _all_estimators(self):
if MATRIXDESIGN:
est_list = subsample_by_version_os(est_list)
- # this setting ensures that only estimators are tested that have changed
- # in the sense that any line in the module is different from main
- if ONLY_CHANGED_MODULES:
- est_list = [est for est in est_list if is_class_changed(est)]
+ # run_test_for_class selects which estimators to run, based on
+ # whether they have changed and whether all their dependencies are present.
+ # Internally, it uses the ONLY_CHANGED_MODULES flag
+ # and checks the python env against the python_dependencies tag.
+ est_list = [est for est in est_list if run_test_for_class(est)]
return est_list
@@ -274,13 +274,6 @@ def _generate_estimator_class(self, test_name, **kwargs):
if not self.is_excluded(test_name, est)
]
- # exclude classes based on python version compatibility
- estimator_classes_to_test = [
- est
- for est in estimator_classes_to_test
- if _check_estimator_deps(est, severity="none")
- ]
-
estimator_names = [est.__name__ for est in estimator_classes_to_test]
return estimator_classes_to_test, estimator_names
diff --git a/sktime/tests/test_switch.py b/sktime/tests/test_switch.py
index 84f9062bd12..1daf04cd692 100644
--- a/sktime/tests/test_switch.py
+++ b/sktime/tests/test_switch.py
@@ -3,6 +3,8 @@
__author__ = ["fkiraly"]
+from inspect import getmro, isclass
+
def run_test_for_class(cls):
"""Check if test should run for a class or function.
@@ -41,11 +43,25 @@ class for which to determine whether it should be tested
from sktime.utils.validation._dependencies import _check_estimator_deps
def _required_deps_present(obj):
+ """Check if all required soft dependencies are present, return bool."""
if hasattr(obj, "get_class_tag"):
return _check_estimator_deps(obj, severity="none")
else:
return True
+ def _is_class_changed_or_sktime_parents(cls):
+ """Check if class or any of its sktime parents have changed, return bool."""
+ # if cls is a function, not a class, default to is_class_changed
+ if not isclass(cls):
+ return is_class_changed(cls)
+
+ # now we know cls is a class, so has an mro
+ cls_and_parents = getmro(cls)
+ cls_and_sktime_parents = [
+ x for x in cls_and_parents if x.__module__.startswith("sktime")
+ ]
+ return any(is_class_changed(x) for x in cls_and_sktime_parents)
+
# if any of the required soft dependencies are not present, do not run the test
if not all(_required_deps_present(x) for x in cls):
return False
@@ -53,7 +69,7 @@ def _required_deps_present(obj):
# if ONLY_CHANGED_MODULES is on, run the test if and only if
# any of the modules containing any of the classes in the list have changed
if ONLY_CHANGED_MODULES:
- return any(is_class_changed(x) for x in cls)
+ return any(_is_class_changed_or_sktime_parents(x) for x in cls)
# otherwise
# i.e., dependencies are present, and differential testing is disabled