Skip to content

Commit

Permalink
Add tests for array API with dcor functions.
Browse files: browse the repository at this point in the history
  • Loading branch information
vnmabus committed May 20, 2022
1 parent 65ce6a0 commit 8d70969
Show file tree
Hide file tree
Showing 6 changed files with 858 additions and 498 deletions.
43 changes: 29 additions & 14 deletions dcor/_dcor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,17 @@

from dataclasses import astuple, dataclass
from enum import Enum
from typing import TYPE_CHECKING, Any, Callable, Generic, Iterator, TypeVar
from typing import (
TYPE_CHECKING,
Any,
Callable,
Generic,
Iterator,
TypeVar,
Union,
)

import numpy as np

from dcor._dcor_internals import _af_inv_scaled

Expand Down Expand Up @@ -157,7 +167,9 @@ def _dispatch(
exponent: float,
**kwargs: Any,
) -> Any:
if _can_use_fast_algorithm(x, y, exponent):
xp = get_namespace(x, y)

if xp == np and _can_use_fast_algorithm(x, y, exponent):
return getattr(DistanceCovarianceMethod.AVL.value, method)(
x,
y,
Expand Down Expand Up @@ -431,8 +443,11 @@ def __repr__(self) -> str:
return '%s.%s' % (self.__class__.__name__, self.name)


# Type alias for arguments that accept either a DistanceCovarianceMethod
# member or a string; _to_algorithm converts the string form.
DistanceCovarianceMethodLike = Union[DistanceCovarianceMethod, str]


def _to_algorithm(
algorithm: DistanceCovarianceMethod | str,
algorithm: DistanceCovarianceMethodLike,
) -> DistanceCovarianceMethod:
"""Convert to algorithm if string."""
if isinstance(algorithm, DistanceCovarianceMethod):
Expand All @@ -446,7 +461,7 @@ def distance_covariance_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -506,7 +521,7 @@ def u_distance_covariance_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -566,7 +581,7 @@ def distance_covariance(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -627,7 +642,7 @@ def distance_stats_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> Stats[T]:
"""
Expand Down Expand Up @@ -703,7 +718,7 @@ def u_distance_stats_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -782,7 +797,7 @@ def distance_stats(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> Stats[T]:
"""
Expand Down Expand Up @@ -863,7 +878,7 @@ def distance_correlation_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -923,7 +938,7 @@ def u_distance_correlation_sqr(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -985,7 +1000,7 @@ def distance_correlation(
y: T,
*,
exponent: float = 1,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -1044,7 +1059,7 @@ def distance_correlation(
def distance_correlation_af_inv_sqr(
x: T,
y: T,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down Expand Up @@ -1114,7 +1129,7 @@ def distance_correlation_af_inv_sqr(
def distance_correlation_af_inv(
x: T,
y: T,
method: DistanceCovarianceMethod | str = DistanceCovarianceMethod.AUTO,
method: DistanceCovarianceMethodLike = DistanceCovarianceMethod.AUTO,
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""
Expand Down
49 changes: 29 additions & 20 deletions dcor/_fast_dcov_avl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import math
import warnings

from numba import float64, int64, boolean
import numba
from numba.types import Tuple, Array

import numpy as np
from numba import boolean, float64, int64
from numba.types import Array, Tuple

from ._utils import CompileMode
from dcor._utils import get_namespace

from ._utils import CompileMode

input_array = Array(float64, 1, 'A', readonly=True)

Expand Down Expand Up @@ -268,14 +268,16 @@ def _get_impl_args(x, y, unbiased=False):


impls_dict = {
CompileMode.AUTO: ((_get_impl_args_compiled,
_distance_covariance_sqr_avl_impl_compiled),
(_get_impl_args,
_distance_covariance_sqr_avl_impl)),
CompileMode.NO_COMPILE: ((_get_impl_args,
_distance_covariance_sqr_avl_impl),),
CompileMode.COMPILE_CPU: ((_get_impl_args_compiled,
_distance_covariance_sqr_avl_impl_compiled),)
CompileMode.AUTO: (
(_get_impl_args_compiled, _distance_covariance_sqr_avl_impl_compiled),
(_get_impl_args, _distance_covariance_sqr_avl_impl),
),
CompileMode.NO_COMPILE: (
(_get_impl_args, _distance_covariance_sqr_avl_impl),
),
CompileMode.COMPILE_CPU: (
(_get_impl_args_compiled, _distance_covariance_sqr_avl_impl_compiled),
)
}


Expand All @@ -286,8 +288,12 @@ def _distance_covariance_sqr_avl_generic(
if exponent != 1:
raise ValueError(f"Exponent should be 1 but is {exponent} instead.")

x = np.asarray(x)
y = np.asarray(y)
xp = get_namespace(x, y)
x = xp.asarray(x)
y = xp.asarray(y)

if xp is not np:
raise ValueError("AVL method is only implemented for NumPy arrays.")

assert 1 <= x.ndim <= 2
if x.ndim == 2:
Expand All @@ -301,7 +307,8 @@ def _distance_covariance_sqr_avl_generic(

if compile_mode not in impls_dict:
raise NotImplementedError(
f"Compile mode {compile_mode} not implemented.")
f"Compile mode {compile_mode} not implemented.",
)

for get_args, impl in impls_dict[compile_mode]:

Expand All @@ -314,11 +321,13 @@ def _distance_covariance_sqr_avl_generic(
if compile_mode is not CompileMode.AUTO:
raise e

warnings.warn(f"Falling back to uncompiled AVL fast distance "
f"covariance because of TypeError exception "
f"raised: {e}. Rembember: only floating point "
f"values can be used in the compiled "
f"implementations.")
warnings.warn(
f"Falling back to uncompiled AVL fast distance "
f"covariance because of TypeError exception "
f"raised: {e}. Rembember: only floating point "
f"values can be used in the compiled "
f"implementations.",
)


def _generate_rowwise_internal(target):
Expand Down
35 changes: 21 additions & 14 deletions dcor/homogeneity.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,17 @@

from typing import Callable, Sequence, TypeVar

import numpy as _np

from . import _energy, _hypothesis, distances as _distances
from ._energy import EstimationStatistic, EstimationStatisticLike
from ._hypothesis import HypothesisTest
import numpy as np

from . import distances as _distances
from ._energy import (
EstimationStatistic,
EstimationStatisticLike,
_check_valid_energy_exponent,
_energy_distance_from_distance_matrices,
energy_distance,
)
from ._hypothesis import HypothesisTest, _permutation_test_with_sym_matrix
from ._utils import ArrayType, RandomLike, _transform_to_2d, get_namespace

T = TypeVar("T", bound=ArrayType)
Expand All @@ -37,7 +43,7 @@ def _energy_test_statistic_from_distance_matrices(
estimation_stat: EstimationStatisticLike = EstimationStatistic.V_STATISTIC,
) -> T:
"""Test statistic with precomputed distance matrices."""
energy_distance = _energy._energy_distance_from_distance_matrices(
energy_distance = _energy_distance_from_distance_matrices(
distance_xx=distance_xx,
distance_yy=distance_yy,
distance_xy=distance_xy,
Expand Down Expand Up @@ -111,7 +117,7 @@ def energy_test_statistic(

coefficient = _energy_test_statistic_coefficient(n, m)

return coefficient * _energy.energy_distance(
return coefficient * energy_distance(
x,
y,
exponent=exponent,
Expand Down Expand Up @@ -194,6 +200,7 @@ def energy_test(
distance using Hoeffding's unbiased U-statistics. Otherwise, use
von Mises's biased V-statistics. If this is provided as a string,
it will first be converted to an EstimationStatistic enum instance.
n_jobs: Number of jobs executed in parallel by Joblib.
Returns:
Results of the hypothesis test.
Expand Down Expand Up @@ -230,16 +237,16 @@ def energy_test(
A different exponent for the Euclidean distance in the range
:math:`(0, 2)` can be used:
>>> dcor.homogeneity.energy_test(a, b, exponent=1.5) # doctest: +ELLIPSIS
>>> dcor.homogeneity.energy_test(a, b, exponent=1.5)
... # doctest: +ELLIPSIS
HypothesisTest(pvalue=1.0, statistic=171.0623923...)
"""

samples = [_transform_to_2d(a) for a in args]

num_samples = len(samples)

_energy._check_valid_energy_exponent(exponent)
_check_valid_energy_exponent(exponent)

sample_sizes = tuple(a.shape[0] for a in samples)

Expand All @@ -249,11 +256,11 @@ def energy_test(
try:
concat = xp.concat
except AttributeError:
concat = _np.concatenate
concat = np.concatenate
pooled_samples = concat(samples)

sample_indexes_array = _np.zeros(num_samples, dtype=int)
sample_indexes_array[1:] = _np.cumsum(sample_sizes)[:-1]
sample_indexes_array = np.zeros(num_samples, dtype=int)
sample_indexes_array[1:] = np.cumsum(sample_sizes)[:-1]
sample_indexes = tuple(sample_indexes_array)

# Compute the distance matrix once
Expand All @@ -272,7 +279,7 @@ def statistic_function(distance_matrix: T) -> T:
estimation_stat=estimation_stat,
)

return _hypothesis._permutation_test_with_sym_matrix(
return _permutation_test_with_sym_matrix(
sample_distances,
statistic_function=statistic_function,
num_resamples=num_resamples,
Expand Down

0 comments on commit 8d70969

Please sign in to comment.