Skip to content

Commit

Permalink
Version without loops (bad, N^2 memory!).
Browse files Browse the repository at this point in the history
  • Loading branch information
vnmabus committed May 23, 2022
1 parent 14e293e commit a599ce3
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 24 deletions.
52 changes: 29 additions & 23 deletions dcor/_fast_dcov_avl.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ def _dyad_update(
"""
n = y.shape[0]
s[...] = 0
exps2 = 2 ** np.arange(l_max)

y_col = np.expand_dims(y, axis=1)
y_col = y[:, np.newaxis]

# Step 3.a: update s(l, k)
positions_3a = np.ceil(y_col / exps2).astype(np.int64) - 1
Expand Down Expand Up @@ -95,28 +96,30 @@ def _dyad_update_compiled_version(
"""
n = y.shape[0]
exps2 = 2 ** np.arange(l_max)

y_col = np.expand_dims(y, axis=1)

# Step 3.a: update s(l, k)
positions_3a = np.ceil(y_col / exps2).astype(np.int64) - 1
positions_3a[:, 1:] += pos_sums[:-1]

# Steps 3.b and 3.c
positions_3b = np.floor((y_col - 1) / exps2).astype(np.int64)
valid_positions = (positions_3b / 2 > np.floor(positions_3b / 2))
positions_3b -= 1
positions_3b[:, 1:] += pos_sums[:-1]
s[...] = 0

# Step 3: iteration
for i in range(1, n):

# Step 3.a: update s(l, k)
s[positions_3a[i - 1]] += c[i - 1]
for l in range(l_max):
k = int(math.ceil(y[i - 1] / 2 ** l))
pos = k - 1

if l > 0:
pos += pos_sums[l - 1]

s[pos] += c[i - 1]

# Steps 3.b and 3.c
gamma[i] += np.sum(s[positions_3b[i][valid_positions[i]]])
for l in range(l_max):
k = int(math.floor((y[i] - 1) / 2 ** l))
if k / 2 > math.floor(k / 2):
pos = k - 1
if l > 0:
pos += pos_sums[l - 1]

gamma[i] = gamma[i] + s[pos]

return gamma

Expand Down Expand Up @@ -263,7 +266,7 @@ def _distance_covariance_sqr_avl_impl(
sy_c[i],
c_sum[i],
l_max,
s[i],
s,
pos_sums,
gamma[i],
) for i in range(4)
Expand Down Expand Up @@ -308,7 +311,7 @@ def _distance_covariance_sqr_avl_impl(
float64[:, :],
float64[:],
int64,
float64[:, :],
float64[:],
int64[:],
float64[:, :],
),
Expand All @@ -318,7 +321,11 @@ def _distance_covariance_sqr_avl_impl(
)


def _get_impl_args(x, y, unbiased=False):
def _get_impl_args(
x: np.typing.NDArray[np.float64],
y: np.typing.NDArray[np.float64],
unbiased: bool = False,
):
"""
Get the parameters used in the algorithm.
"""
Expand Down Expand Up @@ -374,7 +381,7 @@ def _get_impl_args(x, y, unbiased=False):
l_max = int(math.ceil(np.log2(n)))

s_len = 2 ** (l_max + 1)
s = np.zeros(c.shape[:-1] + (s_len,), dtype=c.dtype)
s = np.empty(s_len, dtype=c.dtype)

pos_sums = np.arange(l_max, dtype=np.int64)
pos_sums[:] = 2 ** (l_max - pos_sums)
Expand Down Expand Up @@ -417,7 +424,7 @@ def _get_impl_args(x, y, unbiased=False):
float64[:, :],
float64[:],
int64,
float64[:, :],
float64[:],
int64[:],
float64[:, :],
))(input_array, input_array, boolean),
Expand All @@ -435,7 +442,7 @@ def _get_impl_args(x, y, unbiased=False):
),
CompileMode.COMPILE_CPU: (
(_get_impl_args_compiled, _distance_covariance_sqr_avl_impl_compiled),
)
),
}


Expand All @@ -448,7 +455,6 @@ def _distance_covariance_sqr_avl_generic(
compile_mode: CompileMode = CompileMode.AUTO,
) -> T:
"""Fast algorithm for the squared distance covariance."""

if exponent != 1:
raise ValueError(f"Exponent should be 1 but is {exponent} instead.")

Expand Down
25 changes: 24 additions & 1 deletion docs/performance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ resulting graph.
n_times = 100
n_samples_list = [10, 50, 100, 500, 1000]
avl_times = np.zeros(len(n_samples_list))
avl_uncompiled_times = np.zeros(len(n_samples_list))
mergesort_times = np.zeros(len(n_samples_list))
naive_times = np.zeros(len(n_samples_list))

Expand All @@ -46,13 +47,22 @@ resulting graph.
def avl():
return dcor.distance_covariance(x, y, method='AVL')

def avl_uncompiled():
return dcor.distance_covariance(
x,
y,
method='AVL',
compile_mode=dcor.CompileMode.NO_COMPILE,
)

def mergesort():
return dcor.distance_covariance(x, y, method='MERGESORT')

def naive():
return dcor.distance_covariance(x, y, method='NAIVE')

avl_times[i] = timeit(avl, number=n_times)
avl_uncompiled_times[i] = timeit(avl_uncompiled, number=n_times)
mergesort_times[i] = timeit(mergesort, number=n_times)
naive_times[i] = timeit(naive, number=n_times)

Expand All @@ -66,7 +76,20 @@ resulting graph.
plt.show()

We can see that the performance of the fast methods is much better than
the performance of the naive algorithm. In order to see the differences
the performance of the naive algorithm. We can also check the difference made by the
compilation with Numba:

.. jupyter-execute::

plt.title("Distance covariance performance comparison")
plt.xlabel("Number of samples")
plt.ylabel("Time (seconds)")
plt.plot(n_samples_list, avl_times, label="avl")
plt.plot(n_samples_list, avl_uncompiled_times, label="avl (uncompiled)")
plt.legend()
plt.show()

In order to see the differences
between the two fast methods, we will again compute them with more
samples. The large sample sizes used here could not be used with the naive
algorithm, as its memory usage also grows quadratically.
Expand Down

0 comments on commit a599ce3

Please sign in to comment.