From 48b52c25f9935fc1f7608bb43bbd9a64a1bd6b25 Mon Sep 17 00:00:00 2001 From: Nate Pope Date: Tue, 27 May 2025 19:50:53 -0700 Subject: [PATCH] Fix bug in pair coal counts when window breakpoint is in missing interval --- c/tskit/trees.c | 3 ++- python/CHANGELOG.rst | 6 ++++++ python/tests/test_coalrate.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 4cf479c31f..4638fb18e8 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -9586,8 +9586,9 @@ tsk_treeseq_pair_coalescence_stat(const tsk_treeseq_t *self, tsk_size_t num_samp window_span = windows[w + 1] - windows[w] - missing_span; missing_span = 0.0; if (num_edges == 0) { + /* missing interval, so remove overcounted missing span */ remaining_span = right - windows[w + 1]; - window_span -= remaining_span; + window_span += remaining_span; missing_span += remaining_span; } for (i = 0; i < (tsk_id_t) num_set_indexes; i++) { diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 5225fd0e30..2baf0cc1c3 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -7,6 +7,12 @@ - ``TreeSequence.map_to_vcf_model`` now also returns the transformed positions and contig length. (:user:`benjeffery`, :pr:`XXXX`, :issue:`3173`) +**Bugfixes** + +- Fix bug in ``TreeSequence.pair_coalescence_counts`` when ``span_normalise=True`` + and a window breakpoint falls within an internal missing interval. + (:user:`nspope`, :pr:`3176`, :issue:`3175`) + -------------------- [0.6.4] - 2025-05-21 -------------------- diff --git a/python/tests/test_coalrate.py b/python/tests/test_coalrate.py index 017a5f70bb..9d869bd8d0 100644 --- a/python/tests/test_coalrate.py +++ b/python/tests/test_coalrate.py @@ -1294,9 +1294,9 @@ def test_span_normalise(self): proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=False) np.testing.assert_allclose(proto, check) - def test_span_normalise_with_missing(self): + def test_span_normalise_with_missing_flanks(self): """ - test case where span is normalised and there are intervals without trees + test case where span is normalised and there are flanking intervals without trees """ ts = self.example_ts() missing = np.array([[0.0, 0.1], [0.8, 1.0]]) * ts.sequence_length @@ -1313,6 +1313,30 @@ def test_span_normalise_with_missing(self): proto = proto_pair_coalescence_counts(ts, windows=windows, span_normalise=True) np.testing.assert_allclose(proto, check) + def test_span_normalise_with_missing_interior(self): + """ + test that span normalisation correctly calculates internal missing data + """ + ts = msprime.sim_ancestry(samples=1, discrete_genome=False) + missing_interval = np.array([[0.3, 0.6]]) * ts.sequence_length + windows = np.array([0.0, 0.31, 1.0]) * ts.sequence_length + time_windows = np.array([0.0, np.inf]) + ts = ts.delete_intervals(missing_interval) + check = np.ones(windows.size - 1) + implm = ts.pair_coalescence_counts( + windows=windows, + time_windows=time_windows, + span_normalise=True, + ).flatten() + np.testing.assert_array_almost_equal(implm, check) + proto = proto_pair_coalescence_counts( + ts, + windows=windows, + time_windows=time_windows, + span_normalise=True, + ).flatten() + np.testing.assert_array_almost_equal(proto, check) + def test_empty_windows(self): """ test that windows without nodes contain zeros