Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4595,6 +4595,31 @@ test_unequal_samples_kc(void)
tsk_tree_free(&other_t);
}

static void
test_unary_nodes_kc(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"0 2 0";
const char *edges = "0 1 2 0,1\n"
"0 1 3 2";
tsk_treeseq_t ts;
tsk_tree_t t;
int ret;
double result = 0;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);
ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, 1);
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNARY_NODES);
tsk_treeseq_free(&ts);
tsk_tree_free(&t);
}

/*=======================================================
* Miscellaneous tests.
*======================================================*/
Expand Down Expand Up @@ -5306,6 +5331,7 @@ main(int argc, char **argv)
{ "test_internal_samples_kc", test_internal_samples_kc },
{ "test_unequal_sample_size_kc", test_unequal_sample_size_kc },
{ "test_unequal_samples_kc", test_unequal_samples_kc },
{ "test_unary_nodes_kc", test_unary_nodes_kc },

/* Misc */
{ "test_tree_errors", test_tree_errors },
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ tsk_strerror_internal(int err)
case TSK_ERR_MULTIPLE_ROOTS:
ret = "Trees with multiple roots not supported.";
break;
case TSK_ERR_UNARY_NODES:
ret = "Unsimplified trees with unary nodes are not supported.";
break;

/* Haplotype matching errors */
case TSK_ERR_NULL_VITERBI_MATRIX:
Expand Down
1 change: 1 addition & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ the file.
#define TSK_ERR_SAMPLES_NOT_EQUAL -1201
#define TSK_ERR_INTERNAL_SAMPLES -1202
#define TSK_ERR_MULTIPLE_ROOTS -1203
#define TSK_ERR_UNARY_NODES -1204

/* Haplotype matching errors */
#define TSK_ERR_NULL_VITERBI_MATRIX -1300
Expand Down
15 changes: 14 additions & 1 deletion c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4593,8 +4593,9 @@ norm_kc_vectors(kc_vectors *self, kc_vectors *other, double lambda)
int
tsk_tree_kc_distance(tsk_tree_t *self, tsk_tree_t *other, double lambda, double *result)
{
tsk_id_t u, n, i;
tsk_id_t u, n, num_nodes, i, left_child;
kc_vectors vecs[2];
tsk_tree_t *t;
tsk_tree_t *trees[2] = { self, other };
const tsk_id_t *samples = self->tree_sequence->samples;
const tsk_id_t *other_samples = other->tree_sequence->samples;
Expand All @@ -4617,6 +4618,18 @@ tsk_tree_kc_distance(tsk_tree_t *self, tsk_tree_t *other, double lambda, double
goto out;
}

for (i = 0; i < 2; i++) {
t = trees[i];
num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(t->tree_sequence);
for (u = 0; u < num_nodes; u++) {
left_child = t->left_child[u];
if (left_child != TSK_NULL && left_child == t->right_child[u]) {
ret = TSK_ERR_UNARY_NODES;
goto out;
}
}
}

n = (tsk_id_t) self->tree_sequence->num_samples;
for (i = 0; i < n; i++) {
if (samples[i] != other_samples[i]) {
Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ In development

**New features**

- Remove support for ``kc_distance`` on trees with unary nodes
(:user:`daniel-goldstein`, :`508`)

- Improve Kendall-Colijn tree distance algorithm to operate in O(n^2) time
instead of O(n^2 * log(n)) where n is the number of samples
(:user:`daniel-goldstein`, :pr:`490`)
Expand Down
19 changes: 19 additions & 0 deletions python/tests/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ def naive_kc_distance(tree1, tree2, lambda_=0):
for u in samples:
if not tree1.is_leaf(u) or not tree2.is_leaf(u):
raise ValueError("Internal samples not supported")
for tree in [tree1, tree2]:
for u in tree.nodes():
if tree.num_children(u) == 1:
raise ValueError("Unary nodes are not supported")

n = samples.shape[0]
N = (n * (n - 1)) // 2
Expand Down Expand Up @@ -272,6 +276,11 @@ def c_kc_distance(tree1, tree2, lambda_=0):
for u in samples:
if not tree1.is_leaf(u) or not tree2.is_leaf(u):
raise ValueError("Internal samples not supported")
for tree in [tree1, tree2]:
for u in range(tree.tree_sequence.num_nodes):
left_child = tree.left_child(u)
if left_child != tskit.NULL and left_child == tree.right_child(u):
raise ValueError("Unary nodes are not supported")

n = tree1.tree_sequence.num_samples
vecs1 = KCVectors(n)
Expand Down Expand Up @@ -327,6 +336,16 @@ def test_different_samples_error(self):
self.assertRaises(ValueError, c_kc_distance, tree1, tree2)
self.assertRaises(_tskit.LibraryError, tree1.kc_distance, tree2)

unsimplified_ts = msprime.simulate(
10, random_seed=1, recombination_rate=10, record_full_arg=True
)
trees = unsimplified_ts.trees(sample_lists=True)
tree1 = next(trees)
tree2 = next(trees)
self.assertRaises(ValueError, naive_kc_distance, tree1, tree2)
self.assertRaises(ValueError, c_kc_distance, tree1, tree2)
self.assertRaises(_tskit.LibraryError, tree1.kc_distance, tree2)

def validate_trees(self, n):
for seed in range(1, 10):
ts1 = msprime.simulate(n, random_seed=seed)
Expand Down