Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 71 additions & 1 deletion c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4383,6 +4383,44 @@ test_internal_sample_sample_sets(void)
tsk_treeseq_free(&ts);
}

static void
test_non_sample_leaf_sample_lists(void)
{
const char *nodes = "1 0 0\n"
"0 0 0\n"
"1 2 0\n";
const char *edges = "0 1 2 0,1\n";
const tsk_id_t left_sample[3] = { 0, -1, 1 };
const tsk_id_t right_sample[3] = { 0, -1, 0 };
const tsk_id_t next_sample[2] = { -1, 0 };
const tsk_id_t samples[2] = { 0, 2 };
const tsk_id_t sample_index_map[3] = { 0, -1, 1 };
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_id_t i;
int ret;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);

ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, 1);

for (i = 0; i < 3; i++) {
CU_ASSERT_EQUAL_FATAL(left_sample[i], t.left_sample[i]);
CU_ASSERT_EQUAL_FATAL(right_sample[i], t.right_sample[i]);
CU_ASSERT_EQUAL_FATAL(sample_index_map[i], ts.sample_index_map[i]);
}
for (i = 0; i < 2; i++) {
CU_ASSERT_EQUAL_FATAL(next_sample[i], t.next_sample[i]);
CU_ASSERT_EQUAL_FATAL(samples[i], t.samples[i]);
}

tsk_treeseq_free(&ts);
tsk_tree_free(&t);
}

/*=======================================================
* KC Distance tests.
*=======================================================*/
Expand Down Expand Up @@ -4644,7 +4682,37 @@ test_internal_samples_kc(void)
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, 1);
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INTERNAL_SAMPLES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
tsk_tree_free(&t);
}

static void
test_non_sample_leaf_kc(void)
{
const char *nodes = "1 0 0\n"
"0 0 0\n"
"0 1 0\n";
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts;
tsk_tree_t t;
int ret;
double result = 0;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL);

ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(result, 0.0);

ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, 1);
ret = tsk_tree_kc_distance(&t, &t, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(result, 0.0);

tsk_treeseq_free(&ts);
tsk_tree_free(&t);
}
Expand Down Expand Up @@ -5647,6 +5715,7 @@ main(int argc, char **argv)
{ "test_simple_sample_sets", test_simple_sample_sets },
{ "test_nonbinary_sample_sets", test_nonbinary_sample_sets },
{ "test_internal_sample_sample_sets", test_internal_sample_sample_sets },
{ "test_non_sample_leaf_sample_lists", test_non_sample_leaf_sample_lists },

/* KC distance tests */
{ "test_single_tree_kc", test_single_tree_kc },
Expand All @@ -5656,6 +5725,7 @@ main(int argc, char **argv)
{ "test_nonbinary_tree_kc", test_nonbinary_tree_kc },
{ "test_nonzero_samples_kc", test_nonzero_samples_kc },
{ "test_internal_samples_kc", test_internal_samples_kc },
{ "test_non_sample_leaf_kc", test_non_sample_leaf_kc },
{ "test_unequal_sample_size_kc", test_unequal_sample_size_kc },
{ "test_unequal_samples_kc", test_unequal_samples_kc },
{ "test_unary_nodes_kc", test_unary_nodes_kc },
Expand Down
3 changes: 0 additions & 3 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,6 @@ tsk_strerror_internal(int err)
case TSK_ERR_SAMPLES_NOT_EQUAL:
ret = "Samples must be identical in trees to compare.";
break;
case TSK_ERR_INTERNAL_SAMPLES:
ret = "Internal samples are not supported.";
break;
case TSK_ERR_MULTIPLE_ROOTS:
ret = "Trees with multiple roots not supported.";
break;
Expand Down
9 changes: 4 additions & 5 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,10 @@ not found in the file.
/* Distance metric errors */
#define TSK_ERR_SAMPLE_SIZE_MISMATCH -1200
#define TSK_ERR_SAMPLES_NOT_EQUAL -1201
#define TSK_ERR_INTERNAL_SAMPLES -1202
#define TSK_ERR_MULTIPLE_ROOTS -1203
#define TSK_ERR_UNARY_NODES -1204
#define TSK_ERR_SEQUENCE_LENGTH_MISMATCH -1205
#define TSK_ERR_NO_SAMPLE_LISTS -1206
#define TSK_ERR_MULTIPLE_ROOTS -1202
#define TSK_ERR_UNARY_NODES -1203
#define TSK_ERR_SEQUENCE_LENGTH_MISMATCH -1204
#define TSK_ERR_NO_SAMPLE_LISTS -1205

/* Haplotype matching errors */
#define TSK_ERR_NULL_VITERBI_MATRIX -1300
Expand Down
87 changes: 35 additions & 52 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4529,7 +4529,7 @@ kc_vectors_free(kc_vectors *self)
}

static inline void
update_kc_vectors_single_leaf(
update_kc_vectors_single_sample(
tsk_treeseq_t *ts, kc_vectors *kc_vecs, tsk_id_t u, double time)
{
const tsk_id_t *sample_index_map = ts->sample_index_map;
Expand All @@ -4543,25 +4543,19 @@ static inline void
update_kc_vectors_all_pairs(tsk_tree_t *tree, kc_vectors *kc_vecs, tsk_id_t u,
tsk_id_t v, tsk_size_t depth, double time)
{
tsk_id_t leaf1_index, leaf2_index, leaf1, leaf2, n1, n2, tmp, pair_index;
tsk_treeseq_t *ts = tree->tree_sequence;
const tsk_id_t *restrict samples = ts->samples;
tsk_id_t sample1_index, sample2_index, n1, n2, tmp, pair_index;
const tsk_id_t *restrict left_sample = tree->left_sample;
const tsk_id_t *restrict right_sample = tree->right_sample;
const tsk_id_t *restrict next_sample = tree->next_sample;
const tsk_id_t *restrict sample_index_map = ts->sample_index_map;
tsk_size_t *restrict kc_m = kc_vecs->m;
double *restrict kc_M = kc_vecs->M;

leaf1_index = left_sample[u];
while (true) {
leaf1 = samples[leaf1_index];
leaf2_index = left_sample[v];
while (true) {
leaf2 = samples[leaf2_index];

n1 = sample_index_map[leaf1];
n2 = sample_index_map[leaf2];
sample1_index = left_sample[u];
while (sample1_index != TSK_NULL) {
sample2_index = left_sample[v];
while (sample2_index != TSK_NULL) {
n1 = sample1_index;
n2 = sample2_index;
if (n1 > n2) {
tmp = n1;
n1 = n2;
Expand All @@ -4574,15 +4568,15 @@ update_kc_vectors_all_pairs(tsk_tree_t *tree, kc_vectors *kc_vecs, tsk_id_t u,
kc_m[pair_index] = depth;
kc_M[pair_index] = time;

if (leaf2_index == right_sample[v]) {
if (sample2_index == right_sample[v]) {
break;
}
leaf2_index = next_sample[leaf2_index];
sample2_index = next_sample[sample2_index];
}
if (leaf1_index == right_sample[u]) {
if (sample1_index == right_sample[u]) {
break;
}
leaf1_index = next_sample[leaf1_index];
sample1_index = next_sample[sample1_index];
}
}

Expand Down Expand Up @@ -4620,22 +4614,21 @@ fill_kc_vectors(tsk_tree_t *t, kc_vectors *kc_vecs)
depth = stack[stack_top].depth;
stack_top--;

if (t->left_child[u] == TSK_NULL) {
if (u == root) {
time = 0;
} else {
time = times[t->parent[u]] - times[u];
}
update_kc_vectors_single_leaf(ts, kc_vecs, u, time);
} else {
if (tsk_tree_is_sample(t, u)) {
time = tsk_tree_branch_length(t, u);
update_kc_vectors_single_sample(ts, kc_vecs, u, time);
}

/* Don't bother going deeper if there are no samples under this node */
if (t->left_sample[u] != TSK_NULL) {
for (c1 = t->left_child[u]; c1 != TSK_NULL; c1 = t->right_sib[c1]) {
stack_top++;
stack[stack_top].node = c1;
stack[stack_top].depth = depth + 1;

for (c2 = t->right_sib[c1]; c2 != TSK_NULL; c2 = t->right_sib[c2]) {
update_kc_vectors_all_pairs(
t, kc_vecs, c1, c2, depth, times[root] - times[u]);
time = times[root] - times[u];
update_kc_vectors_all_pairs(t, kc_vecs, c1, c2, depth, time);
}
}
}
Expand Down Expand Up @@ -4718,7 +4711,7 @@ check_kc_distance_samples_inputs(tsk_treeseq_t *self, tsk_treeseq_t *other)
int
tsk_tree_kc_distance(tsk_tree_t *self, tsk_tree_t *other, double lambda, double *result)
{
tsk_id_t n, i, j, u, num_samples;
tsk_id_t n, i;
kc_vectors vecs[2];
tsk_tree_t *trees[2] = { self, other };
int ret = 0;
Expand All @@ -4732,17 +4725,6 @@ tsk_tree_kc_distance(tsk_tree_t *self, tsk_tree_t *other, double lambda, double
goto out;
}
for (i = 0; i < 2; i++) {
num_samples = (tsk_id_t) trees[i]->tree_sequence->num_samples;
for (j = 0; j < num_samples; j++) {
u = trees[i]->tree_sequence->samples[j];
if (trees[i]->left_child[u] != TSK_NULL) {
/* It's probably possible to support this, but it's too awkward
* to deal with and seems like a fairly niche requirement. */
ret = TSK_ERR_INTERNAL_SAMPLES;
goto out;
}
}

ret = check_kc_distance_tree_inputs(trees[i]);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -4789,22 +4771,24 @@ check_kc_distance_tree_sequence_inputs(tsk_treeseq_t *self, tsk_treeseq_t *other
}

static void
update_kc_pairs_with_leaf(tsk_tree_t *self, kc_vectors *kc, tsk_id_t leaf,
update_kc_pair_with_sample(tsk_tree_t *self, kc_vectors *kc, tsk_id_t sample,
tsk_size_t *depths, double root_time)
{
tsk_id_t c, p, sib;
double time;
tsk_size_t depth;
double *times = self->tree_sequence->tables->nodes.time;

for (c = leaf, p = self->parent[leaf]; p != TSK_NULL; c = p, p = self->parent[p]) {
c = sample;
for (p = self->parent[sample]; p != TSK_NULL; p = self->parent[p]) {
time = root_time - times[p];
depth = depths[p];
for (sib = self->left_child[p]; sib != TSK_NULL; sib = self->right_sib[sib]) {
if (sib != c) {
update_kc_vectors_all_pairs(self, kc, leaf, sib, depth, time);
update_kc_vectors_all_pairs(self, kc, sample, sib, depth, time);
}
}
c = p;
}
}

Expand All @@ -4830,14 +4814,13 @@ update_kc_subtree_state(
stack_top--;

if (tsk_tree_is_sample(t, v)) {
update_kc_pairs_with_leaf(t, kc, v, depths, root_time);
} else {
for (c = t->left_child[v]; c != TSK_NULL; c = t->right_sib[c]) {
if (depths[c] != 0) {
depths[c] = depths[v] + 1;
stack_top++;
stack[stack_top] = c;
}
update_kc_pair_with_sample(t, kc, v, depths, root_time);
}
for (c = t->left_child[v]; c != TSK_NULL; c = t->right_sib[c]) {
if (depths[c] != 0) {
depths[c] = depths[v] + 1;
stack_top++;
stack[stack_top] = c;
}
}
}
Expand Down Expand Up @@ -4889,7 +4872,7 @@ update_kc_incremental(tsk_tree_t *self, kc_vectors *kc, tsk_edge_list_t *edges_o

if (tsk_tree_is_sample(self, u)) {
time = tsk_tree_branch_length(self, u);
update_kc_vectors_single_leaf(self->tree_sequence, kc, u, time);
update_kc_vectors_single_sample(self->tree_sequence, kc, u, time);
}
}

Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ In development

**New features**

- Add support for trees with internal samples for the Kendall-Colijn tree distance
metric. (:user:`daniel-goldstein`, :pr:`610`)

- Add background shading to SVG tree sequences to reflect tree position along the
sequence (:user:`hyanwong`, :pr:`563`)

Expand Down
5 changes: 2 additions & 3 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,8 +1976,7 @@ def test_kc_distance_errors(self):
t2 = _tskit.Tree(ts1, options=_tskit.SAMPLE_LISTS)
# If we don't seek to a specific tree, it has multiple roots (i.e., it's
# in the null state). This fails because we don't accept multiple roots.
with self.assertRaises(_tskit.LibraryError):
t1.get_kc_distance(t2, 0)
self.verify_kc_library_error(t2, t2)

# Different numbers of samples fail.
ts2 = self.get_example_tree_sequence(11)
Expand All @@ -1991,7 +1990,7 @@ def test_kc_distance_errors(self):
t2.first()
self.verify_kc_library_error(t1, t2)

# Internal samples cause errors.
# Unary nodes cause errors.
tables = _tskit.TableCollection(1.0)
tables.nodes.add_row(flags=1)
tables.nodes.add_row(flags=1, time=1)
Expand Down
Loading