Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
- Add ``num_children`` to ``tsk_tree_t`` an array which contains counts of the number of child
nodes of each node in the tree. (:user:`GertjanBisschop`, :issue:`2274`, :pr:`2316`)

- Add ``edge`` to ``tsk_tree_t`` an array which contains the ``edge_id`` of the edge encoding
the relationship between the child node and its parent for each (child) node in the tree
(:user:`GertjanBisschop`, :issue:`2304`, :pr:`2340`)


**Changes**

Expand Down
79 changes: 76 additions & 3 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ check_trees_identical(tsk_tree_t *self, tsk_tree_t *other)
tsk_memcmp(self->right_sib, other->right_sib, N * sizeof(tsk_id_t)) == 0);
CU_ASSERT_FATAL(
tsk_memcmp(self->num_children, other->num_children, N * sizeof(tsk_id_t)) == 0);
CU_ASSERT_FATAL(tsk_memcmp(self->edge, other->edge, N * sizeof(tsk_id_t)) == 0);

CU_ASSERT_EQUAL_FATAL(self->num_samples == NULL, other->num_samples == NULL)
CU_ASSERT_EQUAL_FATAL(
Expand Down Expand Up @@ -477,6 +478,65 @@ verify_tree_diffs(tsk_treeseq_t *ts, tsk_flags_t options)
free(sib);
}

static void
verify_edge_array_single_tree(
tsk_tree_t *tree, tsk_edge_table_t *edge_table, tsk_size_t num_nodes)
{
int ret;
tsk_id_t c, edge_id;
tsk_edge_t edge;
tsk_size_t count_edges = 0;

for (c = 0; c <= (tsk_id_t) num_nodes; c++) {
edge_id = tree->edge[c];
if (edge_id == TSK_NULL) {
/*c is either (virtual) root,
or is not associated with an edge along this tree */
CU_ASSERT_EQUAL(tree->parent[c], TSK_NULL);
} else {
ret = tsk_edge_table_get_row(edge_table, edge_id, &edge);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(edge.id, edge_id);
CU_ASSERT_EQUAL(edge.parent, tree->parent[c]);
CU_ASSERT_EQUAL(edge.child, c);
count_edges++;
}
}

CU_ASSERT_EQUAL(count_edges, tree->num_edges);
}

static void
verify_edge_array_trees(tsk_treeseq_t *ts)
{
int ret;
tsk_tree_t t;
tsk_edge_table_t edge_table;
tsk_size_t num_nodes;
tsk_id_t c;

num_nodes = tsk_treeseq_get_num_nodes(ts);

edge_table = ts->tables->edges;
ret = tsk_tree_init(&t, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* verify initialized edge array */
for (c = 0; c <= (tsk_id_t) num_nodes; c++) {
CU_ASSERT_EQUAL(t.edge[c], TSK_NULL)
}
/* verify edge array for each tree in treesequence */
for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {
verify_edge_array_single_tree(&t, &edge_table, num_nodes);
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* verify cleared edge array */
for (c = 0; c <= (tsk_id_t) num_nodes; c++) {
CU_ASSERT_EQUAL(t.edge[c], TSK_NULL)
}

tsk_tree_free(&t);
}

/* When we keep all sites in simplify, the genotypes for the subset of the
* samples should be the same as the original */
static void
Expand Down Expand Up @@ -3541,6 +3601,7 @@ test_single_tree_good_records(void)
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 7);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -3568,6 +3629,7 @@ test_single_nonbinary_tree_good_records(void)
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 10);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -3883,6 +3945,7 @@ test_single_tree_iter(void)
tsk_size_t num_nodes = 7;

tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
verify_edge_array_trees(&ts);
ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);

Expand Down Expand Up @@ -3948,6 +4011,7 @@ test_single_nonbinary_tree_iter(void)
tsk_size_t total_samples = 7;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
verify_edge_array_trees(&ts);
ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);

Expand Down Expand Up @@ -4044,6 +4108,7 @@ test_single_tree_general_samples_iter(void)
CU_ASSERT_EQUAL(samples[1], 4);
CU_ASSERT_EQUAL(samples[2], 5);
CU_ASSERT_EQUAL(samples[3], 6);
verify_edge_array_trees(&ts);

ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
Expand Down Expand Up @@ -4951,6 +5016,7 @@ test_simple_multi_tree(void)
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand All @@ -4970,6 +5036,7 @@ test_unary_multi_tree(void)
tsk_treeseq_from_text(&ts, 10, unary_ex_nodes, unary_ex_edges, NULL, unary_ex_sites,
unary_ex_mutations, NULL, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand All @@ -4989,6 +5056,7 @@ test_internal_sample_multi_tree(void)
tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,
NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -5019,6 +5087,7 @@ test_internal_sample_simplified_multi_tree(void)
CU_ASSERT_EQUAL(node_map[5], 2);

verify_trees(&simplified, num_trees, parents);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
Expand All @@ -5040,6 +5109,7 @@ test_nonbinary_multi_tree(void)
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -5093,6 +5163,7 @@ test_simplify_keep_input_roots_multi_tree(void)
&ts, samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
verify_trees(&simplified, num_trees, parents);
verify_edge_array_trees(&ts);

tsk_treeseq_free(&ts);
tsk_treeseq_free(&simplified);
Expand Down Expand Up @@ -5138,6 +5209,7 @@ test_left_to_right_multi_tree(void)
tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_tree_next_prev(&ts);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -5183,6 +5255,7 @@ test_gappy_multi_tree(void)
tsk_treeseq_from_text(&ts, 12, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
verify_trees(&ts, num_trees, parents);
verify_tree_next_prev(&ts);
verify_edge_array_trees(&ts);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -5233,15 +5306,15 @@ test_tsk_treeseq_bad_records(void)
}

static void
test_num_children_multi_tree(void)
test_convenience_arrays_multi_tree(void)
{
int ret;
tsk_treeseq_t ts;
tsk_tree_t t;

tsk_treeseq_from_text(
&ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);

verify_edge_array_trees(&ts);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_tree_next(&t));
Expand Down Expand Up @@ -7823,7 +7896,7 @@ main(int argc, char **argv)
test_simplify_keep_input_roots_multi_tree },
{ "test_left_to_right_multi_tree", test_left_to_right_multi_tree },
{ "test_gappy_multi_tree", test_gappy_multi_tree },
{ "test_num_children_multi_tree", test_num_children_multi_tree },
{ "test_convenience_arrays_multi_tree", test_convenience_arrays_multi_tree },

{ "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records },

Expand Down
14 changes: 11 additions & 3 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -3462,9 +3462,10 @@ tsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t
self->left_sib = tsk_malloc(N * sizeof(*self->left_sib));
self->right_sib = tsk_malloc(N * sizeof(*self->right_sib));
self->num_children = tsk_calloc(N, sizeof(*self->num_children));
self->edge = tsk_malloc(N * sizeof(*self->edge));
if (self->parent == NULL || self->left_child == NULL || self->right_child == NULL
|| self->left_sib == NULL || self->right_sib == NULL
|| self->num_children == NULL) {
|| self->num_children == NULL || self->edge == NULL) {
goto out;
}
if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
Expand Down Expand Up @@ -3530,6 +3531,7 @@ tsk_tree_free(tsk_tree_t *self)
tsk_safe_free(self->right_sample);
tsk_safe_free(self->next_sample);
tsk_safe_free(self->num_children);
tsk_safe_free(self->edge);
return 0;
}

Expand Down Expand Up @@ -3679,6 +3681,7 @@ tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options)
tsk_memcpy(dest->left_sib, self->left_sib, N * sizeof(*self->left_sib));
tsk_memcpy(dest->right_sib, self->right_sib, N * sizeof(*self->right_sib));
tsk_memcpy(dest->num_children, self->num_children, N * sizeof(*self->num_children));
tsk_memcpy(dest->edge, self->edge, N * sizeof(*self->edge));
if (!(dest->options & TSK_NO_SAMPLE_COUNTS)) {
if (self->options & TSK_NO_SAMPLE_COUNTS) {
ret = TSK_ERR_UNSUPPORTED_OPERATION;
Expand Down Expand Up @@ -4256,6 +4259,7 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c)
tsk_id_t *restrict parent = self->parent;
tsk_size_t *restrict num_samples = self->num_samples;
tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;
tsk_id_t *restrict edge = self->edge;
const tsk_size_t root_threshold = self->root_threshold;
tsk_id_t u;
tsk_id_t path_end = TSK_NULL;
Expand All @@ -4265,6 +4269,7 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c)

tsk_tree_remove_branch(self, p, c, parent);
self->num_edges--;
edge[c] = TSK_NULL;

if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
u = p;
Expand All @@ -4290,11 +4295,12 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c)
}

static void
tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c)
tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t edge_id)
{
tsk_id_t *restrict parent = self->parent;
tsk_size_t *restrict num_samples = self->num_samples;
tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;
tsk_id_t *restrict edge = self->edge;
const tsk_size_t root_threshold = self->root_threshold;
tsk_id_t u;
tsk_id_t path_end = TSK_NULL;
Expand Down Expand Up @@ -4322,6 +4328,7 @@ tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c)

tsk_tree_insert_branch(self, p, c, parent);
self->num_edges++;
edge[c] = edge_id;

if (self->options & TSK_SAMPLE_LISTS) {
tsk_tree_update_sample_lists(self, p, parent);
Expand Down Expand Up @@ -4361,7 +4368,7 @@ tsk_tree_advance(tsk_tree_t *self, int direction, const double *restrict out_bre
while (in >= 0 && in < num_edges && in_breakpoints[in_order[in]] == x) {
k = in_order[in];
in += direction;
tsk_tree_insert_edge(self, edge_parent[k], edge_child[k]);
tsk_tree_insert_edge(self, edge_parent[k], edge_child[k], k);
}

self->direction = direction;
Expand Down Expand Up @@ -4586,6 +4593,7 @@ tsk_tree_clear(tsk_tree_t *self)
tsk_memset(self->left_sib, 0xff, N * sizeof(*self->left_sib));
tsk_memset(self->right_sib, 0xff, N * sizeof(*self->right_sib));
tsk_memset(self->num_children, 0, N * sizeof(*self->num_children));
tsk_memset(self->edge, 0xff, N * sizeof(*self->edge));

if (sample_counts) {
tsk_memset(self->num_samples, 0, N * sizeof(*self->num_samples));
Expand Down
7 changes: 7 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,13 @@ typedef struct {
@brief The number of children of node u is num_children[u].
*/
tsk_id_t *num_children;
/**
@brief Array of edge ids where ``edge[u]`` is the edge that encodes the
relationship between the child node ``u`` and its parent. Equal to
``TSK_NULL`` if node ``u`` is a root, virtual root or is not a node in the
current tree.
*/
tsk_id_t *edge;
/**
@brief The total number of edges defining the topology of this tree.
This is equal to the number of tree sequence edges that intersect with
Expand Down
3 changes: 3 additions & 0 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def __init__(self, num_nodes):
self.left_sib = [tskit.NULL for _ in range(num_nodes)]
self.right_sib = [tskit.NULL for _ in range(num_nodes)]
self.num_children = [0 for _ in range(num_nodes)]
self.edge = [tskit.NULL for _ in range(num_nodes)]
self.left = 0
self.right = 0
self.index = -1
Expand All @@ -65,6 +66,7 @@ def from_tree(cls, tree):
ret.left_sib[u] = tree.left_sib(u)
ret.right_sib[u] = tree.right_sib(u)
ret.num_children[u] = tree.num_children(u)
ret.edge[u] = tree.edge(u)
assert ret == tree
return ret

Expand Down Expand Up @@ -188,6 +190,7 @@ def trees(self):
pt.left_sib[:] = rtt.left_sib
pt.right_sib[:] = rtt.right_sib
pt.num_children[:] = rtt.num_children
pt.edge[:] = rtt.edge
pt.left_root = rtt.left_child[-1]
pt.left = left
pt.right = right
Expand Down
13 changes: 13 additions & 0 deletions python/tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,16 @@ def test_branch_operations_num_children(self):

qlt.remove_branch(2, 0)
assert qlt.num_children[2] == 0

def test_edge_operations(self):
tt = tskit.Tree.generate_balanced(3)
tts = tt.tree_sequence

for _, qlt in tsutil.algorithm_R(tts):
assert np.sum(qlt.edge != -1) == tt.num_edges
self.verify_tree_edges(qlt, tts)

def verify_tree_edges(self, quintuply_linked_tree, tts):
for edge in tts.edges():
assert quintuply_linked_tree.edge[edge.child] == edge.id
assert quintuply_linked_tree.parent[edge.child] == edge.parent
8 changes: 6 additions & 2 deletions python/tests/tsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,16 +1211,18 @@ def __init__(self, n, root_threshold=1):
self.num_samples = np.zeros(n + 1, dtype=np.int32)
self.num_edges = 0
self.num_children = np.zeros(n + 1, dtype=np.int32)
self.edge = np.zeros(n + 1, dtype=np.int32) - 1

def __str__(self):
s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\tnchild\n"
s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\tnchild\tedge\n"
for j in range(len(self.parent)):
s += (
f"{j}\t{self.parent[j]}\t"
f"{self.left_child[j]}\t{self.right_child[j]}\t"
f"{self.left_sib[j]}\t{self.right_sib[j]}\t"
f"{self.num_samples[j]}\t"
f"{self.num_children[j]}\n"
f"{self.num_children[j]}\t"
f"{self.edge[j]}\n"
)
return s

Expand Down Expand Up @@ -1277,6 +1279,7 @@ def remove_root(self, root):
def remove_edge(self, edge):
self.remove_branch(edge.parent, edge.child)
self.num_edges -= 1
self.edge[edge.child] = -1

u = edge.parent
while u != -1:
Expand Down Expand Up @@ -1305,6 +1308,7 @@ def insert_edge(self, edge):

self.insert_branch(edge.parent, edge.child)
self.num_edges += 1
self.edge[edge.child] = edge.id


def algorithm_R(ts, root_threshold=1):
Expand Down