diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index b2d48a1c03..59acd61935 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -7,6 +7,10 @@ - Add ``num_children`` to ``tsk_tree_t`` an array which contains counts of the number of child nodes of each node in the tree. (:user:`GertjanBisschop`, :issue:`2274`, :pr:`2316`) +- Add ``edge`` to ``tsk_tree_t`` an array which contains the ``edge_id`` of the edge encoding + the relationship between the child node and its parent for each (child) node in the tree + (:user:`GertjanBisschop`, :issue:`2304`, :pr:`2340`) + **Changes** diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index e2bf440194..8e14d57e7c 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -72,6 +72,7 @@ check_trees_identical(tsk_tree_t *self, tsk_tree_t *other) tsk_memcmp(self->right_sib, other->right_sib, N * sizeof(tsk_id_t)) == 0); CU_ASSERT_FATAL( tsk_memcmp(self->num_children, other->num_children, N * sizeof(tsk_id_t)) == 0); + CU_ASSERT_FATAL(tsk_memcmp(self->edge, other->edge, N * sizeof(tsk_id_t)) == 0); CU_ASSERT_EQUAL_FATAL(self->num_samples == NULL, other->num_samples == NULL) CU_ASSERT_EQUAL_FATAL( @@ -477,6 +478,65 @@ verify_tree_diffs(tsk_treeseq_t *ts, tsk_flags_t options) free(sib); } +static void +verify_edge_array_single_tree( + tsk_tree_t *tree, tsk_edge_table_t *edge_table, tsk_size_t num_nodes) +{ + int ret; + tsk_id_t c, edge_id; + tsk_edge_t edge; + tsk_size_t count_edges = 0; + + for (c = 0; c <= (tsk_id_t) num_nodes; c++) { + edge_id = tree->edge[c]; + if (edge_id == TSK_NULL) { + /*c is either (virtual) root, + or is not associated with an edge along this tree */ + CU_ASSERT_EQUAL(tree->parent[c], TSK_NULL); + } else { + ret = tsk_edge_table_get_row(edge_table, edge_id, &edge); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL(edge.id, edge_id); + CU_ASSERT_EQUAL(edge.parent, tree->parent[c]); + CU_ASSERT_EQUAL(edge.child, c); + count_edges++; + } + } + + CU_ASSERT_EQUAL(count_edges, tree->num_edges); +} + +static void +verify_edge_array_trees(tsk_treeseq_t *ts) +{ + int ret; + tsk_tree_t t; + tsk_edge_table_t edge_table; + tsk_size_t num_nodes; + tsk_id_t c; + + num_nodes = tsk_treeseq_get_num_nodes(ts); + + edge_table = ts->tables->edges; + ret = tsk_tree_init(&t, ts, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + /* verify initialized edge array */ + for (c = 0; c <= (tsk_id_t) num_nodes; c++) { + CU_ASSERT_EQUAL(t.edge[c], TSK_NULL) + } + /* verify edge array for each tree in treesequence */ + for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) { + verify_edge_array_single_tree(&t, &edge_table, num_nodes); + } + CU_ASSERT_EQUAL_FATAL(ret, 0); + /* verify cleared edge array */ + for (c = 0; c <= (tsk_id_t) num_nodes; c++) { + CU_ASSERT_EQUAL(t.edge[c], TSK_NULL) + } + + tsk_tree_free(&t); +} + /* When we keep all sites in simplify, the genotypes for the subset of the * samples should be the same as the original */ static void @@ -3541,6 +3601,7 @@ test_single_tree_good_records(void) CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 7); CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0); CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -3568,6 +3629,7 @@ test_single_nonbinary_tree_good_records(void) CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 10); CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0); CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -3883,6 +3945,7 @@ test_single_tree_iter(void) tsk_size_t num_nodes = 7; tsk_treeseq_from_text(&ts, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0); + verify_edge_array_trees(&ts); ret = tsk_tree_init(&tree, &ts, 0); CU_ASSERT_EQUAL(ret, 0); @@ -3948,6 +4011,7 @@ test_single_nonbinary_tree_iter(void) tsk_size_t total_samples = 7; tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0); + verify_edge_array_trees(&ts); ret = tsk_tree_init(&tree, &ts, 0); CU_ASSERT_EQUAL(ret, 0); @@ -4044,6 +4108,7 @@ test_single_tree_general_samples_iter(void) CU_ASSERT_EQUAL(samples[1], 4); CU_ASSERT_EQUAL(samples[2], 5); CU_ASSERT_EQUAL(samples[3], 6); + verify_edge_array_trees(&ts); ret = tsk_tree_init(&tree, &ts, 0); CU_ASSERT_EQUAL(ret, 0); @@ -4951,6 +5016,7 @@ test_simple_multi_tree(void) tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0); verify_trees(&ts, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -4970,6 +5036,7 @@ test_unary_multi_tree(void) tsk_treeseq_from_text(&ts, 10, unary_ex_nodes, unary_ex_edges, NULL, unary_ex_sites, unary_ex_mutations, NULL, NULL, 0); verify_trees(&ts, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -4989,6 +5056,7 @@ test_internal_sample_multi_tree(void) tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges, NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0); verify_trees(&ts, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -5019,6 +5087,7 @@ test_internal_sample_simplified_multi_tree(void) CU_ASSERT_EQUAL(node_map[5], 2); verify_trees(&simplified, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&simplified); tsk_treeseq_free(&ts); } @@ -5040,6 +5109,7 @@ test_nonbinary_multi_tree(void) tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0); verify_trees(&ts, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -5093,6 +5163,7 @@ test_simplify_keep_input_roots_multi_tree(void) &ts, samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); verify_trees(&simplified, num_trees, parents); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); tsk_treeseq_free(&simplified); @@ -5138,6 +5209,7 @@ test_left_to_right_multi_tree(void) tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0); verify_trees(&ts, num_trees, parents); verify_tree_next_prev(&ts); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -5183,6 +5255,7 @@ test_gappy_multi_tree(void) tsk_treeseq_from_text(&ts, 12, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0); verify_trees(&ts, num_trees, parents); verify_tree_next_prev(&ts); + verify_edge_array_trees(&ts); tsk_treeseq_free(&ts); } @@ -5233,7 +5306,7 @@ test_tsk_treeseq_bad_records(void) } static void -test_num_children_multi_tree(void) +test_convenience_arrays_multi_tree(void) { int ret; tsk_treeseq_t ts; @@ -5241,7 +5314,7 @@ test_num_children_multi_tree(void) tsk_treeseq_from_text( &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0); - + verify_edge_array_trees(&ts); ret = tsk_tree_init(&t, &ts, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_tree_next(&t)); @@ -7823,7 +7896,7 @@ main(int argc, char **argv) test_simplify_keep_input_roots_multi_tree }, { "test_left_to_right_multi_tree", test_left_to_right_multi_tree }, { "test_gappy_multi_tree", test_gappy_multi_tree }, - { "test_num_children_multi_tree", test_num_children_multi_tree }, + { "test_convenience_arrays_multi_tree", test_convenience_arrays_multi_tree }, { "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records }, diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 68f6ed262d..b921254ff8 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -3462,9 +3462,10 @@ tsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t self->left_sib = tsk_malloc(N * sizeof(*self->left_sib)); self->right_sib = tsk_malloc(N * sizeof(*self->right_sib)); self->num_children = tsk_calloc(N, sizeof(*self->num_children)); + self->edge = tsk_malloc(N * sizeof(*self->edge)); if (self->parent == NULL || self->left_child == NULL || self->right_child == NULL || self->left_sib == NULL || self->right_sib == NULL - || self->num_children == NULL) { + || self->num_children == NULL || self->edge == NULL) { goto out; } if (!(self->options & TSK_NO_SAMPLE_COUNTS)) { @@ -3530,6 +3531,7 @@ tsk_tree_free(tsk_tree_t *self) tsk_safe_free(self->right_sample); tsk_safe_free(self->next_sample); tsk_safe_free(self->num_children); + tsk_safe_free(self->edge); return 0; } @@ -3679,6 +3681,7 @@ tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options) tsk_memcpy(dest->left_sib, self->left_sib, N * sizeof(*self->left_sib)); tsk_memcpy(dest->right_sib, self->right_sib, N * sizeof(*self->right_sib)); tsk_memcpy(dest->num_children, self->num_children, N * sizeof(*self->num_children)); + tsk_memcpy(dest->edge, self->edge, N * sizeof(*self->edge)); if (!(dest->options & TSK_NO_SAMPLE_COUNTS)) { if (self->options & TSK_NO_SAMPLE_COUNTS) { ret = TSK_ERR_UNSUPPORTED_OPERATION; @@ -4256,6 +4259,7 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c) tsk_id_t *restrict parent = self->parent; tsk_size_t *restrict num_samples = self->num_samples; tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples; + tsk_id_t *restrict edge = self->edge; const tsk_size_t root_threshold = self->root_threshold; tsk_id_t u; tsk_id_t path_end = TSK_NULL; @@ -4265,6 +4269,7 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c) tsk_tree_remove_branch(self, p, c, parent); self->num_edges--; + edge[c] = TSK_NULL; if (!(self->options & TSK_NO_SAMPLE_COUNTS)) { u = p; @@ -4290,11 +4295,12 @@ tsk_tree_remove_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c) } static void -tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c) +tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t edge_id) { tsk_id_t *restrict parent = self->parent; tsk_size_t *restrict num_samples = self->num_samples; tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples; + tsk_id_t *restrict edge = self->edge; const tsk_size_t root_threshold = self->root_threshold; tsk_id_t u; tsk_id_t path_end = TSK_NULL; @@ -4322,6 +4328,7 @@ tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c) tsk_tree_insert_branch(self, p, c, parent); self->num_edges++; + edge[c] = edge_id; if (self->options & TSK_SAMPLE_LISTS) { tsk_tree_update_sample_lists(self, p, parent); @@ -4361,7 +4368,7 @@ tsk_tree_advance(tsk_tree_t *self, int direction, const double *restrict out_bre while (in >= 0 && in < num_edges && in_breakpoints[in_order[in]] == x) { k = in_order[in]; in += direction; - tsk_tree_insert_edge(self, edge_parent[k], edge_child[k]); + tsk_tree_insert_edge(self, edge_parent[k], edge_child[k], k); } self->direction = direction; @@ -4586,6 +4593,7 @@ tsk_tree_clear(tsk_tree_t *self) tsk_memset(self->left_sib, 0xff, N * sizeof(*self->left_sib)); tsk_memset(self->right_sib, 0xff, N * sizeof(*self->right_sib)); tsk_memset(self->num_children, 0, N * sizeof(*self->num_children)); + tsk_memset(self->edge, 0xff, N * sizeof(*self->edge)); if (sample_counts) { tsk_memset(self->num_samples, 0, N * sizeof(*self->num_samples)); diff --git a/c/tskit/trees.h b/c/tskit/trees.h index e0e1c8d714..6cd03dc986 100644 --- a/c/tskit/trees.h +++ b/c/tskit/trees.h @@ -176,6 +176,13 @@ typedef struct { @brief The number of children of node u is num_children[u]. */ tsk_id_t *num_children; + /** + @brief Array of edge ids where ``edge[u]`` is the edge that encodes the + relationship between the child node ``u`` and its parent. Equal to + ``TSK_NULL`` if node ``u`` is a root, virtual root or is not a node in the + current tree. + */ + tsk_id_t *edge; /** @brief The total number of edges defining the topology of this tree. This is equal to the number of tree sequence edges that intersect with diff --git a/python/tests/__init__.py b/python/tests/__init__.py index cb67f45345..1f064b6a8d 100644 --- a/python/tests/__init__.py +++ b/python/tests/__init__.py @@ -43,6 +43,7 @@ def __init__(self, num_nodes): self.left_sib = [tskit.NULL for _ in range(num_nodes)] self.right_sib = [tskit.NULL for _ in range(num_nodes)] self.num_children = [0 for _ in range(num_nodes)] + self.edge = [tskit.NULL for _ in range(num_nodes)] self.left = 0 self.right = 0 self.index = -1 @@ -65,6 +66,7 @@ def from_tree(cls, tree): ret.left_sib[u] = tree.left_sib(u) ret.right_sib[u] = tree.right_sib(u) ret.num_children[u] = tree.num_children(u) + ret.edge[u] = tree.edge(u) assert ret == tree return ret @@ -188,6 +190,7 @@ def trees(self): pt.left_sib[:] = rtt.left_sib pt.right_sib[:] = rtt.right_sib pt.num_children[:] = rtt.num_children + pt.edge[:] = rtt.edge pt.left_root = rtt.left_child[-1] pt.left = left pt.right = right diff --git a/python/tests/test_utilities.py b/python/tests/test_utilities.py index 710fe24933..ea107db22f 100644 --- a/python/tests/test_utilities.py +++ b/python/tests/test_utilities.py @@ -194,3 +194,16 @@ def test_branch_operations_num_children(self): qlt.remove_branch(2, 0) assert qlt.num_children[2] == 0 + + def test_edge_operations(self): + tt = tskit.Tree.generate_balanced(3) + tts = tt.tree_sequence + + for _, qlt in tsutil.algorithm_R(tts): + assert np.sum(qlt.edge != -1) == tt.num_edges + self.verify_tree_edges(qlt, tts) + + def verify_tree_edges(self, quintuply_linked_tree, tts): + for edge in tts.edges(): + assert quintuply_linked_tree.edge[edge.child] == edge.id + assert quintuply_linked_tree.parent[edge.child] == edge.parent diff --git a/python/tests/tsutil.py b/python/tests/tsutil.py index 00dec81fba..77a6863c13 100644 --- a/python/tests/tsutil.py +++ b/python/tests/tsutil.py @@ -1211,16 +1211,18 @@ def __init__(self, n, root_threshold=1): self.num_samples = np.zeros(n + 1, dtype=np.int32) self.num_edges = 0 self.num_children = np.zeros(n + 1, dtype=np.int32) + self.edge = np.zeros(n + 1, dtype=np.int32) - 1 def __str__(self): - s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\tnchild\n" + s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\tnchild\tedge\n" for j in range(len(self.parent)): s += ( f"{j}\t{self.parent[j]}\t" f"{self.left_child[j]}\t{self.right_child[j]}\t" f"{self.left_sib[j]}\t{self.right_sib[j]}\t" f"{self.num_samples[j]}\t" - f"{self.num_children[j]}\n" + f"{self.num_children[j]}\t" + f"{self.edge[j]}\n" ) return s @@ -1277,6 +1279,7 @@ def remove_root(self, root): def remove_edge(self, edge): self.remove_branch(edge.parent, edge.child) self.num_edges -= 1 + self.edge[edge.child] = -1 u = edge.parent while u != -1: @@ -1305,6 +1308,7 @@ def insert_edge(self, edge): self.insert_branch(edge.parent, edge.child) self.num_edges += 1 + self.edge[edge.child] = edge.id def algorithm_R(ts, root_threshold=1):