From 33ccde24b3362858a2c863fbf802c078086b6939 Mon Sep 17 00:00:00 2001 From: Gertjan Bisschop Date: Thu, 2 Jun 2022 22:20:33 +0200 Subject: [PATCH] First step towards adding num_children array --- c/CHANGELOG.rst | 10 ++++++ c/tests/test_trees.c | 65 ++++++++++++++++++++++++++++++++++ c/tskit/trees.c | 21 +++++------ c/tskit/trees.h | 4 +++ python/tests/__init__.py | 2 ++ python/tests/test_utilities.py | 13 +++++++ python/tests/tsutil.py | 8 +++-- 7 files changed, 111 insertions(+), 12 deletions(-) diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index abe2b4e36d..32fd205cef 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -1,3 +1,13 @@ +-------------------- +[1.1.0] - 2022-XX-XX +-------------------- + +**Features** + +- Add ``num_children`` to ``tsk_tree_t`` an array which contains counts of the number of child + nodes of each node in the tree. (:user:`GertjanBisschop`, :issue:`2274`, :pr:`2316`) + + -------------------- [1.0.0] - 2022-05-24 -------------------- diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index 3867ff73ee..de4f782e28 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -70,6 +70,9 @@ check_trees_identical(tsk_tree_t *self, tsk_tree_t *other) tsk_memcmp(self->left_sib, other->left_sib, N * sizeof(tsk_id_t)) == 0); CU_ASSERT_FATAL( tsk_memcmp(self->right_sib, other->right_sib, N * sizeof(tsk_id_t)) == 0); + CU_ASSERT_FATAL( + tsk_memcmp(self->num_children, other->num_children, N * sizeof(tsk_size_t)) + == 0); CU_ASSERT_EQUAL_FATAL(self->num_samples == NULL, other->num_samples == NULL) CU_ASSERT_EQUAL_FATAL( @@ -1149,6 +1152,7 @@ test_simplest_nonbinary_records(void) "0 1 0"; const char *edges = "0 1 4 0,1,2,3\n"; tsk_treeseq_t ts, simplified; + tsk_tree_t t; tsk_id_t sample_ids[] = { 0, 1, 2, 3 }; int ret; @@ -1159,6 +1163,13 @@ test_simplest_nonbinary_records(void) CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0); CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1); + ret = tsk_tree_init(&t, &ts, 0); + CU_ASSERT_EQUAL(ret, 0); + ret = tsk_tree_first(&t); + CU_ASSERT_EQUAL(t.num_children[4], 4); + CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1); + tsk_tree_free(&t); + ret = tsk_treeseq_simplify(&ts, sample_ids, 4, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0)); @@ -1192,6 +1203,7 @@ test_simplest_unary_records(void) "0 1 3 1\n" "0 1 4 2,3\n"; tsk_treeseq_t ts, simplified, simplified_other; + tsk_tree_t t; tsk_id_t sample_ids[] = { 0, 1 }; tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0); @@ -1202,6 +1214,14 @@ test_simplest_unary_records(void) CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1); CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1); + ret = tsk_tree_init(&t, &ts, 0); + CU_ASSERT_EQUAL(ret, 0); + ret = tsk_tree_first(&t); + CU_ASSERT_EQUAL(t.num_children[2], 1); + CU_ASSERT_EQUAL(t.num_children[4], 2); + CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1); + tsk_tree_free(&t); + ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2); @@ -1408,6 +1428,8 @@ test_simplest_degenerate_multiple_root_records(void) CU_ASSERT_EQUAL(t.num_edges, 2); CU_ASSERT_EQUAL(t.right_sib[2], 3); CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL); + CU_ASSERT_EQUAL(t.num_children[2], 1); + CU_ASSERT_EQUAL(t.num_children[0], 0); ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -1501,6 +1523,8 @@ test_simplest_zero_root_tree(void) CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), TSK_NULL); CU_ASSERT_EQUAL(t.right_sib[2], 3); CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL); + CU_ASSERT_EQUAL(t.num_children[0], 0); + CU_ASSERT_EQUAL(t.num_children[4], 2); tsk_tree_free(&t); tsk_treeseq_free(&ts); @@ -1544,6 +1568,8 @@ test_simplest_multi_root_tree(void) CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0); CU_ASSERT_EQUAL(t.right_sib[0], 3); CU_ASSERT_EQUAL(t.num_edges, 2); + CU_ASSERT_EQUAL(t.num_children[0], 0); + CU_ASSERT_EQUAL(t.num_children[3], 2); tsk_tree_print_state(&t, _devnull); @@ -1889,12 +1915,14 @@ test_simplest_initial_gap_zero_roots(void) CU_ASSERT_EQUAL(ret, TSK_TREE_OK); CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); + ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, TSK_TREE_OK); CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); + CU_ASSERT_EQUAL(tree.num_children[2], 2); tsk_tree_free(&tree); tsk_treeseq_free(&ts); @@ -1944,11 +1972,13 @@ test_simplest_holey_tsk_treeseq_zero_roots(void) CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); + CU_ASSERT_EQUAL(tree.num_children[2], 2); ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, TSK_TREE_OK); CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); + CU_ASSERT_EQUAL(tree.num_children[2], 0); ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, TSK_TREE_OK); @@ -1956,6 +1986,7 @@ test_simplest_holey_tsk_treeseq_zero_roots(void) CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); + CU_ASSERT_EQUAL(tree.num_children[2], 2); tsk_tree_free(&tree); tsk_treeseq_free(&ts); @@ -2708,6 +2739,7 @@ test_simplest_overlapping_parents(void) CU_ASSERT_EQUAL(tree.right_sib[0], 1); CU_ASSERT_EQUAL(tree.left_sib[1], 0); CU_ASSERT_EQUAL(tree.right_sib[1], TSK_NULL); + CU_ASSERT_EQUAL(tree.num_children[2], 2); tsk_tree_free(&tree); tsk_treeseq_free(&ts); @@ -3859,6 +3891,8 @@ test_single_tree_iter(void) CU_ASSERT_EQUAL(ret, TSK_TREE_OK); CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), num_nodes); CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1); + CU_ASSERT_EQUAL(tree.num_children[4], 2); + CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1); tsk_tree_print_state(&tree, _devnull); for (u = 0; u < (tsk_id_t) num_nodes; u++) { @@ -3945,6 +3979,7 @@ test_single_nonbinary_tree_iter(void) CU_ASSERT_EQUAL(tree.left_sib[2], 1); CU_ASSERT_EQUAL(tree.left_sib[1], 0); CU_ASSERT_EQUAL(tree.left_sib[0], TSK_NULL); + CU_ASSERT_EQUAL(tree.num_children[u], 4); u = 8; ret = tsk_tree_get_num_samples(&tree, u, &num_samples); @@ -3953,6 +3988,7 @@ test_single_nonbinary_tree_iter(void) CU_ASSERT_EQUAL(tree.right_child[u], 5); CU_ASSERT_EQUAL(tree.left_sib[5], 4); CU_ASSERT_EQUAL(tree.left_sib[4], TSK_NULL); + CU_ASSERT_EQUAL(tree.num_children[u], 2); u = 9; ret = tsk_tree_get_num_samples(&tree, u, &num_samples); @@ -3962,6 +3998,7 @@ test_single_nonbinary_tree_iter(void) CU_ASSERT_EQUAL(tree.left_sib[8], 7); CU_ASSERT_EQUAL(tree.left_sib[7], 6); CU_ASSERT_EQUAL(tree.left_sib[6], TSK_NULL); + CU_ASSERT_EQUAL(tree.num_children[u], 3); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1); CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), 9); @@ -5196,6 +5233,31 @@ test_tsk_treeseq_bad_records(void) tsk_table_collection_free(&tables); } +static void +test_num_children_multi_tree(void) +{ + int ret; + tsk_treeseq_t ts; + tsk_tree_t t; + + tsk_treeseq_from_text( + &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0); + + ret = tsk_tree_init(&t, &ts, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_tree_next(&t)); + CU_ASSERT_EQUAL(t.num_children[8], 2); + + CU_ASSERT_TRUE(tsk_tree_next(&t)); + CU_ASSERT_EQUAL(t.num_children[8], 1); + + CU_ASSERT_TRUE(tsk_tree_next(&t)); + CU_ASSERT_EQUAL(t.num_children[8], 0); + + tsk_tree_free(&t); + tsk_treeseq_free(&ts); +} + /*======================================================= * Diff iter tests. *======================================================*/ @@ -5474,6 +5536,8 @@ test_virtual_root_properties(void) CU_ASSERT_FALSE(tsk_tree_is_sample(&t, t.virtual_root)); + CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1); + tsk_tree_free(&t); tsk_treeseq_free(&ts); } @@ -7747,6 +7811,7 @@ main(int argc, char **argv) test_simplify_keep_input_roots_multi_tree }, { "test_left_to_right_multi_tree", test_left_to_right_multi_tree }, { "test_gappy_multi_tree", test_gappy_multi_tree }, + { "test_num_children_multi_tree", test_num_children_multi_tree }, { "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records }, diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 5a77363123..7061655056 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -3466,8 +3466,10 @@ tsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t self->right_child = tsk_malloc(N * sizeof(*self->right_child)); self->left_sib = tsk_malloc(N * sizeof(*self->left_sib)); self->right_sib = tsk_malloc(N * sizeof(*self->right_sib)); + self->num_children = tsk_calloc(N, sizeof(*self->num_children)); if (self->parent == NULL || self->left_child == NULL || self->right_child == NULL - || self->left_sib == NULL || self->right_sib == NULL) { + || self->left_sib == NULL || self->right_sib == NULL + || self->num_children == NULL) { goto out; } if (!(self->options & TSK_NO_SAMPLE_COUNTS)) { @@ -3532,6 +3534,7 @@ tsk_tree_free(tsk_tree_t *self) tsk_safe_free(self->left_sample); tsk_safe_free(self->right_sample); tsk_safe_free(self->next_sample); + tsk_safe_free(self->num_children); return 0; } @@ -3680,6 +3683,7 @@ tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options) tsk_memcpy(dest->right_child, self->right_child, N * sizeof(*self->right_child)); tsk_memcpy(dest->left_sib, self->left_sib, N * sizeof(*self->left_sib)); tsk_memcpy(dest->right_sib, self->right_sib, N * sizeof(*self->right_sib)); + tsk_memcpy(dest->num_children, self->num_children, N * sizeof(*self->num_children)); if (!(dest->options & TSK_NO_SAMPLE_COUNTS)) { if (self->options & TSK_NO_SAMPLE_COUNTS) { ret = TSK_ERR_UNSUPPORTED_OPERATION; @@ -3878,15 +3882,7 @@ tsk_tree_get_right_root(const tsk_tree_t *self) tsk_size_t tsk_tree_get_num_roots(const tsk_tree_t *self) { - const tsk_id_t *restrict right_sib = self->right_sib; - const tsk_id_t *restrict left_child = self->left_child; - tsk_size_t num_roots = 0; - tsk_id_t u; - - for (u = left_child[self->virtual_root]; u != TSK_NULL; u = right_sib[u]) { - num_roots++; - } - return num_roots; + return self->num_children[self->virtual_root]; } int TSK_WARN_UNUSED @@ -4200,6 +4196,7 @@ tsk_tree_remove_branch( tsk_id_t *restrict right_child = self->right_child; tsk_id_t *restrict left_sib = self->left_sib; tsk_id_t *restrict right_sib = self->right_sib; + tsk_size_t *restrict num_children = self->num_children; tsk_id_t lsib = left_sib[c]; tsk_id_t rsib = right_sib[c]; @@ -4216,6 +4213,7 @@ tsk_tree_remove_branch( parent[c] = TSK_NULL; left_sib[c] = TSK_NULL; right_sib[c] = TSK_NULL; + num_children[p]--; } static inline void @@ -4226,6 +4224,7 @@ tsk_tree_insert_branch( tsk_id_t *restrict right_child = self->right_child; tsk_id_t *restrict left_sib = self->left_sib; tsk_id_t *restrict right_sib = self->right_sib; + tsk_size_t *restrict num_children = self->num_children; tsk_id_t u; parent[c] = p; @@ -4240,6 +4239,7 @@ tsk_tree_insert_branch( right_sib[c] = TSK_NULL; } right_child[p] = c; + num_children[p]++; } static inline void @@ -4590,6 +4590,7 @@ tsk_tree_clear(tsk_tree_t *self) tsk_memset(self->right_child, 0xff, N * sizeof(*self->right_child)); tsk_memset(self->left_sib, 0xff, N * sizeof(*self->left_sib)); tsk_memset(self->right_sib, 0xff, N * sizeof(*self->right_sib)); + tsk_memset(self->num_children, 0, N * sizeof(*self->num_children)); if (sample_counts) { tsk_memset(self->num_samples, 0, N * sizeof(*self->num_samples)); diff --git a/c/tskit/trees.h b/c/tskit/trees.h index f15b4f0dd3..7aa68f8896 100644 --- a/c/tskit/trees.h +++ b/c/tskit/trees.h @@ -172,6 +172,10 @@ typedef struct { ``TSK_NULL`` if node u has no siblings to its right. */ tsk_id_t *right_sib; + /** + @brief The number of children of node u is num_children[u]. + */ + tsk_size_t *num_children; /** @brief The total number of edges defining the topology of this tree. This is equal to the number of tree sequence edges that intersect with diff --git a/python/tests/__init__.py b/python/tests/__init__.py index bb4151d978..cb53a7be54 100644 --- a/python/tests/__init__.py +++ b/python/tests/__init__.py @@ -42,6 +42,7 @@ def __init__(self, num_nodes): self.right_child = [tskit.NULL for _ in range(num_nodes)] self.left_sib = [tskit.NULL for _ in range(num_nodes)] self.right_sib = [tskit.NULL for _ in range(num_nodes)] + self.num_children = [0 for _ in range(num_nodes)] self.left = 0 self.right = 0 self.index = -1 @@ -220,6 +221,7 @@ def trees(self): pt.right_child[:] = rtt.right_child pt.left_sib[:] = rtt.left_sib pt.right_sib[:] = rtt.right_sib + pt.num_children[:] = rtt.num_children pt.left_root = rtt.left_child[-1] pt.left = left pt.right = right diff --git a/python/tests/test_utilities.py b/python/tests/test_utilities.py index ef09aa3070..14fbf72e8b 100644 --- a/python/tests/test_utilities.py +++ b/python/tests/test_utilities.py @@ -23,6 +23,7 @@ Tests for the various testing utilities. """ import msprime +import numpy as np import pytest import tests.tsutil as tsutil @@ -181,3 +182,15 @@ def test_sort_individuals(self): tables.individuals.add_row(parents=[0], metadata=b"1") with pytest.raises(ValueError, match="Individual pedigree has cycles"): tsutil.sort_individual_table(tables) + + +class TestQuintuplyLinkedTrees: + def test_branch_operations_num_children(self): + qlt = tsutil.QuintuplyLinkedTree(3) + assert np.sum(qlt.num_children) == 0 + qlt.insert_branch(2, 0) + assert qlt.num_children[2] == 1 + assert np.sum(qlt.num_children) == 1 + + qlt.remove_branch(2, 0) + assert qlt.num_children[2] == 0 diff --git a/python/tests/tsutil.py b/python/tests/tsutil.py index f2929f33f9..eeb5b4d689 100644 --- a/python/tests/tsutil.py +++ b/python/tests/tsutil.py @@ -1228,15 +1228,17 @@ def __init__(self, n, root_threshold=1): self.right_sib = np.zeros(n + 1, dtype=np.int32) - 1 self.num_samples = np.zeros(n + 1, dtype=np.int32) self.num_edges = 0 + self.num_children = np.zeros(n + 1, dtype=np.int32) def __str__(self): - s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\n" + s = "id\tparent\tlchild\trchild\tlsib\trsib\tnsamp\tnchild\n" for j in range(len(self.parent)): s += ( f"{j}\t{self.parent[j]}\t" f"{self.left_child[j]}\t{self.right_child[j]}\t" f"{self.left_sib[j]}\t{self.right_sib[j]}\t" - f"{self.num_samples[j]}\n" + f"{self.num_samples[j]}\t" + f"{self.num_children[j]}\n" ) return s @@ -1262,6 +1264,7 @@ def remove_branch(self, p, c): self.parent[c] = -1 self.left_sib[c] = -1 self.right_sib[c] = -1 + self.num_children[p] -= 1 def insert_branch(self, p, c): assert self.parent[c] == -1, "contradictory edges" @@ -1276,6 +1279,7 @@ def insert_branch(self, p, c): self.left_sib[c] = u self.right_sib[c] = -1 self.right_child[p] = c + self.num_children[p] += 1 def is_potential_root(self, u): return self.num_samples[u] >= self.root_threshold