diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index 91a21e035d..fb07e77b09 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -13,13 +13,20 @@ (:user:`benjeffery`, :issue:`1723`, :pr:`1727`) - The previously deprecated option ``TSK_SAMPLE_COUNTS`` has been removed. (:user:`benjeffery`, :issue:`1744`, :pr:`1761`). - -- FIXME breaking changes for tree API and virtual root - -- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence. +- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence. ``tsk_table_collection_sort`` no longer sorts individuals. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`) +- FIXME breaking changes for tree API and virtual root. + +- The ``tsk_tree_t.left_root`` member has been removed. Client code can be updated + most easily by using the equivalent ``tsk_tree_get_left_root`` function. However, + it may be worth considering updating code to use either the standard traversal + functions (which automatically iterate over roots) or to use the ``virtual_root`` + member (which may lead to more concise code). (:user:`jeromekelleher`, :issue:`1796`, + :pr:`1862`) + + **Features** - Add ``tsk_table_collection_individual_topological_sort`` to sort the individuals as this is no longer done by the diff --git a/c/dev-tools/dev-cli.c b/c/dev-tools/dev-cli.c index 2cc99f29cf..b9d2ced3b9 100644 --- a/c/dev-tools/dev-cli.c +++ b/c/dev-tools/dev-cli.c @@ -122,12 +122,8 @@ print_ld_matrix(tsk_treeseq_t *ts) if (ret != 0) { fatal_library_error(ret, "get_site"); } - printf("%d\t%f\t%d\t%f\t%.3f\n", - (int) sA.id, - sA.position, - (int) sB.id, - sB.position, - r2[k]); + printf("%d\t%f\t%d\t%f\t%.3f\n", (int) sA.id, sA.position, (int) sB.id, + sB.position, r2[k]); } } free(r2); @@ -156,8 +152,8 @@ print_newick_trees(tsk_treeseq_t *ts) fatal_error("ERROR: %d: %s\n", ret, tsk_strerror(ret)); } for (ret = tsk_tree_first(&tree); ret == 1; ret = tsk_tree_next(&tree)) { - ret = tsk_convert_newick( - &tree, tree.left_root, precision, 0, newick_buffer_size, newick); + ret = tsk_convert_newick(&tree, tsk_tree_get_left_root(&tree), precision, 0, + newick_buffer_size, newick); if (ret != 0) { fatal_library_error(ret, "newick"); } @@ -187,9 +183,7 @@ print_tree_sequence(tsk_treeseq_t *ts, int verbose) } for (ret = tsk_tree_first(&tree); ret == 1; ret = tsk_tree_next(&tree)) { printf("-------------------------\n"); - printf("New tree: %d: %f (%d)\n", - (int) tree.index, - tree.right - tree.left, + printf("New tree: %d: %f (%d)\n", (int) tree.index, tree.right - tree.left, (int) tree.num_nodes); printf("-------------------------\n"); tsk_tree_print_state(&tree, stdout); @@ -242,11 +236,8 @@ run_newick(const char *filename, int TSK_UNUSED(verbose)) } static void -run_simplify(const char *input_filename, - const char *output_filename, - size_t num_samples, - bool filter_sites, - int verbose) +run_simplify(const char *input_filename, const char *output_filename, size_t num_samples, + bool filter_sites, int verbose) { tsk_treeseq_t ts, subset; const tsk_id_t *samples; @@ -290,9 +281,7 @@ main(int argc, char **argv) /* SYNTAX 1: simplify [-vi] [-s] */ struct arg_rex *cmd1 = arg_rex1(NULL, NULL, "simplify", NULL, REG_ICASE, NULL); struct arg_lit *verbose1 = arg_lit0("v", "verbose", NULL); - struct arg_int *num_samples1 = arg_int0("s", - "sample-size", - "", + struct arg_int *num_samples1 = arg_int0("s", "sample-size", "", "Number of samples to keep in the simplified tree sequence."); struct arg_lit *filter_sites1 = arg_lit0("i", "filter-invariant-sites", ""); @@ -348,10 +337,8 @@ main(int argc, char **argv) nerrors5 = arg_parse(argc, argv, argtable5); if (nerrors1 == 0) { - run_simplify(infiles1->filename[0], - outfiles1->filename[0], - (size_t) num_samples1->ival[0], - (bool) filter_sites1->count, + run_simplify(infiles1->filename[0], outfiles1->filename[0], + (size_t) num_samples1->ival[0], (bool) filter_sites1->count, verbose1->count); } else if (nerrors2 == 0) { run_ld(infiles2->filename[0], verbose2->count); diff --git a/c/examples/tree_traversal.c b/c/examples/tree_traversal.c index 4b9a614393..d8749d7e1c 100644 --- a/c/examples/tree_traversal.c +++ b/c/examples/tree_traversal.c @@ -9,6 +9,34 @@ errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val)); \ } + +static void +traverse_standard(tsk_tree_t *tree) +{ + int ret; + tsk_size_t num_nodes, j; + tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes)); + + if (nodes == NULL) { + errx(EXIT_FAILURE, "Out of memory"); + } + + ret = tsk_tree_preorder(tree, -1, nodes, &num_nodes); + check_tsk_error(ret); + for (j = 0; j < num_nodes; j++) { + printf("Visit preorder %lld\n", (long long) nodes[j]); + } + + ret = tsk_tree_postorder(tree, -1, nodes, &num_nodes); + check_tsk_error(ret); + for (j = 0; j < num_nodes; j++) { + printf("Visit postorder %lld\n", (long long) nodes[j]); + } + + free(nodes); +} + + static void _traverse(tsk_tree_t *tree, tsk_id_t u, int depth) { @@ -27,36 +55,29 @@ _traverse(tsk_tree_t *tree, tsk_id_t u, int depth) static void traverse_recursive(tsk_tree_t *tree) { - tsk_id_t root; - - for (root = tree->left_root; root != TSK_NULL; root = tree->right_sib[root]) { - _traverse(tree, root, 0); - } + _traverse(tree, tree->virtual_root, -1); } static void traverse_stack(tsk_tree_t *tree) { int stack_top; - tsk_id_t u, v, root; - tsk_id_t *stack - = malloc(tsk_treeseq_get_num_nodes(tree->tree_sequence) * sizeof(*stack)); + tsk_id_t u, v; + tsk_id_t *stack = malloc(tsk_tree_get_size_bound(tree) * sizeof(*stack)); if (stack == NULL) { errx(EXIT_FAILURE, "Out of memory"); } - for (root = tree->left_root; root != TSK_NULL; root = tree->right_sib[root]) { - stack_top = 0; - stack[stack_top] = root; - while (stack_top >= 0) { - u = stack[stack_top]; - stack_top--; - printf("Visit stack %lld\n", (long long) u); - /* Put nodes on the stack right-to-left, so we visit in left-to-right */ - for (v = tree->right_child[u]; v != TSK_NULL; v = tree->left_sib[v]) { - stack_top++; - stack[stack_top] = v; - } + stack_top = 0; + stack[stack_top] = tree->virtual_root; + while (stack_top >= 0) { + u = stack[stack_top]; + stack_top--; + printf("Visit stack %lld\n", (long long) u); + /* Put nodes on the stack right-to-left, so we visit in left-to-right */ + for (v = tree->right_child[u]; v != TSK_NULL; v = tree->left_sib[v]) { + stack_top++; + stack[stack_top] = v; } } free(stack); @@ -96,6 +117,8 @@ main(int argc, char **argv) ret = tsk_tree_first(&tree); check_tsk_error(ret); + traverse_standard(&tree); + traverse_recursive(&tree); traverse_stack(&tree); diff --git a/c/tests/test_stats.c b/c/tests/test_stats.c index f754c96639..d68d56ff0d 100644 --- a/c/tests/test_stats.c +++ b/c/tests/test_stats.c @@ -635,15 +635,15 @@ verify_branch_general_stat_identity(tsk_treeseq_t *ts) int ret; tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts); double *W = tsk_malloc(num_samples * sizeof(double)); - tsk_id_t *stack = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(*stack)); - tsk_id_t root, u, v; - int stack_top; + tsk_id_t *nodes = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(*nodes)); + tsk_id_t u; + tsk_size_t num_nodes; double s, branch_length; double *sigma = tsk_malloc(tsk_treeseq_get_num_trees(ts) * sizeof(*sigma)); tsk_tree_t tree; tsk_size_t j; CU_ASSERT_FATAL(W != NULL); - CU_ASSERT_FATAL(stack != NULL); + CU_ASSERT_FATAL(nodes != NULL); for (j = 0; j < num_samples; j++) { W[j] = 1; @@ -658,30 +658,21 @@ verify_branch_general_stat_identity(tsk_treeseq_t *ts) CU_ASSERT_EQUAL(ret, 0); for (ret = tsk_tree_first(&tree); ret == 1; ret = tsk_tree_next(&tree)) { + ret = tsk_tree_preorder(&tree, -1, nodes, &num_nodes); + CU_ASSERT_EQUAL_FATAL(ret, 0); + s = 0; - for (root = tree.left_root; root != TSK_NULL; root = tree.right_sib[root]) { - stack_top = 0; - stack[stack_top] = root; - while (stack_top >= 0) { - u = stack[stack_top]; - stack_top--; - - for (v = tree.right_child[u]; v != TSK_NULL; v = tree.left_sib[v]) { - branch_length - = ts->tables->nodes.time[u] - ts->tables->nodes.time[v]; - /* printf("u = %d v= %d s = %d bl = %f \n", u, v, - * tree.num_samples[v], */ - /* branch_length); */ - s += branch_length * (double) tree.num_samples[v]; - stack_top++; - stack[stack_top] = v; - } - } + for (j = 0; j < num_nodes; j++) { + u = nodes[j]; + ret = tsk_tree_get_branch_length(&tree, u, &branch_length); + CU_ASSERT_EQUAL_FATAL(ret, 0); + s += branch_length * (double) tree.num_samples[u]; } CU_ASSERT_DOUBLE_EQUAL_FATAL(sigma[tree.index], s, 1e-6); } CU_ASSERT_EQUAL_FATAL(ret, 0); - free(stack); + + free(nodes); tsk_tree_free(&tree); free(W); free(sigma); diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index cdaa2449f0..0ed977392c 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -58,7 +58,6 @@ check_trees_identical(tsk_tree_t *self, tsk_tree_t *other) tsk_size_t N = self->num_nodes; check_trees_equal(self, other); - CU_ASSERT_FATAL(self->left_root == other->left_root); CU_ASSERT_FATAL(self->left_index == other->left_index); CU_ASSERT_FATAL(self->right_index == other->right_index); CU_ASSERT_FATAL(self->direction == other->direction); @@ -725,7 +724,7 @@ verify_sample_counts(tsk_treeseq_t *ts, tsk_size_t num_tests, sample_count_test_ ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples); CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION); /* The root should be NULL */ - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); } tsk_tree_free(&tree); @@ -747,7 +746,7 @@ verify_sample_counts(tsk_treeseq_t *ts, tsk_size_t num_tests, sample_count_test_ CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_EQUAL(num_samples, 0); /* The root should not be NULL */ - CU_ASSERT_NOT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_NOT_EQUAL(tree.virtual_root, TSK_NULL); } tsk_tree_free(&tree); @@ -1326,7 +1325,8 @@ test_simplest_degenerate_multiple_root_records(void) ret = tsk_tree_first(&t); CU_ASSERT_EQUAL(ret, 1); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2); - CU_ASSERT_EQUAL(t.left_root, 2); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 2); + CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), 3); CU_ASSERT_EQUAL(t.num_edges, 2); CU_ASSERT_EQUAL(t.right_sib[2], 3); CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL); @@ -1418,7 +1418,8 @@ test_simplest_zero_root_tree(void) CU_ASSERT_EQUAL(ret, 1); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 0); CU_ASSERT_EQUAL(t.num_edges, 4); - CU_ASSERT_EQUAL(t.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), TSK_NULL); CU_ASSERT_EQUAL(t.right_sib[2], 3); CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL); @@ -1449,7 +1450,7 @@ test_simplest_multi_root_tree(void) tsk_tree_print_state(&t, _devnull); /* Make sure the initial roots are set correctly */ - CU_ASSERT_EQUAL(t.left_root, 0); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0); CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL); CU_ASSERT_EQUAL(t.right_sib[0], 1); CU_ASSERT_EQUAL(t.left_sib[1], 0); @@ -1461,7 +1462,7 @@ test_simplest_multi_root_tree(void) ret = tsk_tree_first(&t); CU_ASSERT_EQUAL(ret, 1); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2); - CU_ASSERT_EQUAL(t.left_root, 0); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0); CU_ASSERT_EQUAL(t.right_sib[0], 3); CU_ASSERT_EQUAL(t.num_edges, 2); @@ -1478,7 +1479,7 @@ test_simplest_multi_root_tree(void) ret = tsk_tree_next(&t); CU_ASSERT_EQUAL(ret, 1); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1); - CU_ASSERT_EQUAL(t.left_root, 3); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 3); tsk_tree_free(&t); tsk_treeseq_free(&ts); @@ -1805,11 +1806,11 @@ test_simplest_initial_gap_zero_roots(void) CU_ASSERT_EQUAL(ret, 0); ret = tsk_tree_first(&tree); CU_ASSERT_EQUAL(ret, 1); - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, 1); - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); @@ -1858,19 +1859,19 @@ test_simplest_holey_tsk_treeseq_zero_roots(void) CU_ASSERT_EQUAL(ret, 0); ret = tsk_tree_first(&tree); CU_ASSERT_EQUAL(ret, 1); - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, 1); - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); ret = tsk_tree_next(&tree); CU_ASSERT_EQUAL(ret, 1); - CU_ASSERT_EQUAL(tree.left_root, TSK_NULL); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0); CU_ASSERT_EQUAL(tree.parent[0], 2); CU_ASSERT_EQUAL(tree.parent[1], 2); @@ -3055,10 +3056,10 @@ test_simplest_map_mutations(void) CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_EQUAL_FATAL(ancestral_state, 0); CU_ASSERT_EQUAL_FATAL(num_transitions, 2); - CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0); + CU_ASSERT_EQUAL_FATAL(transitions[0].node, 1); CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL); CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1); - CU_ASSERT_EQUAL_FATAL(transitions[1].node, 1); + CU_ASSERT_EQUAL_FATAL(transitions[1].node, 0); CU_ASSERT_EQUAL_FATAL(transitions[1].parent, TSK_NULL); CU_ASSERT_EQUAL_FATAL(transitions[1].state, 1); free(transitions); @@ -3830,7 +3831,7 @@ test_single_nonbinary_tree_iter(void) CU_ASSERT_EQUAL(tree.left_sib[6], TSK_NULL); CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1); - CU_ASSERT_EQUAL(tree.left_root, 9); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), 9); ret = tsk_tree_get_mrca(&tree, 0, 1, &w); CU_ASSERT_EQUAL(ret, 0); @@ -5253,7 +5254,7 @@ test_virtual_root_properties(void) tsk_treeseq_t ts; tsk_tree_t t; int depth; - double time; + double time, length; tsk_id_t node; tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL, @@ -5282,6 +5283,9 @@ test_virtual_root_properties(void) CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, t.virtual_root, &node), 0) CU_ASSERT_EQUAL(node, TSK_NULL); + CU_ASSERT_EQUAL_FATAL(tsk_tree_get_branch_length(&t, t.virtual_root, &length), 0) + CU_ASSERT_EQUAL(length, 0); + /* The definition of "descendant" is that node v is on the path from * u to a root. Since there is no parent link from roots to the * virtual_root, it's consistent with this definition to return false @@ -5505,7 +5509,7 @@ test_isolated_node_kc(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_tree_first(&t); CU_ASSERT_EQUAL_FATAL(ret, 1); - CU_ASSERT_EQUAL_FATAL(t.left_root, TSK_NULL); + CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL); ret = tsk_tree_kc_distance(&t, &t, 0, &result); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_ROOTS); tsk_treeseq_free(&ts); @@ -5634,7 +5638,7 @@ test_empty_tree_kc(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_tree_first(&t); CU_ASSERT_EQUAL_FATAL(ret, 1); - CU_ASSERT_EQUAL_FATAL(t.left_root, TSK_NULL); + CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL); CU_ASSERT_EQUAL_FATAL(t.left, 0); CU_ASSERT_EQUAL_FATAL(t.right, 1); CU_ASSERT_EQUAL_FATAL(t.parent[0], TSK_NULL); @@ -6147,6 +6151,8 @@ test_tree_errors(void) CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); ret = tsk_tree_get_time(&t, u, NULL); CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); + ret = tsk_tree_get_branch_length(&t, u, NULL); + CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); ret = tsk_tree_get_mrca(&t, u, 0, NULL); CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); ret = tsk_tree_get_mrca(&t, 0, u, NULL); @@ -6498,7 +6504,7 @@ test_empty_tree_sequence(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_tree_first(&t); CU_ASSERT_EQUAL_FATAL(ret, 1); - CU_ASSERT_EQUAL_FATAL(t.left_root, TSK_NULL); + CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL); CU_ASSERT_EQUAL_FATAL(t.left, 0); CU_ASSERT_EQUAL_FATAL(t.right, 1); CU_ASSERT_EQUAL_FATAL(t.num_edges, 0); @@ -6511,7 +6517,7 @@ test_empty_tree_sequence(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_tree_last(&t); CU_ASSERT_EQUAL_FATAL(ret, 1); - CU_ASSERT_EQUAL_FATAL(t.left_root, TSK_NULL); + CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL); CU_ASSERT_EQUAL_FATAL(t.left, 0); CU_ASSERT_EQUAL_FATAL(t.right, 1); CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 1, &v), TSK_ERR_NODE_OUT_OF_BOUNDS); @@ -6561,7 +6567,7 @@ test_zero_edges(void) CU_ASSERT_EQUAL(t.num_edges, 0); CU_ASSERT_EQUAL(t.parent[0], TSK_NULL); CU_ASSERT_EQUAL(t.parent[1], TSK_NULL); - CU_ASSERT_EQUAL(t.left_root, 0); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0); CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL); CU_ASSERT_EQUAL(t.right_sib[0], 1); tsk_tree_print_state(&t, _devnull); @@ -6575,7 +6581,7 @@ test_zero_edges(void) CU_ASSERT_EQUAL(t.right, 2); CU_ASSERT_EQUAL(t.parent[0], TSK_NULL); CU_ASSERT_EQUAL(t.parent[1], TSK_NULL); - CU_ASSERT_EQUAL(t.left_root, 0); + CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0); CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL); CU_ASSERT_EQUAL(t.right_sib[0], 1); tsk_tree_print_state(&t, _devnull); diff --git a/c/tskit/genotypes.c b/c/tskit/genotypes.c index 509ce38b77..ecf2954870 100644 --- a/c/tskit/genotypes.c +++ b/c/tskit/genotypes.c @@ -462,10 +462,11 @@ tsk_vargen_mark_missing_i16(tsk_vargen_t *self) const tsk_id_t *restrict left_child = self->tree.left_child; const tsk_id_t *restrict right_sib = self->tree.right_sib; const tsk_id_t *restrict sample_index_map = self->sample_index_map; + const tsk_id_t N = self->tree.virtual_root; int16_t *restrict genotypes = self->variant.genotypes.i16; tsk_id_t root, sample_index; - for (root = self->tree.left_root; root != TSK_NULL; root = right_sib[root]) { + for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) { if (left_child[root] == TSK_NULL) { sample_index = sample_index_map[root]; if (sample_index != TSK_NULL) { @@ -484,10 +485,11 @@ tsk_vargen_mark_missing_i8(tsk_vargen_t *self) const tsk_id_t *restrict left_child = self->tree.left_child; const tsk_id_t *restrict right_sib = self->tree.right_sib; const tsk_id_t *restrict sample_index_map = self->sample_index_map; + const tsk_id_t N = self->tree.virtual_root; int8_t *restrict genotypes = self->variant.genotypes.i8; tsk_id_t root, sample_index; - for (root = self->tree.left_root; root != TSK_NULL; root = right_sib[root]) { + for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) { if (left_child[root] == TSK_NULL) { sample_index = sample_index_map[root]; if (sample_index != TSK_NULL) { diff --git a/c/tskit/haplotype_matching.c b/c/tskit/haplotype_matching.c index e770403818..f75638979d 100644 --- a/c/tskit/haplotype_matching.c +++ b/c/tskit/haplotype_matching.c @@ -274,7 +274,7 @@ tsk_ls_hmm_remove_dead_roots(tsk_ls_hmm_t *self) tsk_id_t *restrict T_index = self->transition_index; tsk_value_transition_t *restrict T = self->transitions; const tsk_id_t *restrict right_sib = self->tree.right_sib; - const tsk_id_t left_root = self->tree.left_root; + const tsk_id_t left_root = tsk_tree_get_left_root(&self->tree); const tsk_id_t *restrict parent = self->parent; tsk_id_t root, u; tsk_size_t j; @@ -404,6 +404,7 @@ tsk_ls_hmm_update_probabilities( tsk_id_t *restrict T_index = self->transition_index; tsk_value_transition_t *restrict T = self->transitions; int8_t *restrict allelic_state = self->allelic_state; + const tsk_id_t left_root = tsk_tree_get_left_root(tree); tsk_mutation_t mut; tsk_id_t j, u, v; double x; @@ -415,7 +416,7 @@ tsk_ls_hmm_update_probabilities( if (ret < 0) { goto out; } - for (root = tree->left_root; root != TSK_NULL; root = tree->right_sib[root]) { + for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) { allelic_state[root] = (int8_t) ret; } @@ -460,7 +461,7 @@ tsk_ls_hmm_update_probabilities( } /* Unset the allelic states */ - for (root = tree->left_root; root != TSK_NULL; root = tree->right_sib[root]) { + for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) { allelic_state[root] = TSK_MISSING_DATA; } for (j = 0; j < (tsk_id_t) site->mutations_length; j++) { @@ -819,7 +820,11 @@ tsk_ls_hmm_redistribute_transitions(tsk_ls_hmm_t *self) old_num_transitions = self->num_transitions; self->num_transitions = 0; - for (root = self->tree.left_root; root != TSK_NULL; root = right_sib[root]) { + /* TODO refactor this to push the virtual root onto the stack rather then + * iterating over the roots. See the existing parsimony implementations + * for an example. */ + for (root = tsk_tree_get_left_root(&self->tree); root != TSK_NULL; + root = right_sib[root]) { stack[0].tree_node = root; stack[0].old_state = T_old[T_index[root]].value_index; stack[0].new_state diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 300d36e652..7e79d44367 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -3422,7 +3422,6 @@ tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options) } dest->left = self->left; dest->right = self->right; - dest->left_root = self->left_root; dest->left_index = self->left_index; dest->right_index = self->right_index; dest->direction = self->direction; @@ -3621,14 +3620,27 @@ tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u) return tsk_treeseq_is_sample(self->tree_sequence, u); } +tsk_id_t +tsk_tree_get_left_root(const tsk_tree_t *self) +{ + return self->left_child[self->virtual_root]; +} + +tsk_id_t +tsk_tree_get_right_root(const tsk_tree_t *self) +{ + return self->right_child[self->virtual_root]; +} + tsk_size_t tsk_tree_get_num_roots(const tsk_tree_t *self) { + const tsk_id_t *restrict right_sib = self->right_sib; + const tsk_id_t *restrict left_child = self->left_child; tsk_size_t num_roots = 0; - tsk_id_t u = self->left_root; + tsk_id_t u = self->left_child[self->virtual_root]; - while (u != TSK_NULL) { - u = self->right_sib[u]; + for (u = left_child[self->virtual_root]; u != TSK_NULL; u = right_sib[u]) { num_roots++; } return num_roots; @@ -3673,6 +3685,29 @@ tsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *t) return ret; } +static inline double +tsk_tree_get_branch_length_unsafe(const tsk_tree_t *self, tsk_id_t u) +{ + const double *times = self->tree_sequence->tables->nodes.time; + const tsk_id_t parent = self->parent[u]; + + return parent == TSK_NULL ? 0 : times[parent] - times[u]; +} + +int TSK_WARN_UNUSED +tsk_tree_get_branch_length(const tsk_tree_t *self, tsk_id_t u, double *ret_branch_length) +{ + int ret = 0; + + ret = tsk_tree_check_node(self, u); + if (ret != 0) { + goto out; + } + *ret_branch_length = tsk_tree_get_branch_length_unsafe(self, u); +out: + return ret; +} + int tsk_tree_get_total_branch_length(const tsk_tree_t *self, tsk_id_t node, double *ret_tbl) { @@ -3759,17 +3794,6 @@ tsk_tree_node_root(tsk_tree_t *self, tsk_id_t u) return v; } -/* TODO Should push this through the stack since this is a python method and - * it's here anyway */ -static double -tsk_tree_branch_length(const tsk_tree_t *self, tsk_id_t u) -{ - const double *times = self->tree_sequence->tables->nodes.time; - tsk_id_t parent = self->parent[u]; - - return parent == TSK_NULL ? 0 : times[parent] - times[u]; -} - static void tsk_tree_check_state(const tsk_tree_t *self) { @@ -3795,13 +3819,11 @@ tsk_tree_check_state(const tsk_tree_t *self) is_root[u] = true; } if (self->tree_sequence->num_samples == 0) { - tsk_bug_assert(self->left_root == TSK_NULL); - } else { - tsk_bug_assert(self->left_sib[self->left_root] == TSK_NULL); + tsk_bug_assert(self->left_child[self->virtual_root] == TSK_NULL); } /* Iterate over the roots and make sure they are set */ - for (u = self->left_root; u != TSK_NULL; u = self->right_sib[u]) { + for (u = tsk_tree_get_left_root(self); u != TSK_NULL; u = self->right_sib[u]) { tsk_bug_assert(is_root[u]); is_root[u] = false; } @@ -3862,7 +3884,6 @@ tsk_tree_print_state(const tsk_tree_t *self, FILE *out) fprintf(out, "root_threshold = %lld\n", (long long) self->root_threshold); fprintf(out, "left = %f\n", self->left); fprintf(out, "right = %f\n", self->right); - fprintf(out, "left_root = %lld\n", (long long) self->left_root); fprintf(out, "index = %lld\n", (long long) self->index); fprintf(out, "node\tparent\tlchild\trchild\tlsib\trsib"); if (self->options & TSK_SAMPLE_LISTS) { @@ -4117,8 +4138,6 @@ tsk_tree_advance(tsk_tree_t *self, int direction, const double *restrict out_bre tsk_tree_insert_edge(self, edge_parent[k], edge_child[k]); } - self->left_root = self->left_child[self->virtual_root]; - self->direction = direction; self->index = self->index + direction; if (direction == TSK_DIR_FORWARD) { @@ -4316,7 +4335,6 @@ tsk_tree_clear(tsk_tree_t *self) self->right_sample[u] = (tsk_id_t) j; } } - self->left_root = TSK_NULL; if (sample_counts && self->root_threshold == 1 && num_samples > 0) { for (j = 0; j < num_samples; j++) { /* Set initial roots */ @@ -4324,7 +4342,6 @@ tsk_tree_clear(tsk_tree_t *self) tsk_tree_insert_root(self, self->samples[j], self->parent); } } - self->left_root = self->left_child[self->virtual_root]; } return ret; } @@ -4547,7 +4564,7 @@ tsk_tree_map_mutations(tsk_tree_t *self, int8_t *genotypes, uint64_t *restrict optimal_set = tsk_calloc(N + 1, sizeof(*optimal_set)); struct stack_elem *restrict preorder_stack = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*preorder_stack)); - tsk_id_t root, u, v; + tsk_id_t u, v; /* The largest possible number of transitions is one over every sample */ tsk_state_transition_t *transitions = tsk_malloc(num_samples * sizeof(*transitions)); int8_t allele, ancestral_state; @@ -4622,33 +4639,34 @@ tsk_tree_map_mutations(tsk_tree_t *self, int8_t *genotypes, } } if (!(options & TSK_MM_FIXED_ANCESTRAL_STATE)) { - ancestral_state = get_smallest_set_bit(optimal_set[N]); + ancestral_state = get_smallest_set_bit(optimal_set[self->virtual_root]); + } else { + optimal_set[self->virtual_root] = UINT64_MAX; } num_transitions = 0; - for (root = self->left_root; root != TSK_NULL; root = self->right_sib[root]) { - /* Do a preorder traversal */ - preorder_stack[0].node = root; - preorder_stack[0].state = ancestral_state; - preorder_stack[0].transition_parent = TSK_NULL; - stack_top = 0; - while (stack_top >= 0) { - s = preorder_stack[stack_top]; - stack_top--; - if (!bit_is_set(optimal_set[s.node], s.state)) { - s.state = get_smallest_set_bit(optimal_set[s.node]); - transitions[num_transitions].node = s.node; - transitions[num_transitions].parent = s.transition_parent; - transitions[num_transitions].state = s.state; - s.transition_parent = (tsk_id_t) num_transitions; - num_transitions++; - } - for (v = left_child[s.node]; v != TSK_NULL; v = right_sib[v]) { - stack_top++; - s.node = v; - preorder_stack[stack_top] = s; - } + /* Do a preorder traversal */ + preorder_stack[0].node = self->virtual_root; + preorder_stack[0].state = ancestral_state; + preorder_stack[0].transition_parent = TSK_NULL; + stack_top = 0; + while (stack_top >= 0) { + s = preorder_stack[stack_top]; + stack_top--; + + if (!bit_is_set(optimal_set[s.node], s.state)) { + s.state = get_smallest_set_bit(optimal_set[s.node]); + transitions[num_transitions].node = s.node; + transitions[num_transitions].parent = s.transition_parent; + transitions[num_transitions].state = s.state; + s.transition_parent = (tsk_id_t) num_transitions; + num_transitions++; + } + for (v = left_child[s.node]; v != TSK_NULL; v = right_sib[v]) { + stack_top++; + s.node = v; + preorder_stack[stack_top] = s; } } @@ -4939,7 +4957,7 @@ fill_kc_vectors(const tsk_tree_t *t, kc_vectors *kc_vecs) times = t->tree_sequence->tables->nodes.time; - for (root = t->left_root; root != TSK_NULL; root = t->right_sib[root]) { + for (root = tsk_tree_get_left_root(t); root != TSK_NULL; root = t->right_sib[root]) { stack_top = 0; stack[stack_top].node = root; stack[stack_top].depth = 0; @@ -4949,7 +4967,7 @@ fill_kc_vectors(const tsk_tree_t *t, kc_vectors *kc_vecs) stack_top--; if (tsk_tree_is_sample(t, u)) { - time = tsk_tree_branch_length(t, u); + time = tsk_tree_get_branch_length_unsafe(t, u); update_kc_vectors_single_sample(ts, kc_vecs, u, time); } @@ -5207,7 +5225,7 @@ update_kc_incremental(tsk_tree_t *self, kc_vectors *kc, tsk_edge_list_t *edges_o } if (tsk_tree_is_sample(self, u)) { - time = tsk_tree_branch_length(self, u); + time = tsk_tree_get_branch_length_unsafe(self, u); update_kc_vectors_single_sample(self->tree_sequence, kc, u, time); } } diff --git a/c/tskit/trees.h b/c/tskit/trees.h index 941fa41635..0a01e516cd 100644 --- a/c/tskit/trees.h +++ b/c/tskit/trees.h @@ -122,11 +122,6 @@ typedef struct { * @brief The parent tree sequence. */ const tsk_treeseq_t *tree_sequence; - /** - * @brief The leftmost root in the tree. Roots are siblings, and - * other roots can be found using right_sib. - */ - tsk_id_t left_root; /** @brief The ID of the "virtual root" whose children are the roots of the tree. @@ -476,14 +471,19 @@ bool tsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other); bool tsk_tree_is_descendant(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v); bool tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u); +tsk_id_t tsk_tree_get_left_root(const tsk_tree_t *self); +tsk_id_t tsk_tree_get_right_root(const tsk_tree_t *self); + int tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options); int tsk_tree_set_tracked_samples( tsk_tree_t *self, tsk_size_t num_tracked_samples, const tsk_id_t *tracked_samples); int tsk_tree_set_tracked_samples_from_sample_list( tsk_tree_t *self, tsk_tree_t *other, tsk_id_t node); -int tsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent); +int tsk_tree_get_branch_length( + const tsk_tree_t *self, tsk_id_t u, double *branch_length); int tsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *t); +int tsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent); int tsk_tree_get_depth(const tsk_tree_t *self, tsk_id_t u, int *depth); int tsk_tree_get_mrca(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v, tsk_id_t *mrca); int tsk_tree_get_num_samples( diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index b618c6551c..df7454e668 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -2998,10 +2998,12 @@ def test_map_mutations_null(self): genotypes = np.arange(n, dtype=np.int8) ancestral_state, transitions = tree.map_mutations(genotypes) assert ancestral_state == 0 + assert len(transitions) == n - 1 for j in range(n - 1): - assert transitions[j][0] == j + 1 + x = n - j - 1 + assert transitions[j][0] == x assert transitions[j][1] == -1 - assert transitions[j][2] == j + 1 + assert transitions[j][2] == x def test_map_mutations(self): ts = self.get_example_tree_sequence() diff --git a/python/tests/test_parsimony.py b/python/tests/test_parsimony.py index f4df616c2c..4093f9fdce 100644 --- a/python/tests/test_parsimony.py +++ b/python/tests/test_parsimony.py @@ -230,6 +230,8 @@ def hartigan_map_mutations(tree, genotypes, alleles, ancestral_state=None): if ancestral_state is None: ancestral_state = np.argmax(optimal_set[tree.virtual_root]) + else: + optimal_set[tree.virtual_root] = 1 @dataclasses.dataclass class StackElement: @@ -238,7 +240,7 @@ class StackElement: mutation_parent: int mutations = [] - stack = [StackElement(root, ancestral_state, -1) for root in reversed(tree.roots)] + stack = [StackElement(tree.virtual_root, ancestral_state, -1)] while len(stack) > 0: s = stack.pop() if optimal_set[s.node, s.state] == 0: @@ -538,6 +540,9 @@ def do_map_mutations( ) assert ancestral_state1 == ancestral_state2 assert len(transitions1) == len(transitions2) + sorted_t1 = sorted([(m.node, m.derived_state) for m in transitions1]) + sorted_t2 = sorted([(m.node, m.derived_state) for m in transitions2]) + assert sorted_t1 == sorted_t2 assert transitions1 == transitions2 return ancestral_state1, transitions1