Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ build_script:
- cmd: python -m pip install newick
- cmd: python -m pip install python_jsonschema_objects
- cmd: python -m pip install xmlunittest
- cmd: python -m pip install portion
- cmd: python -m nose -vs --processes=%NUMBER_OF_PROCESSORS% --process-timeout=5000

after_test:
Expand Down
10 changes: 8 additions & 2 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,19 @@

- The macro ``TSK_IMPUTE_MISSING_DATA`` is renamed to ``TSK_ISOLATED_NOT_MISSING``

**New features**

- Add a ``TSK_KEEP_INPUT_ROOTS`` option to simplify which, if enabled, adds edges
from the MRCAs of samples in the simplified tree sequence back to the roots
in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).

---------------------
[0.99.4] - 2020-08-12
---------------------

**Note**

- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both
- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both
0.99.4 and 0.99.3 have the same value.

**Changes**
Expand Down Expand Up @@ -70,7 +76,7 @@

- New methods to perform set operations on table collections.
``tsk_table_collection_subset`` subsets and reorders table collections by nodes
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
``tsk_table_collection_union`` forms the node-wise union of two table collections
(:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).

Expand Down
77 changes: 76 additions & 1 deletion c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,80 @@ test_copy_table_collection(void)
tsk_treeseq_free(&ts);
}

static void
test_sort_tables_offsets(void)
{
    int ret;
    tsk_treeseq_t *ts;
    tsk_table_collection_t tables, copy;
    tsk_bookmark_t start;

    ts = caterpillar_tree(10, 5, 5);
    ret = tsk_treeseq_copy_tables(ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* The first sort is expected to be rejected because of the migration
     * table; after clearing that table the same call is expected to succeed. */
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED);
    tsk_migration_table_clear(&tables.migrations);
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Case 1: an edge offset equal to the number of edges. The edges are
     * put out of order first, and sorting from that offset must leave the
     * tables equal to the snapshot taken beforehand. */
    reverse_edges(&tables);
    ret = tsk_table_collection_copy(&tables, &copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    memset(&start, 0, sizeof(start));
    start.edges = tables.edges.num_rows;
    ret = tsk_table_collection_sort(&tables, &start, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy));

    /* Restore a fully sorted state; the next case needs more than two
     * sites and more than two mutations to be meaningful. */
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tables.sites.num_rows > 2);
    CU_ASSERT_FATAL(tables.mutations.num_rows > 2);

    /* Case 2: site and mutation offsets equal to the lengths of their
     * tables must leave both tables untouched. */
    reverse_mutations(&tables);
    /* Disorder the site table cheaply: site 0 takes the position of
     * site 1, and site 1 is moved to position 0. */
    tables.sites.position[0] = tables.sites.position[1];
    tables.sites.position[1] = 0;
    /* "copy" was initialised by the earlier copy, hence TSK_NO_INIT. */
    ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    memset(&start, 0, sizeof(start));
    start.mutations = tables.mutations.num_rows;
    start.sites = tables.sites.num_rows;
    ret = tsk_table_collection_sort(&tables, &start, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy));

    /* Case 3: unsupported offset combinations. A site offset without the
     * matching mutation offset is an error ... */
    memset(&start, 0, sizeof(start));
    start.sites = tables.sites.num_rows;
    ret = tsk_table_collection_sort(&tables, &start, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);

    /* ... as is a mutation offset without the matching site offset ... */
    memset(&start, 0, sizeof(start));
    start.mutations = tables.mutations.num_rows;
    ret = tsk_table_collection_sort(&tables, &start, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);

    /* ... and so are offsets one short of the table lengths. */
    memset(&start, 0, sizeof(start));
    start.mutations = tables.mutations.num_rows - 1;
    start.sites = tables.sites.num_rows - 1;
    ret = tsk_table_collection_sort(&tables, &start, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);

    tsk_table_collection_free(&tables);
    tsk_table_collection_free(&copy);
    /* The tree sequence is handed back as a pointer, so both its contents
     * and the struct itself are released here. */
    tsk_treeseq_free(ts);
    free(ts);
}

static void
test_sort_tables_drops_indexes_with_options(tsk_flags_t tc_options)
{
Expand Down Expand Up @@ -3128,7 +3202,7 @@ test_sort_tables_errors(void)
memset(&pos, 0, sizeof(pos));
pos.migrations = 1;
ret = tsk_table_collection_sort(&tables, &pos, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);

memset(&pos, 0, sizeof(pos));
pos.sites = 1;
Expand Down Expand Up @@ -4552,6 +4626,7 @@ main(int argc, char **argv)
test_link_ancestors_samples_and_ancestors_overlap },
{ "test_link_ancestors_multiple_to_single_tree",
test_link_ancestors_multiple_to_single_tree },
{ "test_sort_tables_offsets", test_sort_tables_offsets },
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
{ "test_sort_tables_edge_metadata", test_sort_tables_edge_metadata },
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },
Expand Down
173 changes: 173 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -2566,6 +2566,69 @@ test_simplest_reduce_site_topology(void)
tsk_table_collection_free(&tables);
}

static void
test_simplest_simplify_defragment(void)
{
    /* Eighteen node rows in three groups of six. The last group (rows
     * 12-17) carries a leading 1 and is exactly the set of IDs passed to
     * simplify as samples below.
     * NOTE(review): columns are presumably flags, time, population --
     * confirm against parse_nodes. */
    const char *node_rows = "0 2 -1\n"
                            "0 2 -1\n"
                            "0 2 -1\n"
                            "0 2 -1\n"
                            "0 2 -1\n"
                            "0 2 -1\n"
                            "0 1 -1\n"
                            "0 1 -1\n"
                            "0 1 -1\n"
                            "0 1 -1\n"
                            "0 1 -1\n"
                            "0 1 -1\n"
                            "1 0 -1\n"
                            "1 0 -1\n"
                            "1 0 -1\n"
                            "1 0 -1\n"
                            "1 0 -1\n"
                            "1 0 -1\n";
    /* Eighteen edge rows. Note the two rows with the same last two columns
     * (2 6) that abut at 0.26002984.
     * NOTE(review): columns are presumably left, right, parent, child --
     * confirm against parse_edges. */
    const char *edge_rows = "0.00000000 0.20784841 8 12\n"
                            "0.00000000 0.42202433 8 15\n"
                            "0.00000000 0.63541014 8 16\n"
                            "0.42202433 1.00000000 9 15\n"
                            "0.00000000 1.00000000 9 17\n"
                            "0.00000000 1.00000000 10 14\n"
                            "0.20784841 1.00000000 11 12\n"
                            "0.00000000 1.00000000 11 13\n"
                            "0.63541014 1.00000000 11 16\n"
                            "0.00000000 1.00000000 0 10\n"
                            "0.62102072 1.00000000 1 9\n"
                            "0.00000000 1.00000000 1 11\n"
                            "0.00000000 0.26002984 2 6\n"
                            "0.26002984 1.00000000 2 6\n"
                            "0.00000000 0.62102072 2 9\n"
                            "0.55150554 1.00000000 3 8\n"
                            "0.00000000 1.00000000 4 7\n"
                            "0.00000000 0.55150554 5 8\n";

    int ret;
    tsk_table_collection_t tables;
    tsk_id_t sample_ids[] = { 12, 13, 14, 15, 16, 17 };

    /* According to the original author, this was the simplest example
     * found that exercises the inner loops of the
     * simplifier_extract_ancestry function. */
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Load the text tables and make sure every row was parsed. */
    parse_nodes(node_rows, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 18);
    parse_edges(edge_rows, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 18);

    ret = tsk_table_collection_simplify(&tables, sample_ids, 6, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Ten nodes and ten edges are expected to survive simplification. */
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 10);
    CU_ASSERT_EQUAL(tables.edges.num_rows, 10);

    tsk_table_collection_free(&tables);
}

static void
test_simplest_population_filter(void)
{
Expand Down Expand Up @@ -3640,6 +3703,7 @@ test_single_tree_iter_depths(void)
tsk_tree_free(&tree);
tsk_treeseq_free(&ts);
}

static void
test_single_tree_simplify(void)
{
Expand Down Expand Up @@ -3697,6 +3761,55 @@ test_single_tree_simplify(void)
tsk_table_collection_free(&tables);
}

static void
test_single_tree_simplify_debug(void)
{
    int ret;
    tsk_id_t sample_ids[] = { 0, 1 };
    tsk_treeseq_t ts, simplified;
    FILE *real_stdout = stdout;
    FILE *tmp = fopen(_tmp_file_name, "w");

    CU_ASSERT_FATAL(tmp != NULL);
    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

    /* Point stdout at the temporary file for the duration of the simplify
     * call only, and put the real stream back before any assertion runs.
     * NOTE(review): this relies on stdout being assignable, which ISO C
     * does not guarantee -- confirm on all supported platforms. */
    stdout = tmp;
    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_DEBUG, &simplified, NULL);
    stdout = real_stdout;

    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Simplifying with TSK_DEBUG must have written something to the file. */
    CU_ASSERT_TRUE(ftell(tmp) > 0);

    fclose(tmp);
    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&simplified);
}

static void
test_single_tree_simplify_keep_input_roots(void)
{
    int ret;
    tsk_id_t sample_ids[] = { 0, 1 };
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;

    /* Build the single-tree example, run the generic simplify checks on
     * it, then take a copy of its tables to simplify directly. */
    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_simplify(&ts);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Simplify down to samples 0 and 1 with TSK_KEEP_INPUT_ROOTS set. */
    ret = tsk_table_collection_simplify(
        &tables, sample_ids, 2, TSK_KEEP_INPUT_ROOTS, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Expected table sizes after simplification. */
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 4);
    CU_ASSERT_EQUAL(tables.edges.num_rows, 3);
    CU_ASSERT_EQUAL(tables.sites.num_rows, 3);
    CU_ASSERT_EQUAL(tables.mutations.num_rows, 4);

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}

static void
test_single_tree_simplify_no_sample_nodes(void)
{
Expand Down Expand Up @@ -4283,6 +4396,60 @@ test_nonbinary_multi_tree(void)
tsk_treeseq_free(&ts);
}

static void
test_simplify_keep_input_roots_multi_tree(void)
{
    /*
     * Checks that simplifying the three-tree "paper" example down to the
     * samples {0, 3} with TSK_KEEP_INPUT_ROOTS keeps an edge from the MRCA
     * of the samples up to the root of the input tree.
     *
     * Input trees:
     *
     * 0.25┊     8   ┊         ┊         ┊
     *     ┊   ┏━┻━┓ ┊         ┊         ┊
     * 0.20┊   ┃   ┃ ┊         ┊   7     ┊
     *     ┊   ┃   ┃ ┊         ┊ ┏━┻━┓   ┊
     * 0.17┊   6   ┃ ┊   6     ┊ ┃   ┃   ┊
     *     ┊ ┏━┻┓  ┃ ┊ ┏━┻━┓   ┊ ┃   ┃   ┊
     * 0.09┊ ┃  5  ┃ ┊ ┃   5   ┊ ┃   5   ┊
     *     ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓  ┊ ┃ ┏━┻┓  ┊
     * 0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┊ ┃ ┃  4  ┊
     *     ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
     * 0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
     *   0.00      2.00      7.00      10.00
     *
     * Simplifies to
     *
     * 0.25┊  4  ┊     ┊     ┊
     *     ┊  ┃  ┊     ┊     ┊
     * 0.20┊  ┃  ┊     ┊  3  ┊
     *     ┊  ┃  ┊     ┊ ┏┻┓ ┊
     * 0.17┊  2  ┊  2  ┊ ┃ ┃ ┊
     *     ┊ ┏┻┓ ┊ ┏┻┓ ┊ ┃ ┃ ┊
     * 0.00┊ 0 1 ┊ 0 1 ┊ 0 1 ┊
     *   0.00  2.00  7.00  10.00
     *
     * Reading the two diagrams together, input nodes 0, 3, 6, 7 and 8
     * appear as output nodes 0, 1, 2, 3 and 4. In the first tree the MRCA
     * of the samples (output node 2) is not the input root, so the unary
     * edge 2 -> 4 is retained.
     */
    int ret = 0;
    /* Expected parent of each output node, one row per output tree;
     * -1 marks a node with no parent in that tree. */
    // clang-format off
    tsk_id_t parents[] = {
        2, 2, 4, -1, -1,
        2, 2, -1, -1, -1,
        3, 3, -1, -1, -1,
    };
    // clang-format on
    uint32_t num_trees = 3;

    tsk_id_t samples[] = { 0, 3 };
    tsk_treeseq_t ts, simplified;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    /* No dump of the input to disk here: the test never reads such a file
     * back, so writing one would only litter the working directory. */
    ret = tsk_treeseq_simplify(&ts, samples, 2, TSK_KEEP_INPUT_ROOTS, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    verify_trees(&simplified, num_trees, parents);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&simplified);
}

static void
test_left_to_right_multi_tree(void)
{
Expand Down Expand Up @@ -5811,6 +5978,7 @@ main(int argc, char **argv)
{ "test_simplest_overlapping_unary_edges_internal_samples_simplify",
test_simplest_overlapping_unary_edges_internal_samples_simplify },
{ "test_simplest_reduce_site_topology", test_simplest_reduce_site_topology },
{ "test_simplest_simplify_defragment", test_simplest_simplify_defragment },
{ "test_simplest_population_filter", test_simplest_population_filter },
{ "test_simplest_individual_filter", test_simplest_individual_filter },
{ "test_simplest_map_mutations", test_simplest_map_mutations },
Expand Down Expand Up @@ -5839,6 +6007,9 @@ main(int argc, char **argv)
{ "test_single_tree_iter_times", test_single_tree_iter_times },
{ "test_single_tree_iter_depths", test_single_tree_iter_depths },
{ "test_single_tree_simplify", test_single_tree_simplify },
{ "test_single_tree_simplify_debug", test_single_tree_simplify_debug },
{ "test_single_tree_simplify_keep_input_roots",
test_single_tree_simplify_keep_input_roots },
{ "test_single_tree_simplify_no_sample_nodes",
test_single_tree_simplify_no_sample_nodes },
{ "test_single_tree_simplify_null_samples",
Expand All @@ -5859,6 +6030,8 @@ main(int argc, char **argv)
{ "test_internal_sample_multi_tree", test_internal_sample_multi_tree },
{ "test_internal_sample_simplified_multi_tree",
test_internal_sample_simplified_multi_tree },
{ "test_simplify_keep_input_roots_multi_tree",
test_simplify_keep_input_roots_multi_tree },
{ "test_left_to_right_multi_tree", test_left_to_right_multi_tree },
{ "test_gappy_multi_tree", test_gappy_multi_tree },
{ "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records },
Expand Down
5 changes: 3 additions & 2 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,9 @@ tsk_strerror_internal(int err)
ret = "Migrations not currently supported by this operation";
break;
case TSK_ERR_SORT_OFFSET_NOT_SUPPORTED:
ret = "Specifying position for mutation, sites or migrations is not "
"supported";
ret = "Sort offsets for sites and mutations must be either 0 "
"or the length of the respective tables. Intermediate values "
"are not supported";
break;
case TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED:
ret = "Only binary mutations are supported for this operation";
Expand Down
Loading