Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

**Features**

- Add ``TSK_KEEP_UNARY_IN_INDIVIDUALS`` flag to simplify, which allows the user to
keep unary nodes only if they belong to a tabled individual. This is useful for
simplification in forwards simulations (:user:`hyanwong`, :issue:`1113`, :pr:`1119`).

**Bugfixes**


Expand Down
98 changes: 97 additions & 1 deletion c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -940,11 +940,22 @@ test_simplest_records(void)
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(&ts, sample_ids, 2,
TSK_KEEP_UNARY | TSK_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE);
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(&ts, sample_ids, 2, TSK_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -978,6 +989,12 @@ test_simplest_nonbinary_records(void)
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(
&ts, sample_ids, 4, TSK_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

tsk_treeseq_free(&ts);
}

Expand All @@ -993,7 +1010,7 @@ test_simplest_unary_records(void)
const char *edges = "0 1 2 0\n"
"0 1 3 1\n"
"0 1 4 2,3\n";
tsk_treeseq_t ts, simplified;
tsk_treeseq_t ts, simplified, simplified_other;
tsk_id_t sample_ids[] = { 0, 1 };

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
Expand All @@ -1019,6 +1036,84 @@ test_simplest_unary_records(void)
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified_other, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(
tsk_table_collection_equals(simplified.tables, simplified_other.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&simplified_other);

tsk_treeseq_free(&ts);
}

static void
test_simplest_unary_with_individuals(void)
{
int ret;
const char *nodes = "1 0 0 -1\n"
"1 0 0 0\n"
"0 1 0 -1\n"
"0 1 0 1\n"
"0 2 0 -1\n"
"0 3 0 -1\n"
"0 3 0 2\n"
"0 1 0 -1\n"
"0 1 0 3\n"
"0 0 0 -1\n"
"0 0 0 4\n"
"0 1 0 3\n";
const char *edges = "0 2 2 0\n"
"0 2 3 1\n"
"2 3 7 0\n"
"2 3 8 1,9\n"
"2 3 11 10\n"
"0 2 4 2,3\n"
"0 1 5 4\n"
"1 2 6 4\n";
const char *individuals = "0 0.5\n"
"0 1.5,3.1\n"
"0 2.1\n"
"0 3.2\n"
"0 4.2\n";
const char *nodes_expect = "1 0 0 -1\n"
"1 0 0 0\n"
"0 1 0 1\n"
"0 1 0 3\n"
"0 2 0 -1\n"
"0 3 0 2\n";
const char *edges_expect = "0 2 2 1\n"
"2 3 3 1\n"
"0 2 4 0,2\n"
"1 2 5 4\n";
const char *individuals_expect = "0 0.5\n"
"0 1.5,3.1\n"
"0 2.1\n"
"0 3.2\n";
tsk_treeseq_t ts, simplified, expected;
tsk_id_t sample_ids[] = { 0, 1 };

tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, NULL, NULL, individuals, NULL, 0);
tsk_treeseq_from_text(&expected, 3, nodes_expect, edges_expect, NULL, NULL, NULL,
individuals_expect, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 12);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_individuals(&ts), 5);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1);

ret = tsk_treeseq_simplify(&ts, sample_ids, 2,
TSK_KEEP_UNARY_IN_INDIVIDUALS | TSK_FILTER_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(simplified.tables, expected.tables, 0));
tsk_treeseq_free(&simplified);

tsk_treeseq_free(&expected);
tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -5987,6 +6082,7 @@ main(int argc, char **argv)
{ "test_simplest_records", test_simplest_records },
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
{ "test_simplest_unary_with_individuals", test_simplest_unary_with_individuals },
{ "test_simplest_non_sample_leaf_records",
test_simplest_non_sample_leaf_records },
{ "test_simplest_degenerate_multiple_root_records",
Expand Down
5 changes: 5 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,11 @@ tsk_strerror_internal(int err)
case TSK_ERR_DUPLICATE_SAMPLE_PAIRS:
ret = "There are duplicate sample pairs.";
break;

/* Simplify errors */
case TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE:
ret = "You cannot specify both KEEP_UNARY and KEEP_UNARY_IN_INDIVDUALS.";
break;
}
return ret;
}
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,9 @@ not found in the file.
#define TSK_ERR_NO_SAMPLE_PAIRS -1500
#define TSK_ERR_DUPLICATE_SAMPLE_PAIRS -1501

/* Simplify errors */
#define TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE -1600

// clang-format on

/* This bit is 0 for any errors originating from kastore */
Expand Down
17 changes: 16 additions & 1 deletion c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -6094,6 +6094,8 @@ simplifier_print_state(simplifier_t *self, FILE *out)
fprintf(out, "\tkeep_unary : %d\n", !!(self->options & TSK_KEEP_UNARY));
fprintf(out, "\tkeep_input_roots : %d\n",
!!(self->options & TSK_KEEP_INPUT_ROOTS));
fprintf(out, "\tkeep_unary_in_individuals : %d\n",
!!(self->options & TSK_KEEP_UNARY_IN_INDIVIDUALS));

fprintf(out, "===\nInput tables\n==\n");
tsk_table_collection_print_state(&self->input_tables, out);
Expand Down Expand Up @@ -6635,8 +6637,16 @@ simplifier_merge_ancestors(simplifier_t *self, tsk_id_t input_id)
double left, right, prev_right;
tsk_id_t ancestry_node;
tsk_id_t output_id = self->node_id_map[input_id];

bool is_sample = output_id != TSK_NULL;
bool keep_unary = !!(self->options & TSK_KEEP_UNARY);
bool keep_unary = false;
if (self->options & TSK_KEEP_UNARY) {
keep_unary = true;
}
if ((self->options & TSK_KEEP_UNARY_IN_INDIVIDUALS)
&& (self->tables->nodes.individual[input_id] != TSK_NULL)) {
keep_unary = true;
}

if (is_sample) {
/* Free up the existing ancestry mapping. */
Expand Down Expand Up @@ -8546,6 +8556,11 @@ tsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samp
/* Avoid calling to simplifier_free with uninit'd memory on error branches */
memset(&simplifier, 0, sizeof(simplifier_t));

if ((options & TSK_KEEP_UNARY) && (options & TSK_KEEP_UNARY_IN_INDIVIDUALS)) {
ret = TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE;
goto out;
}

/* For now we don't bother with edge metadata, but it can easily be
* implemented. */
if (self->edges.metadata_length > 0) {
Expand Down
4 changes: 4 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ typedef struct {
#define TSK_REDUCE_TO_SITE_TOPOLOGY (1 << 3)
#define TSK_KEEP_UNARY (1 << 4)
#define TSK_KEEP_INPUT_ROOTS (1 << 5)
#define TSK_KEEP_UNARY_IN_INDIVIDUALS (1 << 6)

/* Flags for check_integrity */
#define TSK_CHECK_EDGE_ORDERING (1 << 0)
Expand Down Expand Up @@ -2671,6 +2672,9 @@ TSK_KEEP_INPUT_ROOTS
By default simplify removes all topology ancestral the MRCAs of the samples.
This option inserts edges from these MRCAs back to the roots of the input
trees.
TSK_KEEP_UNARY_IN_INDIVDUALS
This acts like TSK_KEEP_UNARY (and is mutually exclusive with that flag). It
keeps unary nodes, but only if the unary node is referenced from an individual.

.. note:: Migrations are currently not supported by simplify, and an error will
be raised if we attempt call simplify on a table collection with greater
Expand Down