Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Jul 18, 2022
1 parent e5fdd41 commit 4d9e26f
Show file tree
Hide file tree
Showing 8 changed files with 197 additions and 17 deletions.
49 changes: 49 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -1198,6 +1198,12 @@ test_simplest_records(void)
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);

tsk_treeseq_free(&ts);
}

Expand Down Expand Up @@ -1379,6 +1385,48 @@ test_simplest_unary_with_individuals(void)
tsk_treeseq_free(&ts);
}

/*
 * Simplify a minimal two-sample tree sequence under three unary-handling
 * modes: the default (options == 0), TSK_SIMPLIFY_KEEP_UNARY and
 * TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT. The default run is checked by its
 * summary counts; the other two runs must reproduce the input tables exactly.
 */
static void
test_simplest_partially_unary(void)
{
    /* Three node rows; the checks below expect two of them to be samples. */
    const char *node_text = "1 0 0\n"
                            "1 0 0\n"
                            "0 1 0";
    /* Two edge rows (columns assumed to be left, right, parent, child). */
    const char *edge_text = "0 2 2 0\n"
                            "0 1 2 1\n";
    tsk_id_t samples[] = { 0, 1 };
    tsk_treeseq_t ts, simplified;
    int ret;

    /* Build the input and sanity-check its shape before simplifying. */
    tsk_treeseq_from_text(
        &ts, 2, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 2.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);

    /* Default simplify: only the summary counts are verified. */
    ret = tsk_treeseq_simplify(&ts, samples, 2, 0, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 2.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 2);
    tsk_treeseq_free(&simplified);

    /* Keeping every unary node must give back the input tables. */
    ret = tsk_treeseq_simplify(
        &ts, samples, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    /* The coalescent-only variant must also give back the input tables. */
    ret = tsk_treeseq_simplify(&ts, samples, 2,
        TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    tsk_treeseq_free(&ts);
}

static void
test_simplest_non_sample_leaf_records(void)
{
Expand Down Expand Up @@ -7840,6 +7888,7 @@ main(int argc, char **argv)
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
{ "test_simplest_unary_with_individuals", test_simplest_unary_with_individuals },
{ "test_simplest_partially_unary", test_simplest_partially_unary },
{ "test_simplest_non_sample_leaf_records",
test_simplest_non_sample_leaf_records },
{ "test_simplest_degenerate_multiple_root_records",
Expand Down
48 changes: 36 additions & 12 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -8677,6 +8677,8 @@ simplifier_print_state(simplifier_t *self, FILE *out)
!!(self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS));
fprintf(out, "\tkeep_unary_in_individuals : %d\n",
!!(self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS));
fprintf(out, "\tkeep_unary_if_coalescent: %d\n",
!!(self->options & TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT));

fprintf(out, "===\nInput tables\n==\n");
tsk_table_collection_print_state(&self->input_tables, out);
Expand Down Expand Up @@ -9224,16 +9226,8 @@ simplifier_merge_ancestors(simplifier_t *self, tsk_id_t input_id)
double left, right, prev_right;
tsk_id_t ancestry_node;
tsk_id_t output_id = self->node_id_map[input_id];

bool is_sample = output_id != TSK_NULL;
bool keep_unary = false;
if (self->options & TSK_SIMPLIFY_KEEP_UNARY) {
keep_unary = true;
}
if ((self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)
&& (self->input_tables.nodes.individual[input_id] != TSK_NULL)) {
keep_unary = true;
}

if (is_sample) {
/* Free up the existing ancestry mapping. */
Expand All @@ -9243,6 +9237,32 @@ simplifier_merge_ancestors(simplifier_t *self, tsk_id_t input_id)
self->ancestor_map_tail[input_id] = NULL;
}

if (self->options & TSK_SIMPLIFY_KEEP_UNARY) {
keep_unary = true;
}
if ((self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)
&& (self->input_tables.nodes.individual[input_id] != TSK_NULL)) {
keep_unary = true;
}
if (self->options & TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT) {
/* Make an initial pass through the overlapping segments to see
* if there's any coalescence.
*/
ret = segment_overlapper_start(
&self->segment_overlapper, self->segment_queue, self->segment_queue_size);
if (ret != 0) {
goto out;
}
while ((ret = segment_overlapper_next(
&self->segment_overlapper, &left, &right, &X, &num_overlapping))
== 1) {
if (num_overlapping > 1) {
keep_unary = true;
break;
}
}
}

ret = segment_overlapper_start(
&self->segment_overlapper, self->segment_queue, self->segment_queue_size);
if (ret != 0) {
Expand Down Expand Up @@ -11491,10 +11511,14 @@ tsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samp
/* Avoid calling to simplifier_free with uninit'd memory on error branches */
tsk_memset(&simplifier, 0, sizeof(simplifier_t));

if ((options & TSK_SIMPLIFY_KEEP_UNARY)
&& (options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)) {
ret = TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE;
goto out;
if ((options & TSK_SIMPLIFY_KEEP_UNARY)) {
// FIXME what about specifying keep_unary_in_individuals *and*
// unary_if_coalescent?
if ((options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)
|| (options & TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT)) {
ret = TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE;
goto out;
}
}

/* For now we don't bother with edge metadata, but it can easily be
Expand Down
7 changes: 7 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,13 @@ flag). It keeps unary nodes, but only if the unary node is referenced from an in
@endrst
*/
#define TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS (1 << 6)
/**
@rst
Keep unary nodes, but only for nodes that are coalescent somewhere along the
sequence. While merging the ancestry of a node, simplify first scans its
overlapping segments; if any interval has more than one overlapping segment
the node is treated as it would be under ``TSK_SIMPLIFY_KEEP_UNARY``. Nodes
with no such interval get no special treatment from this option.

This option cannot be combined with ``TSK_SIMPLIFY_KEEP_UNARY``; specifying
both results in ``TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE``. Combining it with
``TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS`` is not currently rejected.
@endrst
*/
#define TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT (1 << 7)

/** @} */

/**
Expand Down
15 changes: 10 additions & 5 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -6590,19 +6590,21 @@ TableCollection_simplify(TableCollection *self, PyObject *args, PyObject *kwds)
int filter_populations = false;
int keep_unary = false;
int keep_unary_in_individuals = false;
int keep_unary_if_coalescent = false;
int keep_input_roots = false;
int reduce_to_site_topology = false;
static char *kwlist[] = { "samples", "filter_sites", "filter_populations",
"filter_individuals", "reduce_to_site_topology", "keep_unary",
"keep_unary_in_individuals", "keep_input_roots", NULL };
static char *kwlist[]
= { "samples", "filter_sites", "filter_populations", "filter_individuals",
"reduce_to_site_topology", "keep_unary", "keep_unary_in_individuals",
"keep_unary_if_coalescent", "keep_input_roots", NULL };

if (TableCollection_check_state(self) != 0) {
goto out;
}
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iiiiiii", kwlist, &samples,
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iiiiiiii", kwlist, &samples,
&filter_sites, &filter_populations, &filter_individuals,
&reduce_to_site_topology, &keep_unary, &keep_unary_in_individuals,
&keep_input_roots)) {
&keep_unary_if_coalescent, &keep_input_roots)) {
goto out;
}
samples_array = (PyArrayObject *) PyArray_FROMANY(
Expand Down Expand Up @@ -6630,6 +6632,9 @@ TableCollection_simplify(TableCollection *self, PyObject *args, PyObject *kwds)
if (keep_unary_in_individuals) {
options |= TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS;
}
if (keep_unary_if_coalescent) {
options |= TSK_SIMPLIFY_KEEP_UNARY_IF_COALESCENT;
}
if (keep_input_roots) {
options |= TSK_SIMPLIFY_KEEP_INPUT_ROOTS;
}
Expand Down
2 changes: 2 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ def test_simplify_bad_args(self):
tc.simplify([0, 1], keep_unary="sdf")
with pytest.raises(TypeError):
tc.simplify([0, 1], keep_unary_in_individuals="abc")
with pytest.raises(TypeError):
tc.simplify([0, 1], keep_unary_if_coalescent="abc")
with pytest.raises(TypeError):
tc.simplify([0, 1], keep_input_roots="sdf")
with pytest.raises(TypeError):
Expand Down
87 changes: 87 additions & 0 deletions python/tests/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -2478,6 +2478,93 @@ def test_nonbinary_tree_sequence_unary_nodes(self):
self.verify_unary_tree_sequence(ts)


# Tests for simplify(keep_unary_if_coalescent=True). The cases below expect a
# node to be removed when it is unary everywhere, and to be kept (including
# where it is unary) when it is coalescent in at least one tree.
class TestSimplifyKeepUnaryIfCoalescent:
# Simplifying a balanced 5-leaf tree is expected to leave the tables
# unchanged, provenance aside.
def test_simple_tree_unchanged(self):
ts1 = tskit.Tree.generate_balanced(5).tree_sequence
ts2 = ts1.simplify(keep_unary_if_coalescent=True)
ts1.tables.assert_equals(ts2.tables, ignore_provenance=True)

# A "stick": one sample with a chain of num_internal_nodes + 1 ancestors, each
# the parent of the previous node over [0, 1). Nothing coalesces, so only the
# sample node and no edges are expected to remain.
@pytest.mark.parametrize("num_internal_nodes", [1, 2, 10])
def test_stick(self, num_internal_nodes):
tables = tskit.TableCollection(1)
u = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
for _ in range(num_internal_nodes + 1):
# The previous node's id plus one is used as the new node's time.
v = tables.nodes.add_row(time=u + 1)
tables.edges.add_row(0, 1, v, u)
u = v
ts = tables.tree_sequence()
ts = ts.simplify(keep_unary_if_coalescent=True)
assert ts.num_nodes == 1
assert ts.num_edges == 0

# Node 2 is the parent of both samples over [0, 1) but only of node 0 over
# [1, 2). The tables are expected to come back unchanged.
def test_simplest_partially_unary(self):
nodes = io.StringIO(
"""\
id is_sample time
0 1 0
1 1 0
2 0 1
"""
)
edges = io.StringIO(
"""\
left right parent child
0 1 2 1
0 2 2 0
"""
)
ts1 = tskit.load_text(nodes=nodes, edges=edges, strict=False)
ts2 = ts1.simplify(keep_unary_if_coalescent=True)
ts1.tables.assert_equals(ts2.tables, ignore_provenance=True)

# Node 4 has a single child (node 1) in both trees, while nodes 2 and 3 each
# have two children in one tree and one child in the other. The result is
# expected to drop node 4 and keep nodes 2 and 3 in both trees.
def test_fully_and_partially_unary(self):
nodes = io.StringIO(
"""\
id is_sample time
0 1 0
1 1 0
2 0 2
3 0 3
4 0 1
"""
)
edges = io.StringIO(
"""\
left right parent child
0 2 2 0
0 2 3 2
0 2 4 1
0 1 3 4
1 2 2 4
"""
)
# 3.00┊ 3 ┊ 3 ┊
# ┊ ┏┻┓ ┊ ┃ ┊
# 2.00┊ 2 ┃ ┊ 2 ┊
# ┊ ┃ ┃ ┊ ┏┻┓ ┊
# 1.00┊ ┃ 4 ┊ ┃ 4 ┊
# ┊ ┃ ┃ ┊ ┃ ┃ ┊
# 0.00┊ 0 1 ┊ 0 1 ┊
# 0 1 2
ts1 = tskit.load_text(nodes=nodes, edges=edges, strict=False)

# If we use keep_unary, then the tree sequence is unchanged (modulo
# node reordering)
ts2 = ts1.simplify(keep_unary=True)
assert ts2.num_nodes == 5

ts2 = ts1.simplify(keep_unary_if_coalescent=True)
# 3.00┊ 3 ┊ 3 ┊
# ┊ ┏┻┓ ┊ ┃ ┊
# 2.00┊ 2 ┃ ┊ 2 ┊
# ┊ ┃ ┃ ┊ ┏┻┓ ┊
# 0.00┊ 0 1 ┊ 0 1 ┊
# 0 1 2
# One node fewer than the input's five; the parent maps below match the
# drawing above.
assert ts2.num_nodes == 4
assert ts2.first().parent_dict == {0: 2, 1: 3, 2: 3}
assert ts2.last().parent_dict == {0: 2, 1: 2, 2: 3}


class TestGeneralSamples(TopologyTestCase):
"""
Test cases in which we have samples at arbitrary nodes (i.e., not at
Expand Down
4 changes: 4 additions & 0 deletions python/tskit/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3361,6 +3361,7 @@ def simplify(
filter_sites=True,
keep_unary=False,
keep_unary_in_individuals=None,
keep_unary_if_coalescent=None,
keep_input_roots=False,
record_provenance=True,
filter_zero_mutation_sites=None, # Deprecated alias for filter_sites
Expand Down Expand Up @@ -3447,6 +3448,8 @@ def simplify(
samples = util.safe_np_int_cast(samples, np.int32)
if keep_unary_in_individuals is None:
keep_unary_in_individuals = False
if keep_unary_if_coalescent is None:
keep_unary_if_coalescent = False

node_map = self._ll_tables.simplify(
samples,
Expand All @@ -3456,6 +3459,7 @@ def simplify(
reduce_to_site_topology=reduce_to_site_topology,
keep_unary=keep_unary,
keep_unary_in_individuals=keep_unary_in_individuals,
keep_unary_if_coalescent=keep_unary_if_coalescent,
keep_input_roots=keep_input_roots,
)
if record_provenance:
Expand Down
2 changes: 2 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -6145,6 +6145,7 @@ def simplify(
filter_sites=True,
keep_unary=False,
keep_unary_in_individuals=None,
keep_unary_if_coalescent=None,
keep_input_roots=False,
record_provenance=True,
filter_zero_mutation_sites=None, # Deprecated alias for filter_sites
Expand Down Expand Up @@ -6241,6 +6242,7 @@ def simplify(
filter_sites=filter_sites,
keep_unary=keep_unary,
keep_unary_in_individuals=keep_unary_in_individuals,
keep_unary_if_coalescent=keep_unary_if_coalescent,
keep_input_roots=keep_input_roots,
record_provenance=record_provenance,
filter_zero_mutation_sites=filter_zero_mutation_sites,
Expand Down

0 comments on commit 4d9e26f

Please sign in to comment.