Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions docs/data-model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1016,9 +1016,19 @@ Consider the following example:
In this tree, node 4 is isolated, and therefore for any sites that are
on this tree, the state that it is assigned is a special value
``tskit.MISSING_DATA``, or ``-1``, as long as there are no mutations above
that node at that site. See the :meth:`TreeSequence.variants`
method and :class:`Variant` class for more information on how missing
data is represented.
the node at that site. Note that, although isolated, because node 4
is a sample node it is still considered as being present in the
tree, meaning it will still be returned by the :meth:`Tree.nodes` and
:meth:`Tree.samples` methods. The :meth:`Tree.is_isolated` method can be used to
identify nodes which are isolated samples:

>>> [u for u in tree.samples() if tree.is_isolated(u)] # isolated samples in this tree
[4]
>>> [u for u in tree.nodes() if not tree.is_isolated(u)] # topologically connected nodes
[0, 1, 2, 3, 5, 6, 7]

See the :meth:`TreeSequence.variants` method and :class:`Variant` class for
more information on how missing data is represented in variant data.


.. _sec_text_file_format:
Expand Down
2 changes: 2 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
- :issue:`832` - Add ``metadata_bytes`` method to allow access to raw
TableCollection metadata (:user:`benjeffery`, :pr:`842`)

- New ``tree.is_isolated(u)`` method (:user:`hyanwong`, :pr:`443`).

--------------------
[0.3.1] - 2020-09-04
--------------------
Expand Down
55 changes: 55 additions & 0 deletions python/tests/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -7857,3 +7857,58 @@ def test_failure_with_migrations(self):
self.assertRaises(ValueError, ts.ltrim)
self.assertRaises(ValueError, ts.rtrim)
self.assertRaises(ValueError, ts.trim)


class TestMissingData(unittest.TestCase):
    """
    Tests covering missing data, i.e. trees in which a sample node is
    isolated over part of the sequence.
    """

    # TODO tests for missing data currently sparse: more tests should go here

    def ts_missing_middle(self):
        """
        Build a small simulated tree sequence in which sample 0 is missing
        (has no edges) over a middle section.

        Every edge of the simulated tree sequence is copied across, except
        those whose child is node 0 and which touch neither position 0.0 nor
        position 1.0.

        :return: A tuple ``(ts, missing_from, missing_to)``: the modified tree
            sequence, the right end of the node-0 edge starting at 0.0, and
            the left end of the node-0 edge ending at 1.0.
        """
        # NOTE(review): the comparisons against 1.0 assume the simulation's
        # sequence length is 1.0 (presumably the msprime default) - confirm.
        ts = msprime.simulate(4, mutation_rate=1, recombination_rate=4, random_seed=2)
        tables = ts.dump_tables()
        tables.edges.clear()
        for edge in ts.tables.edges:
            if edge.child == 0:
                starts_at_left_end = edge.left == 0.0
                ends_at_right_end = edge.right == 1.0
                if not (starts_at_left_end or ends_at_right_end):
                    # Interior edge: dropping it leaves node 0 isolated here.
                    continue
                if starts_at_left_end:
                    missing_from = edge.right
                else:
                    missing_to = edge.left
            tables.edges.add_row(edge.left, edge.right, edge.parent, edge.child)
        # Node 0 must still be attached at both the left and right ends.
        self.assertTrue(0.0 < missing_from < 1.0)
        self.assertTrue(0.0 < missing_to < 1.0)
        return tables.tree_sequence(), missing_from, missing_to

    def test_is_isolated(self):
        """
        Check ``Tree.is_isolated`` for samples and non-sample nodes in every
        tree of the fixture.

        Review note (@benjeffery, 2020-08-28): error cases such as a negative
        index, an index bigger than the number of nodes, or an index of the
        wrong type should be tested as well; see ``test_is_isolated_bad``.
        """
        ts, missing_from, missing_to = self.ts_missing_middle()
        for tree in ts.trees():
            overlaps_gap = (
                tree.interval[1] > missing_from and tree.interval[0] < missing_to
            )
            # Sample 0 is isolated exactly in the trees overlapping the gap;
            # sample 1 is never isolated.
            check_sample_0 = self.assertTrue if overlaps_gap else self.assertFalse
            check_sample_0(tree.is_isolated(0))
            self.assertFalse(tree.is_isolated(1))

            # A non-sample node is isolated if not in the tree
            nodes_in_tree = set(tree.nodes())
            nonsample_nodes = np.setdiff1d(np.arange(ts.num_nodes), ts.samples())
            for node in nonsample_nodes:
                check = self.assertFalse if node in nodes_in_tree else self.assertTrue
                check(tree.is_isolated(node))

    def test_is_isolated_bad(self):
        """
        Check that ``Tree.is_isolated`` rejects invalid node arguments:
        ValueError for out-of-range IDs, TypeError for non-integer values.
        """
        ts, _, _ = self.ts_missing_middle()
        for tree in ts.trees():
            for bad_id in (tskit.NULL, ts.num_nodes, -2):
                with self.assertRaises(ValueError):
                    tree.is_isolated(bad_id)
            for bad_value in (None, "abc", 1.1):
                with self.assertRaises(TypeError):
                    tree.is_isolated(bad_value)
13 changes: 13 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,19 @@ def is_leaf(self, u):
"""
return len(self.children(u)) == 0

def is_isolated(self, u):
    """
    Returns True if the specified node is isolated in this tree, i.e. it
    has neither a parent nor any children. Isolated sample nodes with no
    mutations above them are used to represent
    :ref:`missing data<sec_data_model_missing_data>`.

    :param int u: The node of interest.
    :return: True if u is an isolated node.
    :rtype: bool
    """
    # NOTE(review): there is no explicit validation of ``u`` here; the tests
    # for this method expect ValueError/TypeError for bad values, presumably
    # raised by the calls below - confirm.
    if self.num_children(u) != 0:
        return False
    return self.parent(u) == NULL

def is_sample(self, u):
"""
Returns True if the specified node is a sample. A node :math:`u` is a
Expand Down