diff --git a/docs/python-api.rst b/docs/python-api.rst index 52d705f9de..287e81ec38 100644 --- a/docs/python-api.rst +++ b/docs/python-api.rst @@ -484,9 +484,9 @@ Table functions .. _sec_metadata_api: -******** -Metadata -******** +************ +Metadata API +************ The ``metadata`` module provides validation, encoding and decoding of metadata using a schema. See :ref:`sec_metadata`, :ref:`sec_metadata_api_overview` and @@ -498,16 +498,20 @@ using a schema. See :ref:`sec_metadata`, :ref:`sec_metadata_api_overview` and .. autofunction:: register_metadata_codec -.. _sec_stats_api: +.. _sec_combinatorics_api: -************* -Combinatorics -************* -The following are generators for fully enumerating unique tree topologies. -The position of a tree in the enumeration ``all_trees`` is given by -:meth:`Tree.rank`. Inversely, a :class:`Tree` can be constructed from a -position in the enumeration with :meth:`Tree.unrank`. -See :ref:`sec_combinatorics` for details. +***************** +Combinatorics API +***************** + +The combinatorics API deals with tree topologies, allowing them to be counted, +listed and generated: see :ref:`sec_combinatorics` for a detailed description. Briefly, +the position of a tree in the enumeration ``all_trees`` can be obtained using the tree's +:meth:`~Tree.rank` method. Inversely, a :class:`Tree` can be constructed from a position +in the enumeration with :meth:`Tree.unrank`. Generated trees are associated with a new +tree sequence containing only that tree for the entire genome (i.e. with +:attr:`~TreeSequence.num_trees` = 1 and a :attr:`~TreeSequence.sequence_length` equal to +the :attr:`~Tree.span` of the tree). .. 
autofunction:: all_trees diff --git a/python/tests/test_combinatorics.py b/python/tests/test_combinatorics.py index b6a178e4d5..52d6ef0ff2 100644 --- a/python/tests/test_combinatorics.py +++ b/python/tests/test_combinatorics.py @@ -182,6 +182,16 @@ def test_generate_trees_roundtrip(self): for rank_tree, tsk_tree in zip(all_rank_trees, all_tsk_trees): assert rank_tree == RankTree.from_tsk_tree(tsk_tree) + def test_generate_treeseq_roundtrip(self): + n = 5 + span = 9 + all_rank_trees = RankTree.all_labelled_trees(n) + all_tsk_trees = tskit.all_trees(n, span=span) + for rank_tree, tsk_tree in zip(all_rank_trees, all_tsk_trees): + ts1 = tsk_tree.tree_sequence + ts2 = rank_tree.to_tsk_tree(span=span).tree_sequence + assert ts1.tables.equals(ts2.tables, ignore_provenance=True) + def test_all_shapes_roundtrip(self): n = 5 all_rank_tree_shapes = RankTree.all_unlabelled_trees(n) @@ -389,6 +399,16 @@ def test_rank_errors_multiple_roots(self): with pytest.raises(ValueError): ts.first().rank() + def test_span(self): + n = 5 + span = 8 + # Create a star tree, with a single root + tsk_tree = tskit.Tree.unrank((0, 0), n, span=span) + assert tsk_tree.num_nodes == n + 1 + assert tsk_tree.interval.left == 0 + assert tsk_tree.interval.right == span + assert tsk_tree.tree_sequence.sequence_length == span + def test_big_trees(self): n = 14 shape = 22 diff --git a/python/tskit/combinatorics.py b/python/tskit/combinatorics.py index 517da2e749..d3fe262a8e 100644 --- a/python/tskit/combinatorics.py +++ b/python/tskit/combinatorics.py @@ -255,7 +255,7 @@ def join_topologies(child_topologies): return RankTree(children).rank() -def all_trees(num_leaves): +def all_trees(num_leaves, span=1): """ Generates all unique leaf-labelled trees with ``num_leaves`` leaves. See :ref:`sec_combinatorics` on the details of this @@ -264,25 +264,27 @@ def all_trees(num_leaves): chosen arbitrarily. :param int num_leaves: The number of leaves of the tree to generate. 
+ :param float span: The genomic span of each returned tree. :rtype: tskit.Tree """ for rank_tree in RankTree.all_labelled_trees(num_leaves): - yield rank_tree.to_tsk_tree() + yield rank_tree.to_tsk_tree(span=span) -def all_tree_shapes(num_leaves): +def all_tree_shapes(num_leaves, span=1): """ Generates all unique shapes of trees with ``num_leaves`` leaves. :param int num_leaves: The number of leaves of the tree to generate. + :param float span: The genomic span of each returned tree. :rtype: tskit.Tree """ for rank_tree in RankTree.all_unlabelled_trees(num_leaves): default_labelling = rank_tree.label_unrank(0) - yield default_labelling.to_tsk_tree() + yield default_labelling.to_tsk_tree(span=span) -def all_tree_labellings(tree): +def all_tree_labellings(tree, span=1): """ Generates all unique labellings of the leaves of a :class:`tskit.Tree`. Leaves are labelled from the set @@ -290,11 +292,12 @@ def all_tree_labellings(tree): :param tskit.Tree tree: The tree used to generate labelled trees of the same shape. + :param float span: The genomic span of each returned tree. :rtype: tskit.Tree """ rank_tree = RankTree.from_tsk_tree(tree) for labelling in RankTree.all_labellings(rank_tree): - yield labelling.to_tsk_tree() + yield labelling.to_tsk_tree(span=span) class RankTree: @@ -515,12 +518,19 @@ def from_tsk_tree(tree): return RankTree.from_tsk_tree_node(tree, tree.root) - def to_tsk_tree(self): + def to_tsk_tree(self, span=1): + """ + Convert a ``RankTree`` into the only tree in a new tree sequence. + + :param float span: The genomic span of the returned tree. The tree will cover + the interval :math:`[0, span)` and the :attr:`~Tree.tree_sequence` from which + the tree is taken will have its :attr:`~tskit.TreeSequence.sequence_length` + equal to ``span``. 
+ """ if set(self.labels) != set(range(self.num_leaves)): raise ValueError("Labels set must be equivalent to [0, num_leaves)") - seq_length = 1 - tables = tskit.TableCollection(seq_length) + tables = tskit.TableCollection(span) def add_node(node): if node.is_leaf(): @@ -532,7 +542,7 @@ def add_node(node): max_child_time = max(tables.nodes.time[c] for c in child_ids) parent_id = tables.nodes.add_row(time=max_child_time + 1) for child_id in child_ids: - tables.edges.add_row(0, seq_length, parent_id, child_id) + tables.edges.add_row(0, span, parent_id, child_id) return parent_id diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 7fd2abcf0d..31077334bd 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -847,7 +847,7 @@ def rank(self): return combinatorics.RankTree.from_tsk_tree(self).rank() @staticmethod - def unrank(rank, num_leaves): + def unrank(rank, num_leaves, *, span=1): """ Reconstruct the tree of the given ``rank`` (see :meth:`tskit.Tree.rank`) with ``num_leaves`` leaves. @@ -859,11 +859,15 @@ def unrank(rank, num_leaves): :param tuple(int) rank: The rank of the tree to generate. :param int num_leaves: The number of leaves of the tree to generate. + :param float span: The genomic span of the returned tree. The tree will cover + the interval :math:`[0, \\text{span})` and the :attr:`~Tree.tree_sequence` + from which the tree is taken will have its + :attr:`~tskit.TreeSequence.sequence_length` equal to ``span``. :rtype: Tree :raises: ValueError: If the given rank is out of bounds for trees with ``num_leaves`` leaves. """ - return combinatorics.RankTree.unrank(rank, num_leaves).to_tsk_tree() + return combinatorics.RankTree.unrank(rank, num_leaves).to_tsk_tree(span=span) def count_topologies(self, sample_sets=None): """