Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ In development

**New features**

- Add a ``nexus`` function that outputs a tree sequence in Nexus format
(:user:`saunack`, :pr:`550`).

- Add extension of Kendall-Colijn tree distance metric for tree sequences
computed by ``TreeSequence.kc_distance``
(:user:`daniel-goldstein`, :pr:`548`)
Expand Down
138 changes: 115 additions & 23 deletions python/tests/test_newick.py → python/tests/test_phylo_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,40 +23,24 @@
"""
Tests for the newick output feature.
"""
import itertools
import unittest

import msprime
import newick
from Bio.Nexus import Nexus

import tskit
from tests import tsutil

class TestNewick(unittest.TestCase):

class TreeExamples(unittest.TestCase):
"""
Tests that the newick output has the properties that we need using
external Newick parser.
Generates trees for testing the phylo format outputs.
"""

random_seed = 155

def verify_newick_topology(self, tree, root=None, node_labels=None):
if root is None:
root = tree.root
ns = tree.newick(precision=16, root=root, node_labels=node_labels)
if node_labels is None:
leaf_labels = {u: str(u + 1) for u in tree.leaves(root)}
else:
leaf_labels = {u: node_labels[u] for u in tree.leaves(root)}
newick_tree = newick.loads(ns)[0]
leaf_names = newick_tree.get_leaf_names()
self.assertEqual(sorted(leaf_names), sorted(leaf_labels.values()))
for u in tree.leaves(root):
name = leaf_labels[u]
node = newick_tree.get_node(name)
while u != root:
self.assertAlmostEqual(node.length, tree.branch_length(u))
node = node.ancestor
u = tree.parent(u)
self.assertIsNone(node.ancestor)

def get_nonbinary_example(self):
ts = msprime.simulate(
sample_size=20,
Expand Down Expand Up @@ -94,6 +78,36 @@ def get_multiroot_example(self):
)
return tables.tree_sequence()

def get_single_tree(self):
return msprime.simulate(10, random_seed=2)


class TestNewick(TreeExamples):
"""
Tests that the newick output has the properties that we need using
external Newick parser.
"""

def verify_newick_topology(self, tree, root=None, node_labels=None):
if root is None:
root = tree.root
ns = tree.newick(precision=16, root=root, node_labels=node_labels)
if node_labels is None:
leaf_labels = {u: str(u + 1) for u in tree.leaves(root)}
else:
leaf_labels = {u: node_labels[u] for u in tree.leaves(root)}
newick_tree = newick.loads(ns)[0]
leaf_names = newick_tree.get_leaf_names()
self.assertEqual(sorted(leaf_names), sorted(leaf_labels.values()))
for u in tree.leaves(root):
name = leaf_labels[u]
node = newick_tree.get_node(name)
while u != root:
self.assertAlmostEqual(node.length, tree.branch_length(u))
node = node.ancestor
u = tree.parent(u)
self.assertIsNone(node.ancestor)

def test_nonbinary_tree(self):
ts = self.get_nonbinary_example()
for t in ts.trees():
Expand Down Expand Up @@ -146,3 +160,81 @@ def test_single_node_label(self):
[n.name for n in root.walk()],
[labels[tree.root]] + [None for _ in range(len(list(tree.nodes())) - 1)],
)


class TestNexus(TreeExamples):
"""
Tests that the nexus output has the properties that we need using
external Nexus parser.
"""

def verify_tree(self, nexus_tree, tsk_tree):
self.assertEqual(len(nexus_tree.get_terminals()), tsk_tree.num_samples())

bio_node_map = {}
for node_id in nexus_tree.all_ids():
bio_node = nexus_tree.node(node_id)
bio_node_map[bio_node.data.taxon] = bio_node

for u in tsk_tree.nodes():
node = tsk_tree.tree_sequence.node(u)
label = f"tsk_{node.id}_{node.flags}"
bio_node = bio_node_map.pop(label)
self.assertAlmostEqual(
bio_node.data.branchlength, tsk_tree.branch_length(u)
)
if tsk_tree.parent(u) == tskit.NULL:
self.assertEqual(bio_node.prev, None)
else:
bio_node_parent = nexus_tree.node(bio_node.prev)
parent = tsk_tree.tree_sequence.node(tsk_tree.parent(u))
self.assertEqual(
bio_node_parent.data.taxon, f"tsk_{parent.id}_{parent.flags}"
)
self.assertEqual(len(bio_node_map), 0)

def verify_nexus_topology(self, treeseq):
nexus = treeseq.nexus(precision=16)
nexus_treeseq = Nexus.Nexus(nexus)
self.assertEqual(treeseq.num_trees, len(nexus_treeseq.trees))
for tree, nexus_tree in itertools.zip_longest(
treeseq.trees(), nexus_treeseq.trees
):
name = nexus_tree.name
split_name = name.split("_")
self.assertEqual(len(split_name), 2)
start = float(split_name[0][4:])
end = float(split_name[1])
self.assertAlmostEqual(tree.interval[0], start)
self.assertAlmostEqual(tree.interval[1], end)

self.verify_tree(nexus_tree, tree)

def test_binary_tree(self):
ts = self.get_binary_example()
self.verify_nexus_topology(ts)

def test_nonbinary_example(self):
ts = self.get_nonbinary_example()
self.verify_nexus_topology(ts)

def test_single_tree(self):
ts = self.get_single_tree()
self.verify_nexus_topology(ts)

def test_multiroot(self):
ts = self.get_multiroot_example()
self.assertRaises(ValueError, ts.nexus)

def test_many_trees(self):
ts = msprime.simulate(4, recombination_rate=2, random_seed=123)
self.verify_nexus_topology(ts)

def test_many_trees_sequence_length(self):
ts = msprime.simulate(4, length=10, recombination_rate=0.2, random_seed=13)
self.verify_nexus_topology(ts)

def test_internal_samples(self):
ts = msprime.simulate(8, random_seed=2)
ts = tsutil.jiggle_samples(ts)
self.verify_nexus_topology(ts)
38 changes: 38 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -3800,6 +3800,44 @@ def write_vcf(
)
writer.write(output)

def nexus(self, precision=14):
"""
Returns a `nexus encoding <https://en.wikipedia.org/wiki/Nexus_file>`_
of this tree sequence. Trees along the sequence are listed sequentially in
the TREES block. The tree spanning the interval :math:`[x, y)`` is
given the name "tree_x_y". Spatial positions are written at the
specified precision.

Nodes in the tree sequence are identified by the taxon labels of the
form ``f"tsk_{node.id}_{node.flags}"``, such that a node with ``id=5``
and ``flags=1`` will have the label ``"tsk_5_1"`` (please see the
:ref:`data model <sec_node_table_definition>` section for details
on the interpretation of node ID and flags values). These labels are
listed for all nodes in the tree sequence in the ``TAXLABELS`` block.

:param int precision: The numerical precision with which branch lengths
and tree positions are printed.
:return: A nexus representation of this TreeSequence.
:rtype: str
"""
node_labels = {node.id: f"tsk_{node.id}_{node.flags}" for node in self.nodes()}

s = "#NEXUS\n"
s += "BEGIN TAXA;\n"
s += "TAXLABELS "
s += ",".join(node_labels[node.id] for node in self.nodes()) + ";\n"
s += "END;\n"

s += "BEGIN TREES;\n"
for tree in self.trees():
start_interval = "{0:.{1}f}".format(tree.interval[0], precision)
end_interval = "{0:.{1}f}".format(tree.interval[1], precision)
newick = tree.newick(precision, node_labels=node_labels)
s += f"\tTREE tree{start_interval}_{end_interval} = {newick}\n"

s += "END;\n"
return s

def simplify(
self,
samples=None,
Expand Down