Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

**Features**

- ``Tree.newick()`` now has extra option ``include_branch_lengths`` to allow branch
lengths to be omitted (:user:`hyanwong`, :pr:`931`).

- Added ``Tree.generate_star`` static method to create star-topologies (:user:`hyanwong`,
:pr:`934`).

Expand All @@ -28,6 +31,7 @@
**Breaking changes**

- The argument to ``ts.dump`` and ``tskit.load`` has been renamed `file` from `path`.
- All arguments to ``Tree.newick()`` except precision are now keyword-only.

--------------------
[0.3.2] - 2020-09-29
Expand Down
22 changes: 22 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2092,6 +2092,28 @@ def test_newick_large_times(self):
newick_c = tree.newick(precision=precision)
assert newick_c == newick_py

def test_bifurcating_newick(self):
for n_tips in range(2, 6):
ts = msprime.simulate(n_tips, random_seed=1) # msprime trees are binary
for tree in ts.trees():
base_newick = tree.newick(include_branch_lengths=False).strip(";")
for i in range(n_tips):
# Each tip number (i+1) mentioned once
assert base_newick.count(str(i + 1)) == 1
# Binary newick trees have 3 chars per extra tip: "(,)"
assert len(base_newick) == n_tips + 3 * (n_tips - 1)

def test_newick_topology_equiv(self):
replace_numeric = {ord(x): None for x in "1234567890:."}
for ts in get_example_tree_sequences():
for tree in ts.trees():
if tree.num_roots > 1:
continue
plain_newick = tree.newick(node_labels={}, include_branch_lengths=False)
newick1 = tree.newick().translate(replace_numeric)
newick2 = tree.newick(node_labels={}).translate(replace_numeric)
assert newick1 == newick2 == plain_newick

def test_newick_buffer_too_small_bug(self):
nodes = io.StringIO(
"""\
Expand Down
87 changes: 83 additions & 4 deletions python/tests/test_phylo_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,22 +89,73 @@ class TestNewick(TreeExamples):
external Newick parser.
"""

def verify_newick_topology(self, tree, root=None, node_labels=None):
# 3 methods to return example tree sequences with internal samples:
# (copied from test_highlevel.py)
def all_nodes_samples_example(self):
n = 5
ts = msprime.simulate(n, random_seed=10, mutation_rate=5)
assert ts.num_mutations > 0
tables = ts.dump_tables()
nodes = tables.nodes
flags = nodes.flags
# Set all nodes to be samples.
flags[:] = tskit.NODE_IS_SAMPLE
nodes.flags = flags
return tables.tree_sequence()

def only_internal_samples_example(self):
n = 5
ts = msprime.simulate(n, random_seed=10, mutation_rate=5)
assert ts.num_mutations > 0
tables = ts.dump_tables()
nodes = tables.nodes
flags = nodes.flags
# Set just internal nodes to be samples.
flags[:] = 0
flags[n:] = tskit.NODE_IS_SAMPLE
nodes.flags = flags
return tables.tree_sequence()

def mixed_node_samples_example(self):
n = 5
ts = msprime.simulate(n, random_seed=10, mutation_rate=5)
assert ts.num_mutations > 0
tables = ts.dump_tables()
nodes = tables.nodes
flags = nodes.flags
# Set a mixture of internal and leaf samples.
flags[:] = 0
flags[n // 2 : n + n // 2] = tskit.NODE_IS_SAMPLE
nodes.flags = flags
return tables.tree_sequence()

def verify_newick_topology(
self, tree, root=None, node_labels=None, include_branch_lengths=True
):
if root is None:
root = tree.root
ns = tree.newick(precision=16, root=root, node_labels=node_labels)
ns = tree.newick(
precision=16,
root=root,
node_labels=node_labels,
include_branch_lengths=include_branch_lengths,
)
if node_labels is None:
leaf_labels = {u: str(u + 1) for u in tree.leaves(root)}
else:
leaf_labels = {u: node_labels[u] for u in tree.leaves(root)}
newick_tree = newick.loads(ns)[0]
# default newick lib outputs 0.0 if length is None => replace the length_parser
newick_tree = newick.loads(
ns, length_parser=lambda x: None if x is None else float(x)
)[0]
leaf_names = newick_tree.get_leaf_names()
assert sorted(leaf_names) == sorted(leaf_labels.values())
for u in tree.leaves(root):
name = leaf_labels[u]
node = newick_tree.get_node(name)
while u != root:
self.assertAlmostEqual(node.length, tree.branch_length(u))
branch_len = tree.branch_length(u) if include_branch_lengths else None
self.assertAlmostEqual(node.length, branch_len)
node = node.ancestor
u = tree.parent(u)
assert node.ancestor is None
Expand Down Expand Up @@ -162,6 +213,34 @@ def test_single_node_label(self):
None for _ in range(len(list(tree.nodes())) - 1)
]

def test_no_lengths(self):
t = msprime.simulate(5, random_seed=2).first()
self.verify_newick_topology(t, include_branch_lengths=False)

def test_samples_differ_from_leaves(self):
for ts in (
self.all_nodes_samples_example(),
self.only_internal_samples_example(),
self.mixed_node_samples_example(),
):
for t in ts.trees():
self.verify_newick_topology(t)

def test_no_lengths_equiv(self):
for ts in (
self.all_nodes_samples_example(),
self.only_internal_samples_example(),
self.mixed_node_samples_example(),
):
for t in ts.trees():
newick_nolengths = t.newick(include_branch_lengths=False)
newick_nolengths = newick.loads(newick_nolengths)[0]
newick_lengths = t.newick()
newick_lengths = newick.loads(newick_lengths)[0]
for node in newick_lengths.walk():
node.length = None
assert newick.dumps(newick_nolengths) == newick.dumps(newick_lengths)


class TestNexus(TreeExamples):
"""
Expand Down
41 changes: 34 additions & 7 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -2154,10 +2154,10 @@ def nodes(self, root=None, order="preorder"):
yield from iterator(u)

# TODO make this a bit less embarrassing by using an iterative method.
def __build_newick(self, node, precision, node_labels):
def __build_newick(self, *, node, precision, node_labels, include_branch_lengths):
"""
Simple recursive version of the newick generator used when non-default
node labels are needed.
node labels are needed, or when branch lengths are omitted
"""
label = node_labels.get(node, "")
if self.is_leaf(node):
Expand All @@ -2166,12 +2166,26 @@ def __build_newick(self, node, precision, node_labels):
s = "("
for child in self.children(node):
branch_length = self.branch_length(child)
subtree = self.__build_newick(child, precision, node_labels)
s += subtree + ":{0:.{1}f},".format(branch_length, precision)
subtree = self.__build_newick(
node=child,
precision=precision,
node_labels=node_labels,
include_branch_lengths=include_branch_lengths,
)
if include_branch_lengths:
subtree += ":{0:.{1}f}".format(branch_length, precision)
s += subtree + ","
s = s[:-1] + f"){label}"
return s

def newick(self, precision=14, root=None, node_labels=None):
def newick(
self,
precision=14, # Should probably be keyword only, left positional for legacy use
*,
root=None,
node_labels=None,
include_branch_lengths=True,
):
"""
Returns a `newick encoding <https://en.wikipedia.org/wiki/Newick_format>`_
of this tree. If the ``root`` argument is specified, return a representation
Expand All @@ -2193,6 +2207,8 @@ def newick(self, precision=14, root=None, node_labels=None):
:param dict node_labels: If specified, show custom labels for the nodes
that are present in the map. Any nodes not specified in the map will
not have a node label.
:param include_branch_lengths: If True (default), output branch lengths in the
Newick string. If False, only output the topology, without branch lengths.
:return: A newick representation of this tree.
:rtype: str
"""
Expand All @@ -2204,6 +2220,9 @@ def newick(self, precision=14, root=None, node_labels=None):
"newick trees, one for each root."
)
root = self.root
if not include_branch_lengths and node_labels is None:
# C code always puts branch lengths: force Py code by setting default labels
node_labels = {i: str(i + 1) for i in self.leaves()}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, leaves() or samples()? I think it should be samples(). Leaves is tricky, as we can have non-sample leaves.

What does the C engine do? We should add a test for this, since we are doing this workaround.

Maybe it would be better to not do this, and to change the condition on the line below instead to

if node_labels is None and include_branch_lengths:

Then it's completely explicit and we don't need any comments explaining what we're doing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We claim it does leaves only (regardless of whether they are samples), and that seems sensible to me. Some newick readers only bother with names for tips anyway. In particular, the docs currently say:

By default, leaf nodes are labelled with their numerical ID + 1, and internal nodes are not labelled.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then it's completely explicit and we don't need any comments explaining what we're doing.

We would still need to set the node labels to the leaves only when node_labels == None for the python Newick generating code. But happy to move the logic around.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, what you have sounds good then. We should add a test to verify that we get the same labels with and without include_branch_lengths in a corner case when we have non-sample leaves, though.

if node_labels is None:
root_time = max(1, self.time(root))
max_label_size = math.ceil(math.log10(self.tree_sequence.num_nodes))
Expand All @@ -2216,7 +2235,15 @@ def newick(self, precision=14, root=None, node_labels=None):
)
s = s.decode()
else:
return self.__build_newick(root, precision, node_labels) + ";"
s = (
self.__build_newick(
node=root,
precision=precision,
node_labels=node_labels,
include_branch_lengths=include_branch_lengths,
)
+ ";"
)
return s

def as_dict_of_dicts(self):
Expand Down Expand Up @@ -4547,7 +4574,7 @@ def to_nexus(self, precision=14):
for tree in self.trees():
start_interval = "{0:.{1}f}".format(tree.interval.left, precision)
end_interval = "{0:.{1}f}".format(tree.interval.right, precision)
newick = tree.newick(precision, node_labels=node_labels)
newick = tree.newick(precision=precision, node_labels=node_labels)
s += f"\tTREE tree{start_interval}_{end_interval} = {newick}\n"

s += "END;\n"
Expand Down