Skip to content

Commit d30e188

Browse files
committed
Allow newicks to be output without branch lengths
1 parent cf1c23c commit d30e188

File tree

4 files changed

+76
-10
lines changed

4 files changed

+76
-10
lines changed

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
**Features**
66

7+
- ``Tree.newick()`` now has an extra option ``include_branch_lengths`` to allow branch
8+
lengths to be omitted (:user:`hyanwong`, :pr:`931`).
9+
710
- Added ``TableCollection.equals``, a method that compares two table
811
collections with the options to ignore top-level metadata/schema or
912
provenance tables. (:user:`mufernando`, :issue:`896`, :pr:`897`).
@@ -14,6 +17,7 @@
1417
**Breaking changes**
1518

1619
- The argument to ``ts.dump`` and ``tskit.load`` has been renamed from ``path`` to ``file``.
20+
- All arguments to ``Tree.newick()`` except ``precision`` are now keyword-only.
1721

1822
--------------------
1923
[0.3.2] - 2020-09-29

python/tests/test_highlevel.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,6 +2054,28 @@ def test_newick_large_times(self):
20542054
newick_c = tree.newick(precision=precision)
20552055
assert newick_c == newick_py
20562056

2057+
def test_bifurcating_newick(self):
    """
    Check the branch-length-free newick string of small simulated trees:
    every tip label must appear exactly once, and the string length must
    match that of a fully bifurcating topology.
    """
    for num_tips in range(2, 6):
        # msprime trees are binary
        sim = msprime.simulate(num_tips, random_seed=1)
        # One char per (single-digit) tip label, and binary newick trees
        # have 3 chars per extra tip: "(,)"
        expected_length = num_tips + 3 * (num_tips - 1)
        for tree in sim.trees():
            topology = tree.newick(include_branch_lengths=False).strip(";")
            for label in range(1, num_tips + 1):
                # Each tip label is mentioned once
                assert topology.count(str(label)) == 1
            assert len(topology) == expected_length
2067+
2068+
def test_newick_topology_equiv(self):
2069+
replace_numeric = {ord(x): None for x in "1234567890:."}
2070+
for ts in get_example_tree_sequences():
2071+
for tree in ts.trees():
2072+
if tree.num_roots > 1:
2073+
continue
2074+
plain_newick = tree.newick(node_labels={}, include_branch_lengths=False)
2075+
newick1 = tree.newick().translate(replace_numeric)
2076+
newick2 = tree.newick(node_labels={}).translate(replace_numeric)
2077+
assert newick1 == newick2 == plain_newick
2078+
20572079
def test_newick_buffer_too_small_bug(self):
20582080
nodes = io.StringIO(
20592081
"""\

python/tests/test_phylo_formats.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,17 @@ class TestNewick(TreeExamples):
8989
external Newick parser.
9090
"""
9191

92-
def verify_newick_topology(self, tree, root=None, node_labels=None):
92+
def verify_newick_topology(
93+
self, tree, root=None, node_labels=None, include_branch_lengths=True
94+
):
9395
if root is None:
9496
root = tree.root
95-
ns = tree.newick(precision=16, root=root, node_labels=node_labels)
97+
ns = tree.newick(
98+
precision=16,
99+
root=root,
100+
node_labels=node_labels,
101+
include_branch_lengths=include_branch_lengths,
102+
)
96103
if node_labels is None:
97104
leaf_labels = {u: str(u + 1) for u in tree.leaves(root)}
98105
else:
@@ -104,7 +111,9 @@ def verify_newick_topology(self, tree, root=None, node_labels=None):
104111
name = leaf_labels[u]
105112
node = newick_tree.get_node(name)
106113
while u != root:
107-
self.assertAlmostEqual(node.length, tree.branch_length(u))
114+
# the newick library encodes absent branch lengths as 0.0
115+
branch_length = tree.branch_length(u) if include_branch_lengths else 0.0
116+
self.assertAlmostEqual(node.length, branch_length)
108117
node = node.ancestor
109118
u = tree.parent(u)
110119
assert node.ancestor is None
@@ -162,6 +171,10 @@ def test_single_node_label(self):
162171
None for _ in range(len(list(tree.nodes())) - 1)
163172
]
164173

174+
def test_no_lengths(self):
    """
    Run the newick topology verification on the first tree of a small
    simulation, with branch lengths omitted from the newick output.
    """
    tree_sequence = msprime.simulate(5, random_seed=2)
    first_tree = tree_sequence.first()
    self.verify_newick_topology(first_tree, include_branch_lengths=False)
177+
165178

166179
class TestNexus(TreeExamples):
167180
"""

python/tskit/trees.py

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,10 +2148,10 @@ def nodes(self, root=None, order="preorder"):
21482148
yield from iterator(u)
21492149

21502150
# TODO make this a bit less embarrassing by using an iterative method.
2151-
def __build_newick(self, node, precision, node_labels):
2151+
def __build_newick(self, *, node, precision, node_labels, include_branch_lengths):
21522152
"""
21532153
Simple recursive version of the newick generator used when non-default
2154-
node labels are needed.
2154+
node labels are needed, or when branch lengths are omitted
21552155
"""
21562156
label = node_labels.get(node, "")
21572157
if self.is_leaf(node):
@@ -2160,12 +2160,26 @@ def __build_newick(self, node, precision, node_labels):
21602160
s = "("
21612161
for child in self.children(node):
21622162
branch_length = self.branch_length(child)
2163-
subtree = self.__build_newick(child, precision, node_labels)
2164-
s += subtree + ":{0:.{1}f},".format(branch_length, precision)
2163+
subtree = self.__build_newick(
2164+
node=child,
2165+
precision=precision,
2166+
node_labels=node_labels,
2167+
include_branch_lengths=include_branch_lengths,
2168+
)
2169+
if include_branch_lengths:
2170+
subtree += ":{0:.{1}f}".format(branch_length, precision)
2171+
s += subtree + ","
21652172
s = s[:-1] + f"){label}"
21662173
return s
21672174

2168-
def newick(self, precision=14, root=None, node_labels=None):
2175+
def newick(
2176+
self,
2177+
precision=14, # Should probably be keyword only, left positional for legacy use
2178+
*,
2179+
root=None,
2180+
node_labels=None,
2181+
include_branch_lengths=True,
2182+
):
21692183
"""
21702184
Returns a `newick encoding <https://en.wikipedia.org/wiki/Newick_format>`_
21712185
of this tree. If the ``root`` argument is specified, return a representation
@@ -2187,6 +2201,8 @@ def newick(self, precision=14, root=None, node_labels=None):
21872201
:param dict node_labels: If specified, show custom labels for the nodes
21882202
that are present in the map. Any nodes not specified in the map will
21892203
not have a node label.
2204+
:param include_branch_lengths: If True (default), output branch lengths in the
2205+
Newick file. If False, only output the topology, without branch lengths.
21902206
:return: A newick representation of this tree.
21912207
:rtype: str
21922208
"""
@@ -2198,6 +2214,9 @@ def newick(self, precision=14, root=None, node_labels=None):
21982214
"newick trees, one for each root."
21992215
)
22002216
root = self.root
2217+
if not include_branch_lengths and node_labels is None:
2218+
# Force the python generator for simplicity, by specifying the default labels
2219+
node_labels = {i: str(i + 1) for i in self.leaves()}
22012220
if node_labels is None:
22022221
root_time = max(1, self.time(root))
22032222
max_label_size = math.ceil(math.log10(self.tree_sequence.num_nodes))
@@ -2210,7 +2229,15 @@ def newick(self, precision=14, root=None, node_labels=None):
22102229
)
22112230
s = s.decode()
22122231
else:
2213-
return self.__build_newick(root, precision, node_labels) + ";"
2232+
s = (
2233+
self.__build_newick(
2234+
node=root,
2235+
precision=precision,
2236+
node_labels=node_labels,
2237+
include_branch_lengths=include_branch_lengths,
2238+
)
2239+
+ ";"
2240+
)
22142241
return s
22152242

22162243
def as_dict_of_dicts(self):
@@ -4489,7 +4516,7 @@ def to_nexus(self, precision=14):
44894516
for tree in self.trees():
44904517
start_interval = "{0:.{1}f}".format(tree.interval.left, precision)
44914518
end_interval = "{0:.{1}f}".format(tree.interval.right, precision)
4492-
newick = tree.newick(precision, node_labels=node_labels)
4519+
newick = tree.newick(precision=precision, node_labels=node_labels)
44934520
s += f"\tTREE tree{start_interval}_{end_interval} = {newick}\n"
44944521

44954522
s += "END;\n"

0 commit comments

Comments
 (0)