diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 2cec6bf8d3..3c9f92c64a 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -4,6 +4,9 @@ **Features** +- ``Tree.newick()`` now has extra option ``include_branch_lengths`` to allow branch + lengths to be omitted (:user:`hyanwong`, :pr:`931`). + - Added ``Tree.generate_star`` static method to create star-topologies (:user:`hyanwong`, :pr:`934`). @@ -28,6 +31,7 @@ **Breaking changes** - The argument to ``ts.dump`` and ``tskit.load`` has been renamed `file` from `path`. +- All arguments to ``Tree.newick()`` except precision are now keyword-only. -------------------- [0.3.2] - 2020-09-29 diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 29d003a4aa..ed9df8ea90 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -2092,6 +2092,28 @@ def test_newick_large_times(self): newick_c = tree.newick(precision=precision) assert newick_c == newick_py + def test_bifurcating_newick(self): + for n_tips in range(2, 6): + ts = msprime.simulate(n_tips, random_seed=1) # msprime trees are binary + for tree in ts.trees(): + base_newick = tree.newick(include_branch_lengths=False).strip(";") + for i in range(n_tips): + # Each tip number (i+1) mentioned once + assert base_newick.count(str(i + 1)) == 1 + # Binary newick trees have 3 chars per extra tip: "(,)" + assert len(base_newick) == n_tips + 3 * (n_tips - 1) + + def test_newick_topology_equiv(self): + replace_numeric = {ord(x): None for x in "1234567890:."} + for ts in get_example_tree_sequences(): + for tree in ts.trees(): + if tree.num_roots > 1: + continue + plain_newick = tree.newick(node_labels={}, include_branch_lengths=False) + newick1 = tree.newick().translate(replace_numeric) + newick2 = tree.newick(node_labels={}).translate(replace_numeric) + assert newick1 == newick2 == plain_newick + def test_newick_buffer_too_small_bug(self): nodes = io.StringIO( """\ diff --git a/python/tests/test_phylo_formats.py b/python/tests/test_phylo_formats.py index 7322fb2d98..72026d5f26 100644 --- a/python/tests/test_phylo_formats.py +++ b/python/tests/test_phylo_formats.py @@ -89,22 +89,73 @@ class TestNewick(TreeExamples): external Newick parser. """ - def verify_newick_topology(self, tree, root=None, node_labels=None): + # 3 methods to return example tree sequences with internal samples: + # (copied from test_highlevel.py) + def all_nodes_samples_example(self): + n = 5 + ts = msprime.simulate(n, random_seed=10, mutation_rate=5) + assert ts.num_mutations > 0 + tables = ts.dump_tables() + nodes = tables.nodes + flags = nodes.flags + # Set all nodes to be samples. + flags[:] = tskit.NODE_IS_SAMPLE + nodes.flags = flags + return tables.tree_sequence() + + def only_internal_samples_example(self): + n = 5 + ts = msprime.simulate(n, random_seed=10, mutation_rate=5) + assert ts.num_mutations > 0 + tables = ts.dump_tables() + nodes = tables.nodes + flags = nodes.flags + # Set just internal nodes to be samples. + flags[:] = 0 + flags[n:] = tskit.NODE_IS_SAMPLE + nodes.flags = flags + return tables.tree_sequence() + + def mixed_node_samples_example(self): + n = 5 + ts = msprime.simulate(n, random_seed=10, mutation_rate=5) + assert ts.num_mutations > 0 + tables = ts.dump_tables() + nodes = tables.nodes + flags = nodes.flags + # Set a mixture of internal and leaf samples. + flags[:] = 0 + flags[n // 2 : n + n // 2] = tskit.NODE_IS_SAMPLE + nodes.flags = flags + return tables.tree_sequence() + + def verify_newick_topology( + self, tree, root=None, node_labels=None, include_branch_lengths=True + ): if root is None: root = tree.root - ns = tree.newick(precision=16, root=root, node_labels=node_labels) + ns = tree.newick( + precision=16, + root=root, + node_labels=node_labels, + include_branch_lengths=include_branch_lengths, + ) if node_labels is None: leaf_labels = {u: str(u + 1) for u in tree.leaves(root)} else: leaf_labels = {u: node_labels[u] for u in tree.leaves(root)} - newick_tree = newick.loads(ns)[0] + # default newick lib outputs 0.0 if length is None => replace the length_parser + newick_tree = newick.loads( + ns, length_parser=lambda x: None if x is None else float(x) + )[0] leaf_names = newick_tree.get_leaf_names() assert sorted(leaf_names) == sorted(leaf_labels.values()) for u in tree.leaves(root): name = leaf_labels[u] node = newick_tree.get_node(name) while u != root: - self.assertAlmostEqual(node.length, tree.branch_length(u)) + branch_len = tree.branch_length(u) if include_branch_lengths else None + self.assertAlmostEqual(node.length, branch_len) node = node.ancestor u = tree.parent(u) assert node.ancestor is None @@ -162,6 +213,34 @@ def test_single_node_label(self): None for _ in range(len(list(tree.nodes())) - 1) ] + def test_no_lengths(self): + t = msprime.simulate(5, random_seed=2).first() + self.verify_newick_topology(t, include_branch_lengths=False) + + def test_samples_differ_from_leaves(self): + for ts in ( + self.all_nodes_samples_example(), + self.only_internal_samples_example(), + self.mixed_node_samples_example(), + ): + for t in ts.trees(): + self.verify_newick_topology(t) + + def test_no_lengths_equiv(self): + for ts in ( + self.all_nodes_samples_example(), + self.only_internal_samples_example(), + self.mixed_node_samples_example(), + ): + for t in ts.trees(): + newick_nolengths = t.newick(include_branch_lengths=False) + newick_nolengths = newick.loads(newick_nolengths)[0] + newick_lengths = t.newick() + newick_lengths = newick.loads(newick_lengths)[0] + for node in newick_lengths.walk(): + node.length = None + assert newick.dumps(newick_nolengths) == newick.dumps(newick_lengths) + class TestNexus(TreeExamples): """ diff --git a/python/tskit/trees.py b/python/tskit/trees.py index e5f864d4d9..3e597148ce 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -2154,10 +2154,10 @@ def nodes(self, root=None, order="preorder"): yield from iterator(u) # TODO make this a bit less embarrassing by using an iterative method. - def __build_newick(self, node, precision, node_labels): + def __build_newick(self, *, node, precision, node_labels, include_branch_lengths): """ Simple recursive version of the newick generator used when non-default - node labels are needed. + node labels are needed, or when branch lengths are omitted """ label = node_labels.get(node, "") if self.is_leaf(node): @@ -2166,12 +2166,26 @@ def __build_newick(self, node, precision, node_labels): s = "(" for child in self.children(node): branch_length = self.branch_length(child) - subtree = self.__build_newick(child, precision, node_labels) - s += subtree + ":{0:.{1}f},".format(branch_length, precision) + subtree = self.__build_newick( + node=child, + precision=precision, + node_labels=node_labels, + include_branch_lengths=include_branch_lengths, + ) + if include_branch_lengths: + subtree += ":{0:.{1}f}".format(branch_length, precision) + s += subtree + "," s = s[:-1] + f"){label}" return s - def newick(self, precision=14, root=None, node_labels=None): + def newick( + self, + precision=14, # Should probably be keyword only, left positional for legacy use + *, + root=None, + node_labels=None, + include_branch_lengths=True, + ): """ Returns a `newick encoding `_ of this tree. If the ``root`` argument is specified, return a representation @@ -2193,6 +2207,8 @@ def newick(self, precision=14, root=None, node_labels=None): :param dict node_labels: If specified, show custom labels for the nodes that are present in the map. Any nodes not specified in the map will not have a node label. + :param include_branch_lengths: If True (default), output branch lengths in the + Newick string. If False, only output the topology, without branch lengths. :return: A newick representation of this tree. :rtype: str """ @@ -2204,6 +2220,9 @@ def newick(self, precision=14, root=None, node_labels=None): "newick trees, one for each root." ) root = self.root + if not include_branch_lengths and node_labels is None: + # C code always puts branch lengths: force Py code by setting default labels + node_labels = {i: str(i + 1) for i in self.leaves()} if node_labels is None: root_time = max(1, self.time(root)) max_label_size = math.ceil(math.log10(self.tree_sequence.num_nodes)) @@ -2216,7 +2235,15 @@ def newick(self, precision=14, root=None, node_labels=None): ) s = s.decode() else: - return self.__build_newick(root, precision, node_labels) + ";" + s = ( + self.__build_newick( + node=root, + precision=precision, + node_labels=node_labels, + include_branch_lengths=include_branch_lengths, + ) + + ";" + ) return s def as_dict_of_dicts(self): @@ -4547,7 +4574,7 @@ def to_nexus(self, precision=14): for tree in self.trees(): start_interval = "{0:.{1}f}".format(tree.interval.left, precision) end_interval = "{0:.{1}f}".format(tree.interval.right, precision) - newick = tree.newick(precision, node_labels=node_labels) + newick = tree.newick(precision=precision, node_labels=node_labels) s += f"\tTREE tree{start_interval}_{end_interval} = {newick}\n" s += "END;\n"