Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ In development

**New features**

- Add an optional node traversal order in ``tskit.Tree`` that uses the minimum
lexicographic order of leaf nodes visited. This ordering (``"minlex_postorder"``)
adds more determinism because it constrains the order in which children of
a node are visited (:user:`brianzhang01`, :pr:`411`).

- Add ``_repr_html_`` to tables, so that jupyter notebooks render them as
html tables (:user:`benjeffery`, :pr:`514`)

Expand Down
26 changes: 26 additions & 0 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,30 @@ def _levelorder_nodes(self, u, l, level):
for c in self.children(u):
self._levelorder_nodes(c, l, level + 1)

def _minlex_postorder_nodes(self, u, l):
l.extend(self._minlex_postorder_nodes_helper(u)[1])

def _minlex_postorder_nodes_helper(self, u):
"""
For a given input ID u, this function returns a tuple whose first value
is the minimum leaf node ID under node u, and whose second value is
a list containing the minlex postorder for the subtree rooted at node u.
The first value is needed for sorting, and the second value is what
finally gets returned.
"""
children = self.children(u)
if len(children) > 0:
children_return = [self._minlex_postorder_nodes_helper(c) for c in children]
# sorts by first value, which is the minimum leaf node ID
children_return.sort()
minlex_postorder = []
for _, child_minlex_postorder in children_return:
minlex_postorder.extend(child_minlex_postorder)
minlex_postorder.extend([u])
return (children_return[0][0], minlex_postorder)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we can't use sorted(children_return) above because we need to get the first element here as well

else:
return (u, [u])

def nodes(self, root=None, order="preorder"):
roots = [root]
if root is None:
Expand All @@ -127,6 +151,8 @@ def nodes(self, root=None, order="preorder"):
# Nested list comprehension flattens node_list in order
self._levelorder_nodes(u, node_list, 0)
node_list = iter([i for level in node_list for i in level])
elif order == "minlex_postorder":
self._minlex_postorder_nodes(u, node_list)
else:
raise ValueError("order not supported")
yield from node_list
Expand Down
18 changes: 13 additions & 5 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,9 +1888,8 @@ def test_traversals(self):
tree = next(ts.trees())
self.verify_traversals(tree)

# To verify time-ordered traversal we can't use the method used for the
# other traversals above, it checks for one-to-one correspondence.
# As more than one ordering is valid for time, we do it separately here
# Verify time-ordered traversals separately, because the PythonTree
# class does not contain time information at the moment
for root in tree.roots:
time_ordered = tree.nodes(root, order="timeasc")
t = tree.time(next(time_ordered))
Expand All @@ -1909,7 +1908,13 @@ def verify_traversals(self, tree):
t1 = tree
t2 = tests.PythonTree.from_tree(t1)
self.assertEqual(list(t1.nodes()), list(t2.nodes()))
orders = ["inorder", "postorder", "levelorder", "breadthfirst"]
orders = [
"inorder",
"postorder",
"levelorder",
"breadthfirst",
"minlex_postorder",
]
if tree.num_roots == 1:
self.assertRaises(ValueError, list, t1.nodes(order="bad order"))
self.assertEqual(list(t1.nodes()), list(t1.nodes(t1.get_root())))
Expand Down Expand Up @@ -1946,7 +1951,10 @@ def verify_traversals(self, tree):
list(t2.nodes(root, order=test_order)),
)
all_nodes.extend(t1.nodes(root, order=test_order))
self.assertEqual(all_nodes, list(t1.nodes(order=test_order)))
# minlex_postorder reorders the roots, so this last test is
# not appropriate
if test_order != "minlex_postorder":
self.assertEqual(all_nodes, list(t1.nodes(order=test_order)))

def test_total_branch_length(self):
# Note: this definition works when we have no non-sample branches.
Expand Down
162 changes: 159 additions & 3 deletions python/tests/test_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -1952,6 +1952,162 @@ def test_nonbinary_tree_sequence_permuted_nodes(self):
self.verify_permuted_nodes(ts)


class TestTraversalOrder(unittest.TestCase):
    """
    Tests node traversal orders.

    Each test builds a tree sequence from node and edge text tables and
    compares ``tree.nodes(order=...)`` for every tree against a hand-written
    expected ordering (one list per tree).
    """

    #
    #          9                        10
    #         / \                      / \
    #        /   \                    /   8
    #       /     \                  /   / \
    #      7       \                /   /   \
    #     / \       6              /   /     6
    #    /   5     / \            /   5     / \
    #   /   / \   /   \          /   / \   /   \
    #  4   0   1 2     3        4   0   1 2     3
    #
    # 0 ------------------ 0.5 ------------------ 1.0
    nodes = """\
    id is_sample population time
    0 1 0 0.00000000000000
    1 1 0 0.00000000000000
    2 1 0 0.00000000000000
    3 1 0 0.00000000000000
    4 1 0 0.00000000000000
    5 0 0 0.14567111023387
    6 0 0 0.21385545626353
    7 0 0 0.43508024345063
    8 0 0 0.60156352971203
    9 0 0 0.90000000000000
    10 0 0 1.20000000000000
    """
    edges = """\
    id left right parent child
    0 0.00000000 1.00000000 5 0,1
    1 0.00000000 1.00000000 6 2,3
    2 0.00000000 0.50000000 7 4,5
    3 0.50000000 1.00000000 8 5,6
    4 0.00000000 0.50000000 9 6,7
    5 0.50000000 1.00000000 10 4,8
    """
    # Expected traversal per order: first list is the left-hand tree
    # (rooted at 9), second list is the right-hand tree (rooted at 10).
    node_order_results = {
        "preorder": [[9, 6, 2, 3, 7, 4, 5, 0, 1], [10, 4, 8, 5, 0, 1, 6, 2, 3]],
        "inorder": [[2, 6, 3, 9, 4, 7, 0, 5, 1], [4, 10, 0, 5, 1, 8, 2, 6, 3]],
        "postorder": [[2, 3, 6, 4, 0, 1, 5, 7, 9], [4, 0, 1, 5, 2, 3, 6, 8, 10]],
        "levelorder": [[9, 6, 7, 2, 3, 4, 5, 0, 1], [10, 4, 8, 5, 6, 0, 1, 2, 3]],
        "breadthfirst": [[9, 6, 7, 2, 3, 4, 5, 0, 1], [10, 4, 8, 5, 6, 0, 1, 2, 3]],
        "timeasc": [[0, 1, 2, 3, 4, 5, 6, 7, 9], [0, 1, 2, 3, 4, 5, 6, 8, 10]],
        "timedesc": [[9, 7, 6, 5, 4, 3, 2, 1, 0], [10, 8, 6, 5, 4, 3, 2, 1, 0]],
        "minlex_postorder": [[0, 1, 5, 4, 7, 2, 3, 6, 9], [0, 1, 5, 2, 3, 6, 8, 4, 10]],
    }

    @staticmethod
    def _load(nodes, edges):
        """
        Build a tree sequence from node and edge tables given as text.
        """
        return tskit.load_text(
            nodes=io.StringIO(nodes), edges=io.StringIO(edges), strict=False
        )

    def test_traversal_order(self):
        ts = self._load(self.nodes, self.edges)
        for test_order, expected_result in self.node_order_results.items():
            # subTest names the failing order and lets the remaining
            # orders still run after a failure.
            with self.subTest(order=test_order):
                tree_orders = [
                    list(tree.nodes(order=test_order)) for tree in ts.trees()
                ]
                self.assertEqual(tree_orders, expected_result)

    def test_polytomy_inorder(self):
        """
        If there are N children, current inorder traversal first visits
        floor(N/2) children, then the parent, then the remaining children.
        Here we explicitly test that behaviour.
        """
        #
        #    __4__
        #   / / \ \
        #  0 1   2 3
        #
        nodes_polytomy_4 = """\
        id is_sample population time
        0 1 0 0.00000000000000
        1 1 0 0.00000000000000
        2 1 0 0.00000000000000
        3 1 0 0.00000000000000
        4 0 0 1.00000000000000
        """
        edges_polytomy_4 = """\
        id left right parent child
        0 0.00000000 1.00000000 4 0,1,2,3
        """
        #
        #    __5__
        #   / /|\ \
        #  0 1 2 3 4
        #
        nodes_polytomy_5 = """\
        id is_sample population time
        0 1 0 0.00000000000000
        1 1 0 0.00000000000000
        2 1 0 0.00000000000000
        3 1 0 0.00000000000000
        4 1 0 0.00000000000000
        5 0 0 1.00000000000000
        """
        edges_polytomy_5 = """\
        id left right parent child
        0 0.00000000 1.00000000 5 0,1,2,3,4
        """
        for nodes_string, edges_string, expected_result in [
            [nodes_polytomy_4, edges_polytomy_4, [[0, 1, 4, 2, 3]]],
            [nodes_polytomy_5, edges_polytomy_5, [[0, 1, 5, 2, 3, 4]]],
        ]:
            with self.subTest(expected=expected_result):
                ts = self._load(nodes_string, edges_string)
                tree_orders = [
                    list(tree.nodes(order="inorder")) for tree in ts.trees()
                ]
                self.assertEqual(tree_orders, expected_result)

    def test_minlex_postorder_multiple_roots(self):
        #
        #    10    8     9     11
        #   / \   / \   / \   / \
        #  5   3 2   4 6   7 1   0
        #
        nodes_string = """\
        id is_sample population time
        0 1 0 0.00000000000000
        1 1 0 0.00000000000000
        2 1 0 0.00000000000000
        3 1 0 0.00000000000000
        4 1 0 0.00000000000000
        5 1 0 0.00000000000000
        6 1 0 0.00000000000000
        7 1 0 0.00000000000000
        8 0 0 1.00000000000000
        9 0 0 1.00000000000000
        10 0 0 1.00000000000000
        11 0 0 1.00000000000000
        """
        edges_string = """\
        id left right parent child
        0 0.00000000 1.00000000 8 2,4
        1 0.00000000 1.00000000 9 6,7
        2 0.00000000 1.00000000 10 5,3
        3 0.00000000 1.00000000 11 1,0
        """
        # Roots appear in order of their minimum leaf (11, 8, 10, 9), and
        # within each root the children are ordered the same way.
        expected_result = [[0, 1, 11, 2, 4, 8, 3, 5, 10, 6, 7, 9]]
        ts = self._load(nodes_string, edges_string)
        tree_orders = [
            list(tree.nodes(order="minlex_postorder")) for tree in ts.trees()
        ]
        self.assertEqual(tree_orders, expected_result)


class TestSimplifyExamples(TopologyTestCase):
"""
Tests for simplify where we write out the input and expected output
Expand Down Expand Up @@ -2598,7 +2754,7 @@ def test_simplest_non_degenerate_case(self):
t = next(ts_simplified.trees())
self.assertEqual(t.parent_dict, {0: 4, 1: 4, 2: 5, 3: 5})

def test_two_reducable_trees(self):
def test_two_reducible_trees(self):
# We have n = 4 and two trees, with some unary nodes and non-sample leaves
nodes = io.StringIO(
"""\
Expand Down Expand Up @@ -2682,8 +2838,8 @@ def test_two_reducable_trees(self):
self.assertEqual(sites[-1].position, 0.4)
self.assertEqual(t.parent_dict, {0: 4, 1: 4, 2: 5, 3: 5})

def test_one_reducable_tree(self):
# We have n = 4 and two trees. One tree is reducable and the other isn't.
def test_one_reducible_tree(self):
# We have n = 4 and two trees. One tree is reducible and the other isn't.
nodes = io.StringIO(
"""\
id is_sample time
Expand Down
4 changes: 3 additions & 1 deletion python/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ def test_nonrectangular_input(self):
]
for dtype in self.dtypes_to_test:
for bad_input in bad_inputs:
with self.assertRaises(TypeError):
# On some platforms and Python / numpy versions, a ValueError
# occurs instead
with self.assertRaises((TypeError, ValueError)):
util.safe_np_int_cast(bad_input, dtype)


Expand Down
Loading