Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
[0.5.3] - 2022-XX-XX
--------------------

**Fixes**

- ``ts.subset()`` produces valid tree sequences even if nodes are shuffled
out of time order (:user:`hyanwong`, :pr:`2479`, :issue:`2473`)

**Features**

- The ``ts.nodes`` method now takes an ``order`` parameter so that nodes
Expand All @@ -27,6 +22,11 @@
- Accessor methods such as ``ts.edge(n)`` and ``ts.node(n)`` now allow negative
indexes (:user:`hyanwong`, :pr:`2478`, :issue:`1008`)

- ``ts.subset()`` produces valid tree sequences even if nodes are shuffled
out of time order (:user:`hyanwong`, :pr:`2479`, :issue:`2473`), and
``tables.subset()`` now does the same (:user:`hyanwong`, :pr:`2489`). This
involves sorting the resulting tables, which may change the edge order.

**Performance improvements**

- TreeSequence.link_ancestors no longer continues to process edges once all
Expand Down
47 changes: 36 additions & 11 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4510,19 +4510,15 @@ def verify_subset(self, tables, nodes):
for k, p in zip(pops, subset.populations):
pp = tables.populations[k]
assert pp == p
edges = [
i
for i, e in enumerate(tables.edges)
# subset can reorder the edges: we need to check we have the same set
edges = {
e.replace(parent=node_map[e.parent], child=node_map[e.child])
for e in tables.edges
if e.parent in nodes and e.child in nodes
]
}
assert subset.edges.num_rows == len(edges)
for k, e in zip(edges, subset.edges):
ee = tables.edges[k]
assert ee.left == e.left
assert ee.right == e.right
assert node_map[ee.parent] == e.parent
assert node_map[ee.child] == e.child
assert ee.metadata == e.metadata
for e in edges:
assert e in subset.edges
muts = []
sites = []
for k, m in enumerate(tables.mutations):
Expand Down Expand Up @@ -4649,6 +4645,35 @@ def test_no_remove_unreferenced(self):
assert tables.populations == sub_tables.populations
assert tables.individuals == sub_tables.individuals

def test_subset_reverse_all_nodes(self):
    """
    Subset a ``generate_comb(5)`` tree sequence using every node ID in
    reverse order, and check that the first tree keeps its rank shape
    while its rank label changes.
    """
    original_ts = tskit.Tree.generate_comb(5).tree_sequence
    num_samples = original_ts.num_samples
    # The sample-ID comparison further down relies on the samples being
    # exactly the first ``num_samples`` node IDs.
    assert np.all(original_ts.samples() == np.arange(num_samples))
    tables = original_ts.dump_tables()
    reversed_ids = np.arange(tables.nodes.num_rows)[::-1]
    self.verify_subset(tables, reversed_ids)
    # Now test the topology is the same
    tables.subset(reversed_ids)
    subset_ts = tables.tree_sequence()
    expected_samples = set(reversed_ids[:num_samples])
    assert set(subset_ts.samples()) == expected_samples
    rank_before = original_ts.first().rank()
    rank_after = subset_ts.first().rank()
    # Same shape component, but a different label component.
    assert rank_before.shape == rank_after.shape
    assert rank_before.label != rank_after.label

def test_subset_reverse_internal_nodes(self):
    """
    Subset a ``generate_balanced(5)`` tree sequence with the sample node
    IDs left in place and the non-sample node IDs reversed, and check
    that node times change while the first tree's rank does not.
    """
    ts = tskit.Tree.generate_balanced(5).tree_sequence
    tables = ts.dump_tables()
    node_ids = np.arange(tables.nodes.num_rows)
    # Boolean mask selecting every node that is not a sample.
    is_internal = np.ones(ts.num_nodes, dtype=bool)
    is_internal[ts.samples()] = False
    # Reverse the order of the non-sample IDs only.
    node_ids[is_internal] = node_ids[is_internal][::-1]
    self.verify_subset(tables, node_ids)
    # Now test the topology and the sample labels are the same
    tables.subset(node_ids)
    subset_ts = tables.tree_sequence()
    assert np.any(subset_ts.nodes_time != ts.nodes_time)
    assert subset_ts.first().rank() == ts.first().rank()


class TestUnionTables(unittest.TestCase):
"""
Expand Down
2 changes: 2 additions & 0 deletions python/tests/tsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,8 @@ def py_subset(
)
mutation_map[i] = new_mut

tables.sort()


def py_union(tables, other, nodes, record_provenance=True, add_populations=True):
"""
Expand Down
10 changes: 6 additions & 4 deletions python/tskit/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4024,10 +4024,11 @@ def subset(
"""
Modifies the tables in place to contain only the entries referring to
the provided list of node IDs, with nodes reordered according to the
order they appear in the list. See :meth:`TreeSequence.subset` for a
more detailed description.

Note: there are no sortedness requirements on the tables.
order they appear in the list. Other tables are :meth:`sorted <sort>`
to conform to the :ref:`sec_valid_tree_sequence_requirements`, and
additionally sorted as described in the documentation for the equivalent
tree sequence method :meth:`TreeSequence.subset`: please see this for more
detail.

:param list nodes: The list of nodes for which to retain information. This
may be a numpy array (or array-like) object (dtype=np.int32).
Expand All @@ -4052,6 +4053,7 @@ def subset(
reorder_populations=reorder_populations,
remove_unreferenced=remove_unreferenced,
)
self.sort()
if record_provenance:
parameters = {"command": "subset", "nodes": nodes.tolist()}
self.provenances.add_row(
Expand Down
1 change: 0 additions & 1 deletion python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -6801,7 +6801,6 @@ def subset(
reorder_populations=reorder_populations,
remove_unreferenced=remove_unreferenced,
)
tables.sort()
return tables.tree_sequence()

def union(
Expand Down