From 0006e4a86103827a41dba51f2e09380e62c15116 Mon Sep 17 00:00:00 2001 From: Yan Wong Date: Fri, 26 Aug 2022 17:57:19 +0100 Subject: [PATCH] Add a sort to ts.subset() Fixes #2473 --- python/CHANGELOG.rst | 5 +++++ python/tests/test_highlevel.py | 21 +++++++++++++++++++++ python/tskit/trees.py | 13 +++++++------ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index bfd3e8d776..d7dfc8a5d4 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -2,6 +2,11 @@ [0.5.3] - 2022-XX-XX -------------------- +**Fixes** + + - ``ts.subset()`` produces valid tree sequences even if nodes are shuffled + out of time order (:user:`hyanwong`, :pr:`2479`, :issue:`2473`) + **Changes** - Single statistics computed with ``TreeSequence.general_stat`` are now diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 1bd8e57e58..6a6aa8d1ec 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1949,6 +1949,27 @@ def test_simplify_migrations_fails(self): with pytest.raises(_tskit.LibraryError): ts.simplify() + def test_subset_reverse_all_nodes(self): + ts = tskit.Tree.generate_comb(5).tree_sequence + assert np.all(ts.samples() == np.arange(ts.num_samples)) + flipped_ids = np.flip(np.arange(ts.num_nodes)) + new_ts = ts.subset(flipped_ids) + assert set(new_ts.samples()) == set(flipped_ids[np.arange(ts.num_samples)]) + r1 = ts.first().rank() + r2 = new_ts.first().rank() + assert r1.shape == r2.shape + assert r1.label != r2.label + + def test_subset_reverse_internal_nodes(self): + ts = tskit.Tree.generate_balanced(5).tree_sequence + internal_nodes = np.ones(ts.num_nodes, dtype=bool) + internal_nodes[ts.samples()] = False + node_ids = np.arange(ts.num_nodes) + node_ids[internal_nodes] = np.flip(node_ids[internal_nodes]) + new_ts = ts.subset(node_ids) + assert np.any(new_ts.nodes_time != ts.nodes_time) + assert new_ts.first().rank() == ts.first().rank() + def test_deprecated_apis(self): ts = msprime.simulate(10, random_seed=1) assert ts.get_ll_tree_sequence() == ts.ll_tree_sequence diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 39f05e7217..a10cf3d433 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -6694,12 +6694,12 @@ def subset( of the retained nodes. Note that this does *not* retain the ancestry of these nodes - for that, see :meth:`.simplify`. - This has the side effect of reordering the nodes, individuals, and - populations in the tree sequence: the nodes in the new tree sequence - will be in the order provided in ``nodes``, and both individuals and - populations will be ordered by the earliest retained node that refers - to them. (However, ``reorder_populations`` may be set to False - to keep the population table unchanged.) + This has the side effect that it may change the order of the nodes, + individuals, populations, and migrations in the tree sequence: the nodes + in the new tree sequence will be in the order provided in ``nodes``, and + both individuals and populations will be ordered by the earliest retained + node that refers to them. (However, ``reorder_populations`` may be set to + False to keep the population table unchanged.) By default, the method removes all individuals and populations not referenced by any nodes, and all sites not referenced by any mutations. @@ -6734,6 +6734,7 @@ def subset( reorder_populations=reorder_populations, remove_unreferenced=remove_unreferenced, ) + tables.sort() return tables.tree_sequence() def union(