-
Notifications
You must be signed in to change notification settings - Fork 79
Add ts.coiterate method #1022
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ts.coiterate method #1022
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4758,6 +4758,95 @@ def test_partial_overlap_contradictory_children(self): | |
| tskit.load_text(nodes=nodes, edges=edges, strict=False) | ||
|
|
||
|
|
||
| class TestCoiteration: | ||
| """ | ||
| Test ability to iterate over multiple (currently 2) tree sequences simultaneously | ||
| """ | ||
|
|
||
| def test_identical_ts(self): | ||
| ts = msprime.simulate(4, recombination_rate=1, random_seed=123) | ||
| assert ts.num_trees > 1 | ||
| total_iterations = 0 | ||
| for tree, (_, t1, t2) in zip(ts.trees(), ts.coiterate(ts)): | ||
| total_iterations += 1 | ||
| assert tree == t1 == t2 | ||
| assert ts.num_trees == total_iterations | ||
|
|
||
| def test_intervals(self): | ||
| ts1 = msprime.simulate(4, recombination_rate=1, random_seed=1) | ||
| assert ts1.num_trees > 1 | ||
| one_tree_ts = msprime.simulate(5, random_seed=2) | ||
| multi_tree_ts = msprime.simulate(5, recombination_rate=1, random_seed=2) | ||
| assert multi_tree_ts.num_trees > 1 | ||
| for ts2 in (one_tree_ts, multi_tree_ts): | ||
| bp1 = set(ts1.breakpoints()) | ||
| bp2 = set(ts2.breakpoints()) | ||
| assert bp1 != bp2 | ||
| breaks = set() | ||
| for interval, t1, t2 in ts1.coiterate(ts2): | ||
| assert set(interval) <= set(t1.interval) | set(t2.interval) | ||
| breaks.add(interval.left) | ||
| breaks.add(interval.right) | ||
hyanwong marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assert t1.tree_sequence == ts1 | ||
| assert t2.tree_sequence == ts2 | ||
| assert breaks == bp1 | bp2 | ||
|
|
||
| def test_simple_ts(self): | ||
| nodes = """\ | ||
| id is_sample time | ||
| 0 1 0 | ||
| 1 1 0 | ||
| 2 1 0 | ||
| 3 0 1 | ||
| 4 0 2 | ||
| """ | ||
| edges1 = """\ | ||
| left right parent child | ||
| 0 0.2 3 0,1 | ||
| 0 0.2 4 2,3 | ||
| 0.2 1 3 2,1 | ||
| 0.2 1 4 0,3 | ||
| """ | ||
| edges2 = """\ | ||
| left right parent child | ||
| 0 0.8 3 2,1 | ||
| 0 0.8 4 0,3 | ||
| 0.8 1 3 0,1 | ||
| 0.8 1 4 2,3 | ||
| """ | ||
| ts1 = tskit.load_text(io.StringIO(nodes), io.StringIO(edges1), strict=False) | ||
| ts2 = tskit.load_text(io.StringIO(nodes), io.StringIO(edges2), strict=False) | ||
| coiterator = ts1.coiterate(ts2) | ||
| interval, tree1, tree2 = next(coiterator) | ||
| assert interval.left == 0 | ||
| assert interval.right == 0.2 | ||
| assert tree1 == ts1.at_index(0) | ||
| assert tree2 == ts2.at_index(0) | ||
| interval, tree1, tree2 = next(coiterator) | ||
| assert interval.left == 0.2 | ||
| assert interval.right == 0.8 | ||
| assert tree1 == ts1.at_index(1) | ||
| assert tree2 == ts2.at_index(0) | ||
| interval, tree1, tree2 = next(coiterator) | ||
| assert interval.left == 0.8 | ||
| assert interval.right == 1 | ||
| assert tree1 == ts1.at_index(1) | ||
| assert tree2 == ts2.at_index(1) | ||
|
|
||
| def test_nonequal_lengths(self): | ||
| ts1 = msprime.simulate(4, random_seed=1, length=2) | ||
| ts2 = msprime.simulate(4, random_seed=1) | ||
| with pytest.raises(ValueError, match="equal sequence length"): | ||
| next(ts1.coiterate(ts2)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We shouldn't need the `next()` call here.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we do. If I remove it, we don't get the error raised, because we don't actually start the generator going?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generator functions are only executed after the first `next()` call.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, my bad. |
||
|
|
||
| def test_kwargs(self): | ||
| ts = msprime.simulate(4, recombination_rate=1, random_seed=123) | ||
| for _, t1, t2 in ts.coiterate(ts): | ||
| assert t1.num_tracked_samples() == t2.num_tracked_samples() == 0 | ||
| for _, t1, t2 in ts.coiterate(ts, tracked_samples=ts.samples()): | ||
| assert t1.num_tracked_samples() == t2.num_tracked_samples() == 4 | ||
|
|
||
|
|
||
| class SimplifyTestBase: | ||
| """ | ||
| Base class for simplify tests. | ||
|
|
@@ -5695,9 +5784,7 @@ def verify_keep_input_roots(self, ts, samples): | |
| new_to_input_map = { | ||
| value: key for key, value in enumerate(node_map) if value != tskit.NULL | ||
| } | ||
| for (left, right), input_tree, tree_with_roots in tsutil.coiterate( | ||
| ts, ts_with_roots | ||
| ): | ||
| for (left, right), input_tree, tree_with_roots in ts.coiterate(ts_with_roots): | ||
| input_roots = input_tree.roots | ||
| assert len(tree_with_roots.roots) > 0 | ||
| for root in tree_with_roots.roots: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,19 @@ | |
|
|
||
|
|
||
| class Interval(BaseInterval): | ||
| """ | ||
| A tuple of 2 numbers, ``[left, right)``, defining an interval over the genome. | ||
|
|
||
| :ivar left: The left hand end of the interval. By convention this value is included | ||
| in the interval. | ||
| :vartype left: float | ||
| :ivar right: The right hand end of the interval. By convention this value is *not* | ||
| included in the interval, i.e. the interval is half-open. | ||
| :vartype right: float | ||
| :ivar span: The span of the genome covered by this interval, simply ``right-left``. | ||
| :vartype span: float | ||
| """ | ||
|
|
||
| @property | ||
| def span(self): | ||
| return self.right - self.left | ||
|
|
@@ -3933,6 +3946,43 @@ def trees( | |
| ) | ||
| return TreeIterator(tree) | ||
|
|
||
| def coiterate(self, other, **kwargs): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, might make more sense to put this next to the `trees` method.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Erm. That's where it is, isn't it?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, so it is. GitHub's way of showing context is a bit weird sometimes |
||
| """ | ||
| Returns an iterator over the pairs of trees for each distinct | ||
| interval in the specified pair of tree sequences. | ||
|
|
||
| :param TreeSequence other: The other tree sequence from which to take trees. The | ||
| sequence length must be the same as the current tree sequence. | ||
| :param \\**kwargs: Further named arguments that will be passed to the | ||
| :meth:`.trees` method when constructing the returned trees. | ||
|
|
||
| :return: An iterator returning successive tuples of the form | ||
| ``(interval, tree_self, tree_other)``. For example, the first item returned | ||
| will consist of a tuple of the initial interval, the first tree of the | ||
| current tree sequence, and the first tree of the ``other`` tree sequence; | ||
| the ``.left`` attribute of the initial interval will be 0 and the ``.right`` | ||
| attribute will be the smallest non-zero breakpoint of the 2 tree sequences. | ||
| :rtype: iter(:class:`Interval`, :class:`Tree`, :class:`Tree`) | ||
|
|
||
| """ | ||
| if self.sequence_length != other.sequence_length: | ||
| raise ValueError("Tree sequences must be of equal sequence length.") | ||
| L = self.sequence_length | ||
| trees1 = self.trees(**kwargs) | ||
| trees2 = other.trees(**kwargs) | ||
| tree1 = next(trees1) | ||
| tree2 = next(trees2) | ||
| right = 0 | ||
| while right != L: | ||
| left = right | ||
| right = min(tree1.interval[1], tree2.interval[1]) | ||
| yield Interval(left, right), tree1, tree2 | ||
| # Advance | ||
| if tree1.interval[1] == right: | ||
| tree1 = next(trees1, None) | ||
| if tree2.interval[1] == right: | ||
| tree2 = next(trees2, None) | ||
|
|
||
| def haplotypes( | ||
| self, | ||
| *, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.