-
Notifications
You must be signed in to change notification settings - Fork 69
/
trees.py
9751 lines (8716 loc) · 414 KB
/
trees.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# MIT License
#
# Copyright (c) 2018-2024 Tskit Developers
# Copyright (c) 2015-2018 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
Module responsible for managing trees and tree sequences.
"""
from __future__ import annotations
import base64
import builtins
import collections
import concurrent.futures
import functools
import io
import itertools
import math
import numbers
import warnings
from dataclasses import dataclass
from typing import Any
from typing import NamedTuple
import numpy as np
import _tskit
import tskit
import tskit.combinatorics as combinatorics
import tskit.drawing as drawing
import tskit.metadata as metadata_module
import tskit.tables as tables
import tskit.text_formats as text_formats
import tskit.util as util
import tskit.vcf as vcf
from tskit import NODE_IS_SAMPLE
from tskit import NULL
from tskit import UNKNOWN_TIME
LEGACY_MS_LABELS = "legacy_ms"
class CoalescenceRecord(NamedTuple):
left: float
right: float
node: int
children: np.ndarray
time: float
population: int
class Interval(NamedTuple):
"""
A tuple of 2 numbers, ``[left, right)``, defining an interval over the genome.
"""
left: float | int
"""
The left hand end of the interval. By convention this value is included
in the interval
"""
right: float | int
"""
The right hand end of the interval. By convention this value is *not*
included in the interval, i.e., the interval is half-open.
"""
@property
def span(self) -> float | int:
"""
The span of the genome covered by this interval, simply ``right-left``.
"""
return self.right - self.left
class EdgeDiff(NamedTuple):
interval: Interval
edges_out: list
edges_in: list
def store_tree_sequence(cls):
wrapped_init = cls.__init__
# Intercept the init to record the tree_sequence
def new_init(self, *args, tree_sequence=None, **kwargs):
builtins.object.__setattr__(self, "_tree_sequence", tree_sequence)
wrapped_init(self, *args, **kwargs)
cls.__init__ = new_init
return cls
@store_tree_sequence
@metadata_module.lazy_decode()
@dataclass
class Individual(util.Dataclass):
"""
An :ref:`individual <sec_individual_table_definition>` in a tree sequence.
Since nodes correspond to genomes, individuals are associated with a collection
of nodes (e.g., two nodes per diploid). See :ref:`sec_nodes_or_individuals`
for more discussion of this distinction.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = [
"id",
"flags",
"location",
"parents",
"nodes",
"metadata",
"_tree_sequence",
]
id: int # noqa A003
"""
The integer ID of this individual. Varies from 0 to
:attr:`TreeSequence.num_individuals` - 1."""
flags: int
"""
The bitwise flags for this individual.
"""
location: np.ndarray
"""
The spatial location of this individual as a numpy array. The location is an empty
array if no spatial location is defined.
"""
parents: np.ndarray
"""
The parent individual ids of this individual as a numpy array. The parents is an
empty array if no parents are defined.
"""
nodes: np.ndarray
"""
The IDs of the nodes that are associated with this individual as
a numpy array (dtype=np.int32). If no nodes are associated with the
individual this array will be empty.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>`
for this individual, decoded if a schema applies.
"""
@property
def population(self) -> int:
populations = {self._tree_sequence.node(n).population for n in self.nodes}
if len(populations) > 1:
raise ValueError("Individual has nodes with mis-matched populations")
if len(populations) == 0:
return tskit.NULL
return populations.pop()
@property
def time(self) -> int:
times = {self._tree_sequence.node(n).time for n in self.nodes}
if len(times) > 1:
raise ValueError("Individual has nodes with mis-matched times")
if len(times) == 0:
return tskit.UNKNOWN_TIME
return times.pop()
# Custom eq for the numpy arrays
def __eq__(self, other):
return (
self.id == other.id
and self.flags == other.flags
and np.array_equal(self.location, other.location)
and np.array_equal(self.parents, other.parents)
and np.array_equal(self.nodes, other.nodes)
and self.metadata == other.metadata
)
@metadata_module.lazy_decode()
@dataclass
class Node(util.Dataclass):
"""
A :ref:`node <sec_node_table_definition>` in a tree sequence, corresponding
to a single genome. The ``time`` and ``population`` are attributes of the
``Node``, rather than the ``Individual``, as discussed in
:ref:`sec_nodes_or_individuals`.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = ["id", "flags", "time", "population", "individual", "metadata"]
id: int # noqa A003
"""
The integer ID of this node. Varies from 0 to :attr:`TreeSequence.num_nodes` - 1.
"""
flags: int
"""
The bitwise flags for this node.
"""
time: float
"""
The birth time of this node.
"""
population: int
"""
The integer ID of the population that this node was born in.
"""
individual: int
"""
The integer ID of the individual that this node was a part of.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this node, decoded if a schema
applies.
"""
def is_sample(self):
"""
Returns True if this node is a sample. This value is derived from the
``flag`` variable.
:rtype: bool
"""
return self.flags & NODE_IS_SAMPLE
@metadata_module.lazy_decode(own_init=True)
@dataclass
class Edge(util.Dataclass):
"""
An :ref:`edge <sec_edge_table_definition>` in a tree sequence.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = ["left", "right", "parent", "child", "metadata", "id"]
left: float
"""
The left coordinate of this edge.
"""
right: float
"""
The right coordinate of this edge.
"""
parent: int
"""
The integer ID of the parent node for this edge.
To obtain further information about a node with a given ID, use
:meth:`TreeSequence.node`.
"""
child: int
"""
The integer ID of the child node for this edge.
To obtain further information about a node with a given ID, use
:meth:`TreeSequence.node`.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this edge, decoded if a schema
applies.
"""
id: int # noqa A003
"""
The integer ID of this edge. Varies from 0 to
:attr:`TreeSequence.num_edges` - 1.
"""
# Custom init to define default values with slots
def __init__(
self,
left,
right,
parent,
child,
metadata=b"",
id=None, # noqa A002
metadata_decoder=None,
):
self.id = id
self.left = left
self.right = right
self.parent = parent
self.child = child
self.metadata = metadata
self._metadata_decoder = metadata_decoder
@property
def span(self):
"""
Returns the span of this edge, i.e., the right position minus the left position
:return: The span of this edge.
:rtype: float
"""
return self.right - self.left
@metadata_module.lazy_decode()
@dataclass
class Site(util.Dataclass):
"""
A :ref:`site <sec_site_table_definition>` in a tree sequence.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = ["id", "position", "ancestral_state", "mutations", "metadata"]
id: int # noqa A003
"""
The integer ID of this site. Varies from 0 to :attr:`TreeSequence.num_sites` - 1.
"""
position: float
"""
The floating point location of this site in genome coordinates.
Ranges from 0 (inclusive) to :attr:`TreeSequence.sequence_length` (exclusive).
"""
ancestral_state: str
"""
The ancestral state at this site (i.e., the state inherited by nodes, unless
mutations occur).
"""
mutations: np.ndarray
"""
The list of mutations at this site. Mutations within a site are returned in the
order they are specified in the underlying :class:`MutationTable`.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this site, decoded if a schema
applies.
"""
# We need a custom eq for the numpy arrays
def __eq__(self, other):
return (
isinstance(other, Site)
and self.id == other.id
and self.position == other.position
and self.ancestral_state == other.ancestral_state
and np.array_equal(self.mutations, other.mutations)
and self.metadata == other.metadata
)
@property
def alleles(self) -> set[str]:
"""
Return the set of all the alleles defined at this site
.. note::
This deliberately returns an (unordered) *set* of the possible allelic
states (as defined by the site's ancestral allele and its associated
mutations). If you wish to obtain an (ordered) *list* of alleles, for
example to translate the numeric genotypes at a site into allelic states,
you should instead use ``.alleles`` attribute of the :class:`Variant` class,
which unlike this attribute includes ``None`` as a state when there is
missing data at a site.
"""
return {self.ancestral_state} | {m.derived_state for m in self.mutations}
@metadata_module.lazy_decode()
@dataclass
class Mutation(util.Dataclass):
"""
A :ref:`mutation <sec_mutation_table_definition>` in a tree sequence.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = [
"id",
"site",
"node",
"derived_state",
"parent",
"metadata",
"time",
"edge",
]
id: int # noqa A003
"""
The integer ID of this mutation. Varies from 0 to
:attr:`TreeSequence.num_mutations` - 1.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
site: int
"""
The integer ID of the site that this mutation occurs at. To obtain
further information about a site with a given ID use :meth:`TreeSequence.site`.
"""
node: int
"""
The integer ID of the first node that inherits this mutation.
To obtain further information about a node with a given ID, use
:meth:`TreeSequence.node`.
"""
derived_state: str
"""
The derived state for this mutation. This is the state
inherited by nodes in the subtree rooted at this mutation's node, unless
another mutation occurs.
"""
parent: int
"""
The integer ID of this mutation's parent mutation. When multiple
mutations occur at a site along a path in the tree, mutations must
record the mutation that is immediately above them. If the mutation does
not have a parent, this is equal to the :data:`NULL` (-1).
To obtain further information about a mutation with a given ID, use
:meth:`TreeSequence.mutation`.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this mutation, decoded if a schema
applies.
"""
time: float
"""
The occurrence time of this mutation.
"""
edge: int
"""
The ID of the edge that this mutation is on.
"""
# To get default values on slots we define a custom init
def __init__(
self,
id=NULL, # noqa A003
site=NULL,
node=NULL,
time=UNKNOWN_TIME,
derived_state=None,
parent=NULL,
metadata=b"",
edge=NULL,
):
self.id = id
self.site = site
self.node = node
self.time = time
self.derived_state = derived_state
self.parent = parent
self.metadata = metadata
self.edge = edge
# We need a custom eq to compare unknown times.
def __eq__(self, other):
return (
isinstance(other, Mutation)
and self.id == other.id
and self.site == other.site
and self.node == other.node
and self.derived_state == other.derived_state
and self.parent == other.parent
and self.edge == other.edge
and self.metadata == other.metadata
and (
self.time == other.time
or (
util.is_unknown_time(self.time) and util.is_unknown_time(other.time)
)
)
)
@metadata_module.lazy_decode()
@dataclass
class Migration(util.Dataclass):
"""
A :ref:`migration <sec_migration_table_definition>` in a tree sequence.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = ["left", "right", "node", "source", "dest", "time", "metadata", "id"]
left: float
"""
The left end of the genomic interval covered by this
migration (inclusive).
"""
right: float
"""
The right end of the genomic interval covered by this migration
(exclusive).
"""
node: int
"""
The integer ID of the node involved in this migration event.
To obtain further information about a node with a given ID, use
:meth:`TreeSequence.node`.
"""
source: int
"""
The source population ID.
"""
dest: int
"""
The destination population ID.
"""
time: float
"""
The time at which this migration occurred at.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this migration, decoded if a schema
applies.
"""
id: int # noqa A003
"""
The integer ID of this mutation. Varies from 0 to
:attr:`TreeSequence.num_mutations` - 1.
"""
@metadata_module.lazy_decode()
@dataclass
class Population(util.Dataclass):
"""
A :ref:`population <sec_population_table_definition>` in a tree sequence.
Modifying the attributes in this class will have **no effect** on the
underlying tree sequence data.
"""
__slots__ = ["id", "metadata"]
id: int # noqa A003
"""
The integer ID of this population. Varies from 0 to
:attr:`TreeSequence.num_populations` - 1.
"""
metadata: bytes | dict | None
"""
The :ref:`metadata <sec_metadata_definition>` for this population, decoded if a
schema applies.
"""
@dataclass
class Edgeset(util.Dataclass):
__slots__ = ["left", "right", "parent", "children"]
left: int
right: int
parent: int
children: np.ndarray
# We need a custom eq for the numpy array
def __eq__(self, other):
return (
isinstance(other, Edgeset)
and self.left == other.left
and self.right == other.right
and self.parent == other.parent
and np.array_equal(self.children, other.children)
)
@dataclass
class Provenance(util.Dataclass):
"""
A provenance entry in a tree sequence, detailing how this tree
sequence was generated, or subsequent operations on it (see :ref:`sec_provenance`).
"""
__slots__ = ["id", "timestamp", "record"]
id: int # noqa A003
timestamp: str
"""
The time that this entry was made
"""
record: str
"""
A JSON string giving details of the provenance (see :ref:`sec_provenance_example`
for an example JSON string)
"""
class Tree:
"""
A single tree in a :class:`TreeSequence`. Please see the
:ref:`tutorials:sec_processing_trees` section for information
on how efficiently access trees sequentially or obtain a list
of individual trees in a tree sequence.
The ``sample_lists`` parameter controls the features that are enabled
for this tree. If ``sample_lists`` is True a more efficient algorithm is
used in the :meth:`Tree.samples` method.
The ``tracked_samples`` parameter can be used to efficiently count the
number of samples in a given set that exist in a particular subtree
using the :meth:`Tree.num_tracked_samples` method.
The :class:`Tree` class is a state-machine which has a state
corresponding to each of the trees in the parent tree sequence. We
transition between these states by using the seek functions like
:meth:`Tree.first`, :meth:`Tree.last`, :meth:`Tree.seek` and
:meth:`Tree.seek_index`. There is one more state, the so-called "null"
or "cleared" state. This is the state that a :class:`Tree` is in
immediately after initialisation; it has an index of -1, and no edges. We
can also enter the null state by calling :meth:`Tree.next` on the last
tree in a sequence, calling :meth:`Tree.prev` on the first tree in a
sequence or calling calling the :meth:`Tree.clear` method at any time.
The high-level TreeSequence seeking and iterations methods (e.g,
:meth:`TreeSequence.trees`) are built on these low-level state-machine
seek operations. We recommend these higher level operations for most
users.
:param TreeSequence tree_sequence: The parent tree sequence.
:param list tracked_samples: The list of samples to be tracked and
counted using the :meth:`Tree.num_tracked_samples` method.
:param bool sample_lists: If True, provide more efficient access
to the samples beneath a given node using the
:meth:`Tree.samples` method.
:param int root_threshold: The minimum number of samples that a node
must be ancestral to for it to be in the list of roots. By default
this is 1, so that isolated samples (representing missing data)
are roots. To efficiently restrict the roots of the tree to
those subtending meaningful topology, set this to 2. This value
is only relevant when trees have multiple roots.
:param bool sample_counts: Deprecated since 0.2.4.
"""
def __init__(
self,
tree_sequence,
tracked_samples=None,
*,
sample_lists=False,
root_threshold=1,
sample_counts=None,
):
options = 0
if sample_counts is not None:
warnings.warn(
"The sample_counts option is not supported since 0.2.4 "
"and is ignored",
RuntimeWarning,
stacklevel=4,
)
if sample_lists:
options |= _tskit.SAMPLE_LISTS
kwargs = {"options": options}
if root_threshold <= 0:
raise ValueError("Root threshold must be greater than 0")
if tracked_samples is not None:
# TODO remove this when we allow numpy arrays in the low-level API.
kwargs["tracked_samples"] = list(tracked_samples)
self._tree_sequence = tree_sequence
self._ll_tree = _tskit.Tree(tree_sequence.ll_tree_sequence, **kwargs)
self._ll_tree.set_root_threshold(root_threshold)
self._make_arrays()
def copy(self):
"""
Returns a deep copy of this tree. The returned tree will have identical state
to this tree.
:return: A copy of this tree.
:rtype: Tree
"""
copy = type(self).__new__(type(self))
copy._tree_sequence = self._tree_sequence
copy._ll_tree = self._ll_tree.copy()
copy._make_arrays()
return copy
# TODO make this method public and document it.
# Note that this probably does not cover all corner cases correctly
# https://github.com/tskit-dev/tskit/issues/1908
def _has_isolated_samples(self):
# TODO Is this definition correct for a single-node tree sequence?
for root in self.roots:
# If the root has no children then it must be a sample
if self.left_child(root) == NULL:
return True
return False
def _make_arrays(self):
# Store the low-level arrays for efficiency. There's no real overhead
# in this, because the refer to the same underlying memory as the
# tree object.
self._parent_array = self._ll_tree.parent_array
self._left_child_array = self._ll_tree.left_child_array
self._right_child_array = self._ll_tree.right_child_array
self._left_sib_array = self._ll_tree.left_sib_array
self._right_sib_array = self._ll_tree.right_sib_array
self._num_children_array = self._ll_tree.num_children_array
self._edge_array = self._ll_tree.edge_array
@property
def tree_sequence(self):
"""
Returns the tree sequence that this tree is from.
:return: The parent tree sequence for this tree.
:rtype: :class:`TreeSequence`
"""
return self._tree_sequence
@property
def root_threshold(self):
"""
Returns the minimum number of samples that a node must be an ancestor
of to be considered a potential root. This can be set, for example, when
calling the :meth:`TreeSequence.trees` iterator.
:return: The root threshold.
:rtype: :class:`TreeSequence`
"""
return self._ll_tree.get_root_threshold()
def __eq__(self, other):
ret = False
if type(other) is type(self):
ret = bool(self._ll_tree.equals(other._ll_tree))
return ret
def __ne__(self, other):
return not self.__eq__(other)
def first(self):
"""
Seeks to the first tree in the sequence. This can be called whether
the tree is in the null state or not.
"""
self._ll_tree.first()
def last(self):
"""
Seeks to the last tree in the sequence. This can be called whether
the tree is in the null state or not.
"""
self._ll_tree.last()
def next(self): # noqa A002
"""
Seeks to the next tree in the sequence. If the tree is in the initial
null state we seek to the first tree (equivalent to calling :meth:`~Tree.first`).
Calling ``next`` on the last tree in the sequence results in the tree
being cleared back into the null initial state (equivalent to calling
:meth:`~Tree.clear`). The return value of the function indicates whether the
tree is in a non-null state, and can be used to loop over the trees::
# Iterate over the trees from left-to-right
tree = tskit.Tree(tree_sequence)
while tree.next()
# Do something with the tree.
print(tree.index)
# tree is now back in the null state.
:return: True if the tree has been transformed into one of the trees
in the sequence; False if the tree has been transformed into the
null state.
:rtype: bool
"""
return bool(self._ll_tree.next())
def prev(self):
"""
Seeks to the previous tree in the sequence. If the tree is in the initial
null state we seek to the last tree (equivalent to calling :meth:`~Tree.last`).
Calling ``prev`` on the first tree in the sequence results in the tree
being cleared back into the null initial state (equivalent to calling
:meth:`~Tree.clear`). The return value of the function indicates whether the
tree is in a non-null state, and can be used to loop over the trees::
# Iterate over the trees from right-to-left
tree = tskit.Tree(tree_sequence)
while tree.prev()
# Do something with the tree.
print(tree.index)
# tree is now back in the null state.
:return: True if the tree has been transformed into one of the trees
in the sequence; False if the tree has been transformed into the
null state.
:rtype: bool
"""
return bool(self._ll_tree.prev())
def clear(self):
"""
Resets this tree back to the initial null state. Calling this method
on a tree already in the null state has no effect.
"""
self._ll_tree.clear()
def seek_index(self, index):
"""
Sets the state to represent the tree at the specified
index in the parent tree sequence. Negative indexes following the
standard Python conventions are allowed, i.e., ``index=-1`` will
seek to the last tree in the sequence.
.. include:: substitutions/linear_traversal_warning.rst
:param int index: The tree index to seek to.
:raises IndexError: If an index outside the acceptable range is provided.
"""
num_trees = self.tree_sequence.num_trees
if index < 0:
index += num_trees
if index < 0 or index >= num_trees:
raise IndexError("Index out of bounds")
self._ll_tree.seek_index(index)
def seek(self, position):
"""
Sets the state to represent the tree that covers the specified
position in the parent tree sequence. After a successful return
of this method we have ``tree.interval.left`` <= ``position``
< ``tree.interval.right``.
.. include:: substitutions/linear_traversal_warning.rst
:param float position: The position along the sequence length to
seek to.
:raises ValueError: If 0 < position or position >=
:attr:`TreeSequence.sequence_length`.
"""
if position < 0 or position >= self.tree_sequence.sequence_length:
raise ValueError("Position out of bounds")
self._ll_tree.seek(position)
def rank(self) -> tskit.Rank:
"""
Produce the rank of this tree in the enumeration of all leaf-labelled
trees of n leaves. See the :ref:`sec_tree_ranks` section for
details on ranking and unranking trees.
:raises ValueError: If the tree has multiple roots.
"""
return combinatorics.RankTree.from_tsk_tree(self).rank()
@staticmethod
def unrank(num_leaves, rank, *, span=1, branch_length=1) -> Tree:
"""
Reconstruct the tree of the given ``rank``
(see :meth:`tskit.Tree.rank`) with ``num_leaves`` leaves.
The labels and times of internal nodes are assigned by a postorder
traversal of the nodes, such that the time of each internal node
is the maximum time of its children plus the specified ``branch_length``.
The time of each leaf is 0.
See the :ref:`sec_tree_ranks` section for details on ranking and
unranking trees and what constitutes valid ranks.
:param int num_leaves: The number of leaves of the tree to generate.
:param tuple(int) rank: The rank of the tree to generate.
:param float span: The genomic span of the returned tree. The tree will cover
the interval :math:`[0, \\text{span})` and the :attr:`~Tree.tree_sequence`
from which the tree is taken will have its
:attr:`~tskit.TreeSequence.sequence_length` equal to ``span``.
:param: float branch_length: The minimum length of a branch in this tree.
:raises ValueError: If the given rank is out of bounds for trees
with ``num_leaves`` leaves.
"""
rank_tree = combinatorics.RankTree.unrank(num_leaves, rank)
return rank_tree.to_tsk_tree(span=span, branch_length=branch_length)
def count_topologies(self, sample_sets=None) -> tskit.TopologyCounter:
"""
Calculates the distribution of embedded topologies for every combination
of the sample sets in ``sample_sets``. ``sample_sets`` defaults to all
samples in the tree grouped by population.
``sample_sets`` need not include all samples but must be pairwise disjoint.
The returned object is a :class:`tskit.TopologyCounter` that contains
counts of topologies per combination of sample sets. For example,
>>> topology_counter = tree.count_topologies()
>>> rank, count = topology_counter[0, 1, 2].most_common(1)[0]
produces the most common tree topology, with populations 0, 1
and 2 as its tips, according to the genealogies of those
populations' samples in this tree.
The counts for each topology in the :class:`tskit.TopologyCounter`
are absolute counts that we would get if we were to select all
combinations of samples from the relevant sample sets.
For sample sets :math:`[s_0, s_1, ..., s_n]`, the total number of
topologies for those sample sets is equal to
:math:`|s_0| * |s_1| * ... * |s_n|`, so the counts in the counter
``topology_counter[0, 1, ..., n]`` should sum to
:math:`|s_0| * |s_1| * ... * |s_n|`.
To convert the topology counts to probabilities, divide by the total
possible number of sample combinations from the sample sets in question::
>>> set_sizes = [len(sample_set) for sample_set in sample_sets]
>>> p = count / (set_sizes[0] * set_sizes[1] * set_sizes[2])
.. warning:: The interface for this method is preliminary and may be subject to
backwards incompatible changes in the near future.
:param list sample_sets: A list of lists of Node IDs, specifying the
groups of nodes to compute the statistic with.
Defaults to all samples grouped by population.
:raises ValueError: If nodes in ``sample_sets`` are invalid or are
internal samples.
"""
if sample_sets is None:
sample_sets = [
self.tree_sequence.samples(population=pop.id)
for pop in self.tree_sequence.populations()
]
return combinatorics.tree_count_topologies(self, sample_sets)
def get_branch_length(self, u):
# Deprecated alias for branch_length
return self.branch_length(u)
def branch_length(self, u):
"""
Returns the length of the branch (in units of time) joining the
specified node to its parent. This is equivalent to
>>> tree.time(tree.parent(u)) - tree.time(u)
The branch length for a node that has no parent (e.g., a root) is
defined as zero.
Note that this is not related to the property `.length` which
is a deprecated alias for the genomic :attr:`~Tree.span` covered by a tree.
:param int u: The node of interest.
:return: The branch length from u to its parent.
:rtype: float
"""
ret = 0
parent = self.parent(u)
if parent != NULL:
ret = self.time(parent) - self.time(u)
return ret
def get_total_branch_length(self):
# Deprecated alias for total_branch_length
return self.total_branch_length
@property
def total_branch_length(self):
"""
Returns the sum of all the branch lengths in this tree (in
units of time). This is equivalent to
>>> sum(tree.branch_length(u) for u in tree.nodes())
Note that the branch lengths for root nodes are defined as zero.
As this is defined by a traversal of the tree, technically we
return the sum of all branch lengths that are reachable from
roots. Thus, this is the total length of all branches that are connected
to at least one sample. This distinction is only important
in tree sequences that contain 'dead branches', i.e., those
that define topology that is not connected to a tree root
(see :ref:`sec_data_model_tree_dead_leaves_and_branches`)
:return: The sum of lengths of branches in this tree.
:rtype: float
"""
return self._ll_tree.get_total_branch_length()
def get_mrca(self, u, v):
# Deprecated alias for mrca
return self.mrca(u, v)
def mrca(self, *args):
"""
Returns the most recent common ancestor of the specified nodes.
:param int `*args`: input node IDs, at least 2 arguments are required.
:return: The node ID of the most recent common ancestor of the