diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 2bf28f5446..3d09d0b1af 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -7,6 +7,10 @@ - SVG drawing routines now return a special string object that is automatically rendered in a Jupyter notebook (:user:`hyanwong`, :pr:`2377`) +**Features** + +- New ``Site.alleles`` property (:user:`hyanwong`, :issue:`2380`, :pr:`2385`) + -------------------- [0.5.0] - 2022-06-22 diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 96b6dadad5..32db07cf83 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -2592,6 +2592,29 @@ def test_individual_properties(self, n): self.verify_individual_properties(ts) +class TestSiteAlleles: + def test_no_mutations(self): + tables = tskit.TableCollection(sequence_length=1) + tables.sites.add_row(0, ancestral_state="") + site = tables.tree_sequence().site(0) + assert site.alleles == {""} + + @pytest.mark.parametrize("k", range(5)) + def test_k_mutations(self, k): + tables = tskit.TableCollection(sequence_length=1) + tables.sites.add_row(0, ancestral_state="ABC") + tables.nodes.add_row(1, 0) + tables.nodes.add_row(1, 0) # will not have any mutations => missing + for j in range(k): + tables.mutations.add_row(site=0, node=0, derived_state=str(j)) + ts = tables.tree_sequence() + variant = next(ts.variants()) + assert variant.has_missing_data + assert len(variant.site.alleles) == k + 1 + assert "ABC" in variant.site.alleles + assert variant.site.alleles == set(variant.alleles[:-1]) + + class TestEdgeDiffs: @pytest.mark.parametrize("ts", get_example_tree_sequences()) def test_correct_trees_forward(self, ts): diff --git a/python/tskit/trees.py b/python/tskit/trees.py index bfdec57ae4..a09c062060 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -357,6 +357,22 @@ def __eq__(self, other): and self.metadata == other.metadata ) + @property + def alleles(self) -> set[str]: + """ + Return the set of all the
alleles defined at this site. + + .. note:: + This deliberately returns an (unordered) *set* of the possible allelic + states (as defined by the site's ancestral allele and its associated + mutations). If you wish to obtain an (ordered) *list* of alleles, for + example to translate the numeric genotypes at a site into allelic states, + you should instead use the ``.alleles`` attribute of the :class:`Variant` class, + which, unlike this attribute, includes ``None`` as a state when there is + missing data at a site. + """ + return {self.ancestral_state} | {m.derived_state for m in self.mutations} + @metadata_module.lazy_decode() @dataclass