From 71908cb778883c706b846b59a05bcec53cca6dbc Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 22 Sep 2025 14:29:10 +0100 Subject: [PATCH 1/3] Add mutation.inherited_state --- c/tests/test_trees.c | 7 +++++++ c/tskit/tables.h | 4 ++++ c/tskit/trees.c | 30 ++++++++++++++++++++++++++++++ docs/c-api.rst | 2 +- python/CHANGELOG.rst | 12 ++++++++---- python/_tskitmodule.c | 5 +++-- python/tests/__init__.py | 2 ++ python/tests/test_highlevel.py | 14 ++++++++++++++ python/tests/test_lowlevel.py | 13 ++++++++++++- python/tskit/trees.py | 12 ++++++++++++ 10 files changed, 93 insertions(+), 8 deletions(-) diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index 77060a85ba..2b0b042c88 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -3998,24 +3998,31 @@ test_single_tree_good_mutations(void) CU_ASSERT_EQUAL(other_mutations[0].id, 0); CU_ASSERT_EQUAL(other_mutations[0].node, 2); CU_ASSERT_NSTRING_EQUAL(other_mutations[0].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[0].inherited_state, "0", 1); CU_ASSERT_EQUAL(other_mutations[1].id, 1); CU_ASSERT_EQUAL(other_mutations[1].node, 4); CU_ASSERT_NSTRING_EQUAL(other_mutations[1].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[1].inherited_state, "0", 1); CU_ASSERT_EQUAL(other_mutations[2].id, 2); CU_ASSERT_EQUAL(other_mutations[2].node, 0); CU_ASSERT_NSTRING_EQUAL(other_mutations[2].derived_state, "0", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[2].inherited_state, "1", 1); CU_ASSERT_EQUAL(other_mutations[3].id, 3); CU_ASSERT_EQUAL(other_mutations[3].node, 0); CU_ASSERT_NSTRING_EQUAL(other_mutations[3].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[3].inherited_state, "0", 1); CU_ASSERT_EQUAL(other_mutations[4].id, 4); CU_ASSERT_EQUAL(other_mutations[4].node, 1); CU_ASSERT_NSTRING_EQUAL(other_mutations[4].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[4].inherited_state, "0", 1); CU_ASSERT_EQUAL(other_mutations[5].id, 5); CU_ASSERT_EQUAL(other_mutations[5].node, 2); CU_ASSERT_NSTRING_EQUAL(other_mutations[5].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[5].inherited_state, "0", 1); CU_ASSERT_EQUAL(other_mutations[6].id, 6); CU_ASSERT_EQUAL(other_mutations[6].node, 3); CU_ASSERT_NSTRING_EQUAL(other_mutations[6].derived_state, "1", 1); + CU_ASSERT_NSTRING_EQUAL(other_mutations[6].inherited_state, "0", 1); tsk_treeseq_free(&ts); } diff --git a/c/tskit/tables.h b/c/tskit/tables.h index ebeb366483..85ed29d58c 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -158,6 +158,10 @@ typedef struct { /** @brief The ID of the edge that this mutation lies on, or TSK_NULL if there is no corresponding edge.*/ tsk_id_t edge; + /** @brief Inherited state. */ + const char *inherited_state; + /** @brief Size of the inherited state in bytes. */ + tsk_size_t inherited_state_length; } tsk_mutation_t; /** diff --git a/c/tskit/trees.c b/c/tskit/trees.c index f0aaaba578..21e160e518 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -253,6 +253,13 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self) const tsk_size_t num_nodes = self->tables->nodes.num_rows; const double *restrict site_position = self->tables->sites.position; const tsk_id_t *restrict mutation_site = self->tables->mutations.site; + const tsk_id_t *restrict mutation_parent = self->tables->mutations.parent; + const char *restrict sites_ancestral_state = self->tables->sites.ancestral_state; + const tsk_size_t *restrict sites_ancestral_state_offset + = self->tables->sites.ancestral_state_offset; + const char *restrict mutations_derived_state = self->tables->mutations.derived_state; + const tsk_size_t *restrict mutations_derived_state_offset + = self->tables->mutations.derived_state_offset; const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order; const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order; const double *restrict edge_right = self->tables->edges.right; @@ -311,6 +318,26 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self) mutation_id < num_mutations && mutation_site[mutation_id] == site_id) { mutation = self->site_mutations_mem + mutation_id; mutation->edge = node_edge_map[mutation->node]; + + /* Compute inherited state */ + if (mutation_parent[mutation_id] == TSK_NULL) { + /* No parent: inherited state is the site's ancestral state */ + mutation->inherited_state + = sites_ancestral_state + sites_ancestral_state_offset[site_id]; + mutation->inherited_state_length + = sites_ancestral_state_offset[site_id + 1] + - sites_ancestral_state_offset[site_id]; + } else { + /* Has parent: inherited state is parent's derived state */ + tsk_id_t parent_id = mutation_parent[mutation_id]; + mutation->inherited_state + = mutations_derived_state + + mutations_derived_state_offset[parent_id]; + mutation->inherited_state_length + = mutations_derived_state_offset[parent_id + 1] + - mutations_derived_state_offset[parent_id]; + } + mutation_id++; } site_id++; @@ -5228,6 +5255,9 @@ tsk_treeseq_get_mutation( goto out; } mutation->edge = self->site_mutations_mem[index].edge; + mutation->inherited_state = self->site_mutations_mem[index].inherited_state; + mutation->inherited_state_length + = self->site_mutations_mem[index].inherited_state_length; out: return ret; } diff --git a/docs/c-api.rst b/docs/c-api.rst index 83f19ca9c1..a1f964113c 100644 --- a/docs/c-api.rst +++ b/docs/c-api.rst @@ -65,7 +65,7 @@ arbitrarily between releases. stability, since the primary use-case is for tskit to be embedded within another application rather than used as a shared library. If you do intend to use tskit as a shared library and ABI stability is - therefore imporant to you, please let us know and we can plan + therefore important to you, please let us know and we can plan accordingly. .. _sec_c_api_overview_structure: diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 5d19a8e660..fd3d3d5b66 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -30,13 +30,14 @@ - Add ``TreeSequence.mutations_edge`` which returns the edge ID for each mutation's edge. (:user:`benjeffery`, :pr:`3226`, :issue:`3189`) -- Add ``TreeSequence.mutations_inherited_state`` which returns the inherited state +- Add which returns the inherited state for each mutation. (:user:`benjeffery`, :pr:`3276`, :issue:`2631`) -- Add ``TreeSequence.sites_ancestral_state`` and ``TreeSequence.mutations_derived_state`` properties - to return the ancestral state of sites and derived state of mutations as NumPy arrays of +- Add ``TreeSequence.sites_ancestral_state``, ``TreeSequence.mutations_derived_state`` and + ``TreeSequence.mutations_inherited_state`` properties to return the ancestral state of sites, + derived state of mutations and inherited state of mutations as NumPy arrays of the new numpy 2.0 StringDType. - (:user:`benjeffery`, :pr:`3228`, :issue:`2632`) + (:user:`benjeffery`, :pr:`3228`, :issue:`2632`, :pr:`3276`, :issue:`2631`) - Tskit now distributes with a requirement of numpy version 2 or greater. However, you can still use tskit with numpy 1.X by building tskit from source with numpy 1.X using ``pip install tskit --no-binary tskit``. @@ -45,6 +46,9 @@ the error "A module that was compiled using NumPy 1.x cannot be run in NumPy 2.0.0 as it may crash.". If no newer version of the module is available you will have to use the Numpy 1.X build as above. +- Add ``Mutation.inherited_state`` property which returns the inherited state + for a single mutation. (:user:`benjeffery`, :pr:`3277`, :issue:`2631`) + **Bugfixes** - In some tables with mutations out-of-order `TableCollection.sort` did not re-order diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 8c11562722..4f2a675c44 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -363,9 +363,10 @@ make_mutation_object(const tsk_mutation_t *mutation) if (metadata == NULL) { goto out; } - ret = Py_BuildValue("iis#iOdi", mutation->site, mutation->node, + ret = Py_BuildValue("iis#iOdis#", mutation->site, mutation->node, mutation->derived_state, (Py_ssize_t) mutation->derived_state_length, - mutation->parent, metadata, mutation->time, mutation->edge); + mutation->parent, metadata, mutation->time, mutation->edge, + mutation->inherited_state, (Py_ssize_t) mutation->inherited_state_length); out: Py_XDECREF(metadata); return ret; diff --git a/python/tests/__init__.py b/python/tests/__init__.py index f069f04f2e..d4c39d400b 100644 --- a/python/tests/__init__.py +++ b/python/tests/__init__.py @@ -150,6 +150,7 @@ def make_mutation(id_): metadata, time, edge, + inherited_state, ) = ll_ts.get_mutation(id_) return tskit.Mutation( id=id_, @@ -160,6 +161,7 @@ def make_mutation(id_): parent=parent, metadata=metadata, edge=edge, + inherited_state=inherited_state, metadata_decoder=tskit.metadata.parse_metadata_schema( ll_ts.get_table_metadata_schemas().mutation ).decode_row, diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index ac42a5ca9e..65879bfb3f 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1218,6 +1218,20 @@ def test_trees(self, ts): def test_mutations(self, ts): self.verify_mutations(ts) + @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) + def test_mutation_inherited_state_property(self, ts): + inherited_states = ts.mutations_inherited_state + for mut in ts.mutations(): + expected = inherited_states[mut.id] + actual = mut.inherited_state + assert actual == expected + + if mut.parent == tskit.NULL: + expected_direct = ts.site(mut.site).ancestral_state + else: + expected_direct = ts.mutation(mut.parent).derived_state + assert actual == expected_direct + def verify_pairwise_diversity(self, ts): haplotypes = ts.genotype_matrix(isolated_as_missing=False).T if ts.num_samples == 0: diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index ee3e856628..5494a2a641 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -1336,7 +1336,16 @@ def test_get_mutation_interface(self): assert len(mutations) == ts.get_num_mutations() # Check the form of the mutations for packed in mutations: - site, node, derived_state, parent, metadata, time, edge = packed + ( + site, + node, + derived_state, + parent, + metadata, + time, + edge, + inherited_state, + ) = packed assert isinstance(site, int) assert isinstance(node, int) assert isinstance(derived_state, str) @@ -1344,6 +1353,7 @@ def test_get_mutation_interface(self): assert isinstance(metadata, bytes) assert isinstance(time, float) assert isinstance(edge, int) + assert isinstance(inherited_state, str) def test_get_edge_interface(self): for ts in self.get_example_tree_sequences(): @@ -3867,6 +3877,7 @@ def test_sites(self): metadata, time, edge, + inherited_state, ) = ts.get_mutation(mut_id) assert site == index assert mutation_id == mut_id diff --git a/python/tskit/trees.py b/python/tskit/trees.py index 5874392bd0..f43143c20a 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -425,6 +425,7 @@ class Mutation(util.Dataclass): "metadata", "time", "edge", + "inherited_state", ] id: int # noqa A003 """ @@ -473,6 +474,13 @@ class Mutation(util.Dataclass): """ The ID of the edge that this mutation is on. """ + inherited_state: str + """ + The inherited state for this mutation. This is the state that existed at the site + before this mutation occurred. This is either the ancestral state of the site + (if the mutation has no parent) or the derived state of the mutation's + parent mutation (if it has a parent). + """ # To get default values on slots we define a custom init def __init__( @@ -485,6 +493,7 @@ def __init__( parent=NULL, metadata=b"", edge=NULL, + inherited_state=None, ): self.id = id self.site = site @@ -494,6 +503,7 @@ def __init__( self.parent = parent self.metadata = metadata self.edge = edge + self.inherited_state = inherited_state # We need a custom eq to compare unknown times. def __eq__(self, other): @@ -6333,6 +6343,7 @@ def mutation(self, id_): metadata, time, edge, + inherited_state, ) = self._ll_tree_sequence.get_mutation(id_) return Mutation( id=id_, @@ -6343,6 +6354,7 @@ def mutation(self, id_): metadata=metadata, time=time, edge=edge, + inherited_state=inherited_state, metadata_decoder=self.table_metadata_schemas.mutation.decode_row, ) From 0527675987ade3ed36e15a94a14475316c81e2f3 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Tue, 23 Sep 2025 13:38:16 +0100 Subject: [PATCH 2/3] Move inherited_state array to CPython --- c/CHANGELOG.rst | 9 ++++++ c/tskit/trees.c | 3 +- docs/python-api.md | 1 + python/CHANGELOG.rst | 3 -- python/_tskitmodule.c | 56 ++++++++++++++++++++++++++++++++++ python/tests/test_highlevel.py | 8 +++++ python/tests/test_jit.py | 4 +++ python/tests/test_lowlevel.py | 32 ++++++++++++++++++- python/tskit/jit/numba.py | 22 +++++++++++-- python/tskit/trees.py | 14 ++++----- 10 files changed, 138 insertions(+), 14 deletions(-) diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index bd0017d145..014d5ca885 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -14,6 +14,15 @@ compatible with the correct mutation parent. (:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`). +- Mutations returned by ``tsk_treeseq_get_mutation`` now include pre-computed + ``inherited_state`` and ``inherited_state_length`` fields. The inherited state + is computed during tree sequence initialization and represents the state that + existed at the site before each mutation occurred (either the ancestral state + if the mutation is the root mutation or the derived state of the parent mutation). + Note that this breaks ABI compatibility due to the addition of these fields + to the ``tsk_mutation_t`` struct. + (:user:`benjeffery`, :pr:`3277`, :issue:`2631`). + **Features** - Add ``TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`` to ``tsk_treeseq_init`` diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 21e160e518..7a159a7fe4 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -269,6 +269,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self) bool discrete_breakpoints = true; tsk_id_t *node_edge_map = tsk_malloc(num_nodes * sizeof(*node_edge_map)); tsk_mutation_t *mutation; + tsk_id_t parent_id; self->tree_sites_length = tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length)); @@ -329,7 +330,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self) - sites_ancestral_state_offset[site_id]; } else { /* Has parent: inherited state is parent's derived state */ - tsk_id_t parent_id = mutation_parent[mutation_id]; + parent_id = mutation_parent[mutation_id]; mutation->inherited_state = mutations_derived_state + mutations_derived_state_offset[parent_id]; diff --git a/docs/python-api.md b/docs/python-api.md index 10a52c8c6c..719866f3b8 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -92,6 +92,7 @@ for more information. TreeSequence.edges_parent TreeSequence.edges_child TreeSequence.sites_position + TreeSequence.sites_ancestral_state TreeSequence.mutations_site TreeSequence.mutations_node TreeSequence.mutations_parent diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index fd3d3d5b66..ab2393813d 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -30,9 +30,6 @@ - Add ``TreeSequence.mutations_edge`` which returns the edge ID for each mutation's edge. (:user:`benjeffery`, :pr:`3226`, :issue:`3189`) -- Add which returns the inherited state - for each mutation. (:user:`benjeffery`, :pr:`3276`, :issue:`2631`) - - Add ``TreeSequence.sites_ancestral_state``, ``TreeSequence.mutations_derived_state`` and ``TreeSequence.mutations_inherited_state`` properties to return the ancestral state of sites, derived state of mutations and inherited state of mutations as NumPy arrays of diff --git a/python/_tskitmodule.c b/python/_tskitmodule.c index 4f2a675c44..f595cf5150 100644 --- a/python/_tskitmodule.c +++ b/python/_tskitmodule.c @@ -11399,6 +11399,59 @@ TreeSequence_get_mutations_derived_state(TreeSequence *self, void *closure) out: return ret; } +static PyObject * +TreeSequence_get_mutations_inherited_state(TreeSequence *self, void *closure) +{ + PyObject *ret = NULL; + tsk_treeseq_t *ts; + tsk_size_t num_mutations; + char *inherited_state_data = NULL; + tsk_size_t *inherited_state_offsets = NULL; + tsk_size_t total_length = 0; + tsk_size_t j, offset; + + if (TreeSequence_check_state(self) != 0) { + goto out; + } + + ts = self->tree_sequence; + num_mutations = ts->tables->mutations.num_rows; + + /* Calculate total length needed for inherited state data */ + for (j = 0; j < num_mutations; j++) { + total_length += ts->site_mutations_mem[j].inherited_state_length; + } + + /* Allocate memory for the ragged array */ + inherited_state_data = PyMem_Malloc(total_length * sizeof(char)); + inherited_state_offsets = PyMem_Malloc((num_mutations + 1) * sizeof(tsk_size_t)); + if (inherited_state_data == NULL || inherited_state_offsets == NULL) { + PyErr_NoMemory(); + goto out; + } + + /* Populate the ragged array data */ + offset = 0; + for (j = 0; j < num_mutations; j++) { + inherited_state_offsets[j] = offset; + memcpy(inherited_state_data + offset, ts->site_mutations_mem[j].inherited_state, + ts->site_mutations_mem[j].inherited_state_length); + offset += ts->site_mutations_mem[j].inherited_state_length; + } + inherited_state_offsets[num_mutations] = offset; + + ret = TreeSequence_decode_ragged_string_column( + self, num_mutations, inherited_state_data, inherited_state_offsets); + +out: + if (inherited_state_data != NULL) { + PyMem_Free(inherited_state_data); + } + if (inherited_state_offsets != NULL) { + PyMem_Free(inherited_state_offsets); + } + return ret; +} #endif static PyObject * @@ -12018,6 +12071,9 @@ static PyGetSetDef TreeSequence_getsetters[] = { { .name = "mutations_derived_state", .get = (getter) TreeSequence_get_mutations_derived_state, .doc = "The mutation derived state array" }, + { .name = "mutations_inherited_state", + .get = (getter) TreeSequence_get_mutations_inherited_state, + .doc = "The mutation inherited state array" }, #endif { .name = "mutations_metadata", .get = (getter) TreeSequence_get_mutations_metadata, diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 65879bfb3f..177dfbed76 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -5560,6 +5560,14 @@ def test_equality_mutations_derived_state(self, ts): [mutation.derived_state for mutation in ts.mutations()], ) + @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher") + @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) + def test_equality_mutations_inherited_state(self, ts): + assert_array_equal( + ts.mutations_inherited_state, + [mutation.inherited_state for mutation in ts.mutations()], + ) + @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher") @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) def test_mutations_inherited_state(self, ts): diff --git a/python/tests/test_jit.py b/python/tests/test_jit.py index 0bdf9a5529..38aede1894 100644 --- a/python/tests/test_jit.py +++ b/python/tests/test_jit.py @@ -420,6 +420,10 @@ def test_jitwrap_properties(ts): assert numba_ts.mutations_time.dtype == np.float64 nt.assert_array_equal(numba_ts.mutations_derived_state, ts.mutations_derived_state) assert numba_ts.mutations_derived_state.dtype.kind == "U" # Unicode string + nt.assert_array_equal( + numba_ts.mutations_inherited_state, ts.mutations_inherited_state + ) + assert numba_ts.mutations_inherited_state.dtype.kind == "U" # Unicode string nt.assert_array_equal( numba_ts.indexes_edge_insertion_order, ts.indexes_edge_insertion_order ) diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 5494a2a641..1d702249b7 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -2192,7 +2192,12 @@ def test_generated_columns(self, ts_fixture, name): @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0+") @pytest.mark.parametrize( - "string_array", ["sites_ancestral_state", "mutations_derived_state"] + "string_array", + [ + "sites_ancestral_state", + "mutations_derived_state", + "mutations_inherited_state", + ], ) @pytest.mark.parametrize( "str_lengths", @@ -2210,6 +2215,9 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): elif string_array == "mutations_derived_state": assert ts.num_mutations > 0 assert {len(mut.derived_state) for mut in ts.mutations()} == {1} + elif string_array == "mutations_inherited_state": + assert ts.num_mutations > 0 + assert {len(mut.inherited_state) for mut in ts.mutations()} == {1} else: tables = ts_fixture.dump_tables() @@ -2239,6 +2247,25 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): derived_state=get_derived_state(i, mutation) ) ) + elif string_array == "mutations_inherited_state": + # For inherited state, we modify sites and mutations to create + # varied lengths + sites = tables.sites.copy() + tables.sites.clear() + get_ancestral_state = str_map[str_lengths] + for i, site in enumerate(sites): + tables.sites.append( + site.replace(ancestral_state=get_ancestral_state(i, site)) + ) + mutations = tables.mutations.copy() + tables.mutations.clear() + get_derived_state = str_map[str_lengths] + for i, mutation in enumerate(mutations): + tables.mutations.append( + mutation.replace( + derived_state=get_derived_state(i, mutation) + ) + ) ts = tables.tree_sequence() ll_ts = ts.ll_tree_sequence @@ -2255,6 +2282,9 @@ def test_string_arrays(self, ts_fixture, str_lengths, string_array): elif string_array == "mutations_derived_state": for mutation in ts.mutations(): assert a[mutation.id] == mutation.derived_state + elif string_array == "mutations_inherited_state": + for mutation in ts.mutations(): + assert a[mutation.id] == mutation.inherited_state # Read only with pytest.raises(AttributeError, match="not writable"): diff --git a/python/tskit/jit/numba.py b/python/tskit/jit/numba.py index b079e9d71f..40534431d1 100644 --- a/python/tskit/jit/numba.py +++ b/python/tskit/jit/numba.py @@ -402,9 +402,11 @@ def __init__( mutations_parent, mutations_time, mutations_derived_state, + mutations_inherited_state, breakpoints, max_ancestral_length, max_derived_length, + max_inherited_length, ): self.num_trees = num_trees self.num_nodes = num_nodes @@ -431,9 +433,11 @@ def __init__( self.mutations_parent = mutations_parent self.mutations_time = mutations_time self.mutations_derived_state = mutations_derived_state + self.mutations_inherited_state = mutations_inherited_state self.breakpoints = breakpoints self.max_ancestral_length = max_ancestral_length self.max_derived_length = max_derived_length + self.max_inherited_length = max_inherited_length def tree_index(self): """ @@ -526,7 +530,7 @@ def parent_index(self): # We cache these classes to avoid repeated JIT compilation @functools.lru_cache(None) -def _jitwrap(max_ancestral_length, max_derived_length): +def _jitwrap(max_ancestral_length, max_derived_length, max_inherited_length): # We have a circular dependency in JIT compilation between NumbaTreeSequence # and NumbaTreeIndex so we used a deferred type to break it tree_sequence_type = numba.deferred_type() @@ -576,9 +580,14 @@ def _jitwrap(max_ancestral_length, max_derived_length): ("mutations_parent", numba.int32[:]), ("mutations_time", numba.float64[:]), ("mutations_derived_state", numba.types.UnicodeCharSeq(max_derived_length)[:]), + ( + "mutations_inherited_state", + numba.types.UnicodeCharSeq(max_inherited_length)[:], + ), ("breakpoints", numba.float64[:]), ("max_ancestral_length", numba.int32), ("max_derived_length", numba.int32), + ("max_inherited_length", numba.int32), ] # The `tree_index` method on NumbaTreeSequence uses NumbaTreeIndex @@ -614,8 +623,13 @@ def jitwrap(ts): """ max_ancestral_length = max(1, max(map(len, ts.sites_ancestral_state), default=1)) max_derived_length = max(1, max(map(len, ts.mutations_derived_state), default=1)) + max_inherited_length = max( + 1, max(map(len, ts.mutations_inherited_state), default=1) + ) - JittedTreeSequence = _jitwrap(max_ancestral_length, max_derived_length) + JittedTreeSequence = _jitwrap( + max_ancestral_length, max_derived_length, max_inherited_length + ) # Create the tree sequence instance numba_ts = JittedTreeSequence( @@ -648,9 +662,13 @@ def jitwrap(ts): mutations_derived_state=ts.mutations_derived_state.astype( f"U{max_derived_length}" ), + mutations_inherited_state=ts.mutations_inherited_state.astype( + f"U{max_inherited_length}" + ), breakpoints=ts.breakpoints(as_array=True), max_ancestral_length=max_ancestral_length, max_derived_length=max_derived_length, + max_inherited_length=max_inherited_length, ) return numba_ts diff --git a/python/tskit/trees.py b/python/tskit/trees.py index f43143c20a..81c49c3224 100644 --- a/python/tskit/trees.py +++ b/python/tskit/trees.py @@ -6092,14 +6092,14 @@ def mutations_inherited_state(self): :return: Array of shape (num_mutations,) containing inherited states. :rtype: numpy.ndarray """ + if not _tskit.HAS_NUMPY_2: + raise RuntimeError( + "The mutations_inherited_state property requires numpy 2.0 or later." + ) if self._mutations_inherited_state is None: - inherited_state = self.sites_ancestral_state[self.mutations_site] - mutations_with_parent = self.mutations_parent != -1 - parent = self.mutations_parent[mutations_with_parent] - inherited_state[mutations_with_parent] = self.mutations_derived_state[ - parent - ] - self._mutations_inherited_state = inherited_state + self._mutations_inherited_state = ( + self._ll_tree_sequence.mutations_inherited_state + ) return self._mutations_inherited_state @property From f210492f91e09df71d720123fd8a130daab1d680 Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 22 Sep 2025 14:29:27 +0100 Subject: [PATCH 3/3] Document missing arrays --- docs/python-api.md | 4 ++++ python/tests/test_highlevel.py | 1 + 2 files changed, 5 insertions(+) diff --git a/docs/python-api.md b/docs/python-api.md index 719866f3b8..ae13ef1836 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -97,6 +97,10 @@ for more information. TreeSequence.mutations_node TreeSequence.mutations_parent TreeSequence.mutations_time + TreeSequence.mutations_derived_state + TreeSequence.mutations_metadata + TreeSequence.mutations_edge + TreeSequence.mutations_inherited_state TreeSequence.migrations_left TreeSequence.migrations_right TreeSequence.migrations_right diff --git a/python/tests/test_highlevel.py b/python/tests/test_highlevel.py index 177dfbed76..96d2947ff4 100644 --- a/python/tests/test_highlevel.py +++ b/python/tests/test_highlevel.py @@ -1218,6 +1218,7 @@ def test_trees(self, ts): def test_mutations(self, ts): self.verify_mutations(ts) + @pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher") @pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences()) def test_mutation_inherited_state_property(self, ts): inherited_states = ts.mutations_inherited_state