Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@
compatible with the correct mutation parent.
(:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).

- Mutations returned by ``tsk_treeseq_get_mutation`` now include pre-computed
``inherited_state`` and ``inherited_state_length`` fields. The inherited state
is computed during tree sequence initialization and represents the state that
existed at the site before each mutation occurred (either the ancestral state
if the mutation is the root mutation or the derived state of the parent mutation).
Note that this breaks ABI compatibility due to the addition of these fields
to the ``tsk_mutation_t`` struct.
(:user:`benjeffery`, :pr:`3277`, :issue:`2631`).

**Features**

- Add ``TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`` to ``tsk_treeseq_init``
Expand Down
7 changes: 7 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -3998,24 +3998,31 @@ test_single_tree_good_mutations(void)
CU_ASSERT_EQUAL(other_mutations[0].id, 0);
CU_ASSERT_EQUAL(other_mutations[0].node, 2);
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[1].id, 1);
CU_ASSERT_EQUAL(other_mutations[1].node, 4);
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[2].id, 2);
CU_ASSERT_EQUAL(other_mutations[2].node, 0);
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].derived_state, "0", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].inherited_state, "1", 1);
CU_ASSERT_EQUAL(other_mutations[3].id, 3);
CU_ASSERT_EQUAL(other_mutations[3].node, 0);
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[4].id, 4);
CU_ASSERT_EQUAL(other_mutations[4].node, 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[5].id, 5);
CU_ASSERT_EQUAL(other_mutations[5].node, 2);
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[6].id, 6);
CU_ASSERT_EQUAL(other_mutations[6].node, 3);
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].inherited_state, "0", 1);

tsk_treeseq_free(&ts);
}
Expand Down
4 changes: 4 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ typedef struct {
/** @brief The ID of the edge that this mutation lies on, or TSK_NULL
if there is no corresponding edge.*/
tsk_id_t edge;
/** @brief Inherited state. */
const char *inherited_state;
/** @brief Size of the inherited state in bytes. */
tsk_size_t inherited_state_length;
} tsk_mutation_t;

/**
Expand Down
31 changes: 31 additions & 0 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,13 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
const double *restrict site_position = self->tables->sites.position;
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const tsk_id_t *restrict mutation_parent = self->tables->mutations.parent;
const char *restrict sites_ancestral_state = self->tables->sites.ancestral_state;
const tsk_size_t *restrict sites_ancestral_state_offset
= self->tables->sites.ancestral_state_offset;
const char *restrict mutations_derived_state = self->tables->mutations.derived_state;
const tsk_size_t *restrict mutations_derived_state_offset
= self->tables->mutations.derived_state_offset;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_right = self->tables->edges.right;
Expand All @@ -262,6 +269,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
bool discrete_breakpoints = true;
tsk_id_t *node_edge_map = tsk_malloc(num_nodes * sizeof(*node_edge_map));
tsk_mutation_t *mutation;
tsk_id_t parent_id;

self->tree_sites_length
= tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));
Expand Down Expand Up @@ -311,6 +319,26 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
mutation_id < num_mutations && mutation_site[mutation_id] == site_id) {
mutation = self->site_mutations_mem + mutation_id;
mutation->edge = node_edge_map[mutation->node];

/* Compute inherited state */
if (mutation_parent[mutation_id] == TSK_NULL) {
/* No parent: inherited state is the site's ancestral state */
mutation->inherited_state
= sites_ancestral_state + sites_ancestral_state_offset[site_id];
mutation->inherited_state_length
= sites_ancestral_state_offset[site_id + 1]
- sites_ancestral_state_offset[site_id];
} else {
/* Has parent: inherited state is parent's derived state */
parent_id = mutation_parent[mutation_id];
mutation->inherited_state
= mutations_derived_state
+ mutations_derived_state_offset[parent_id];
mutation->inherited_state_length
= mutations_derived_state_offset[parent_id + 1]
- mutations_derived_state_offset[parent_id];
}

mutation_id++;
}
site_id++;
Expand Down Expand Up @@ -5228,6 +5256,9 @@ tsk_treeseq_get_mutation(
goto out;
}
mutation->edge = self->site_mutations_mem[index].edge;
mutation->inherited_state = self->site_mutations_mem[index].inherited_state;
mutation->inherited_state_length
= self->site_mutations_mem[index].inherited_state_length;
out:
return ret;
}
Expand Down
2 changes: 1 addition & 1 deletion docs/c-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ arbitrarily between releases.
stability, since the primary use-case is for tskit to be embedded
within another application rather than used as a shared library. If you
do intend to use tskit as a shared library and ABI stability is
therefore imporant to you, please let us know and we can plan
therefore important to you, please let us know and we can plan
accordingly.

.. _sec_c_api_overview_structure:
Expand Down
5 changes: 5 additions & 0 deletions docs/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,15 @@ for more information.
TreeSequence.edges_parent
TreeSequence.edges_child
TreeSequence.sites_position
TreeSequence.sites_ancestral_state
TreeSequence.mutations_site
TreeSequence.mutations_node
TreeSequence.mutations_parent
TreeSequence.mutations_time
TreeSequence.mutations_derived_state
TreeSequence.mutations_metadata
TreeSequence.mutations_edge
TreeSequence.mutations_inherited_state
TreeSequence.migrations_left
TreeSequence.migrations_right
TreeSequence.migrations_right
Expand Down
13 changes: 7 additions & 6 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@
- Add ``TreeSequence.mutations_edge`` which returns the edge ID for each mutation's
edge. (:user:`benjeffery`, :pr:`3226`, :issue:`3189`)

- Add ``TreeSequence.mutations_inherited_state`` which returns the inherited state
for each mutation. (:user:`benjeffery`, :pr:`3276`, :issue:`2631`)

- Add ``TreeSequence.sites_ancestral_state`` and ``TreeSequence.mutations_derived_state`` properties
to return the ancestral state of sites and derived state of mutations as NumPy arrays of
- Add ``TreeSequence.sites_ancestral_state``, ``TreeSequence.mutations_derived_state`` and
``TreeSequence.mutations_inherited_state`` properties to return the ancestral state of sites,
derived state of mutations and inherited state of mutations as NumPy arrays of
the new numpy 2.0 StringDType.
(:user:`benjeffery`, :pr:`3228`, :issue:`2632`)
(:user:`benjeffery`, :pr:`3228`, :issue:`2632`, :pr:`3276`, :issue:`2631`)

- Tskit now distributes with a requirement of numpy version 2 or greater. However, you can still use
tskit with numpy 1.X by building tskit from source with numpy 1.X using ``pip install tskit --no-binary tskit``.
Expand All @@ -45,6 +43,9 @@
the error "A module that was compiled using NumPy 1.x cannot be run in NumPy 2.0.0 as it may crash.".
If no newer version of the module is available you will have to use the Numpy 1.X build as above.

- Add ``Mutation.inherited_state`` property which returns the inherited state
for a single mutation. (:user:`benjeffery`, :pr:`3277`, :issue:`2631`)

**Bugfixes**

- In some tables with mutations out-of-order `TableCollection.sort` did not re-order
Expand Down
61 changes: 59 additions & 2 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,10 @@ make_mutation_object(const tsk_mutation_t *mutation)
if (metadata == NULL) {
goto out;
}
ret = Py_BuildValue("iis#iOdi", mutation->site, mutation->node,
ret = Py_BuildValue("iis#iOdis#", mutation->site, mutation->node,
mutation->derived_state, (Py_ssize_t) mutation->derived_state_length,
mutation->parent, metadata, mutation->time, mutation->edge);
mutation->parent, metadata, mutation->time, mutation->edge,
mutation->inherited_state, (Py_ssize_t) mutation->inherited_state_length);
out:
Py_XDECREF(metadata);
return ret;
Expand Down Expand Up @@ -11398,6 +11399,59 @@ TreeSequence_get_mutations_derived_state(TreeSequence *self, void *closure)
out:
return ret;
}
static PyObject *
TreeSequence_get_mutations_inherited_state(TreeSequence *self, void *closure)
{
PyObject *ret = NULL;
tsk_treeseq_t *ts;
tsk_size_t num_mutations;
char *inherited_state_data = NULL;
tsk_size_t *inherited_state_offsets = NULL;
tsk_size_t total_length = 0;
tsk_size_t j, offset;

if (TreeSequence_check_state(self) != 0) {
goto out;
}

ts = self->tree_sequence;
num_mutations = ts->tables->mutations.num_rows;

/* Calculate total length needed for inherited state data */
for (j = 0; j < num_mutations; j++) {
total_length += ts->site_mutations_mem[j].inherited_state_length;
}

/* Allocate memory for the ragged array */
inherited_state_data = PyMem_Malloc(total_length * sizeof(char));
inherited_state_offsets = PyMem_Malloc((num_mutations + 1) * sizeof(tsk_size_t));
if (inherited_state_data == NULL || inherited_state_offsets == NULL) {
PyErr_NoMemory();
goto out;
}

/* Populate the ragged array data */
offset = 0;
for (j = 0; j < num_mutations; j++) {
inherited_state_offsets[j] = offset;
memcpy(inherited_state_data + offset, ts->site_mutations_mem[j].inherited_state,
ts->site_mutations_mem[j].inherited_state_length);
offset += ts->site_mutations_mem[j].inherited_state_length;
}
inherited_state_offsets[num_mutations] = offset;

ret = TreeSequence_decode_ragged_string_column(
self, num_mutations, inherited_state_data, inherited_state_offsets);

out:
if (inherited_state_data != NULL) {
PyMem_Free(inherited_state_data);
}
if (inherited_state_offsets != NULL) {
PyMem_Free(inherited_state_offsets);
}
return ret;
}
#endif

static PyObject *
Expand Down Expand Up @@ -12017,6 +12071,9 @@ static PyGetSetDef TreeSequence_getsetters[] = {
{ .name = "mutations_derived_state",
.get = (getter) TreeSequence_get_mutations_derived_state,
.doc = "The mutation derived state array" },
{ .name = "mutations_inherited_state",
.get = (getter) TreeSequence_get_mutations_inherited_state,
.doc = "The mutation inherited state array" },
#endif
{ .name = "mutations_metadata",
.get = (getter) TreeSequence_get_mutations_metadata,
Expand Down
2 changes: 2 additions & 0 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def make_mutation(id_):
metadata,
time,
edge,
inherited_state,
) = ll_ts.get_mutation(id_)
return tskit.Mutation(
id=id_,
Expand All @@ -160,6 +161,7 @@ def make_mutation(id_):
parent=parent,
metadata=metadata,
edge=edge,
inherited_state=inherited_state,
metadata_decoder=tskit.metadata.parse_metadata_schema(
ll_ts.get_table_metadata_schemas().mutation
).decode_row,
Expand Down
23 changes: 23 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,21 @@ def test_trees(self, ts):
def test_mutations(self, ts):
self.verify_mutations(ts)

@pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
def test_mutation_inherited_state_property(self, ts):
inherited_states = ts.mutations_inherited_state
for mut in ts.mutations():
expected = inherited_states[mut.id]
actual = mut.inherited_state
assert actual == expected

if mut.parent == tskit.NULL:
expected_direct = ts.site(mut.site).ancestral_state
else:
expected_direct = ts.mutation(mut.parent).derived_state
assert actual == expected_direct

def verify_pairwise_diversity(self, ts):
haplotypes = ts.genotype_matrix(isolated_as_missing=False).T
if ts.num_samples == 0:
Expand Down Expand Up @@ -5546,6 +5561,14 @@ def test_equality_mutations_derived_state(self, ts):
[mutation.derived_state for mutation in ts.mutations()],
)

@pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
def test_equality_mutations_inherited_state(self, ts):
assert_array_equal(
ts.mutations_inherited_state,
[mutation.inherited_state for mutation in ts.mutations()],
)

@pytest.mark.skipif(not _tskit.HAS_NUMPY_2, reason="Requires NumPy 2.0 or higher")
@pytest.mark.parametrize("ts", tsutil.get_example_tree_sequences())
def test_mutations_inherited_state(self, ts):
Expand Down
4 changes: 4 additions & 0 deletions python/tests/test_jit.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,10 @@ def test_jitwrap_properties(ts):
assert numba_ts.mutations_time.dtype == np.float64
nt.assert_array_equal(numba_ts.mutations_derived_state, ts.mutations_derived_state)
assert numba_ts.mutations_derived_state.dtype.kind == "U" # Unicode string
nt.assert_array_equal(
numba_ts.mutations_inherited_state, ts.mutations_inherited_state
)
assert numba_ts.mutations_inherited_state.dtype.kind == "U" # Unicode string
nt.assert_array_equal(
numba_ts.indexes_edge_insertion_order, ts.indexes_edge_insertion_order
)
Expand Down
Loading
Loading