Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
- Change the type of genotypes to ``int32_t``, removing the TSK_16_BIT_GENOTYPES flag option.
(:user:`benjeffery`, :issue:`463`, :pr:`2108`)

- ``tsk_variant_t`` now includes its ``tsk_site_t`` rather than pointing to it.
- ``tsk_variant_t`` now includes its ``tsk_site_t`` rather than pointing to it.
(:user:`benjeffery`, :issue:`2161`, :pr:`2162`)

- Rename TSK_TAKE_TABLES to TSK_TAKE_OWNERSHIP.
Expand Down Expand Up @@ -37,6 +37,9 @@
- Make dumping of tables and tree sequences to disk a zero-copy operation.
(:user:`benjeffery`, :issue:`2111`, :pr:`2124`)

- Add ``edge`` attribute to ``mutation_t`` struct make available in tree sequence.
(:user:`jeromekelleher`, :issue:`685`, :pr:`2279`)

----------------------
[0.99.15] - 2021-12-07
----------------------
Expand Down
133 changes: 133 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -3437,6 +3437,63 @@ test_simplest_chained_map_mutations(void)
tsk_treeseq_free(&ts);
}

static void
test_simplest_mutation_edges(void)
{
const char *nodes = "1 0 0\n"
"0 1 0\n"
"0 1 0";
const char *edges = "0 1 1 0\n"
"1 2 2 0\n";
const char *sites = "0.5 0\n"
"1.5 0\n";
const char *mutations = "0 1 1\n"
"0 0 1\n"
"0 2 1\n"
"1 0 1\n"
"1 1 1\n"
"1 2 1\n";
tsk_treeseq_t ts;
tsk_tree_t tree;
/* We have mutations over roots, samples and just isolated nodes */
tsk_id_t mutation_edges[] = { -1, 0, -1, 1, -1, -1 };
tsk_size_t i, j, k, t;
tsk_mutation_t mut;
tsk_site_t site;
int ret;

tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);

for (j = 0; j < tsk_treeseq_get_num_mutations(&ts); j++) {
ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);
}

ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
i = 0;
for (t = 0; t < 2; t++) {
ret = tsk_tree_next(&tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
for (j = 0; j < tree.sites_length; j++) {
site = tree.sites[j];
for (k = 0; k < site.mutations_length; k++) {
CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);
i++;
}
}
}
CU_ASSERT_EQUAL(i, 6);

tsk_tree_free(&tree);
tsk_treeseq_free(&ts);
}

/*=======================================================
* Single tree tests.
*======================================================*/
Expand Down Expand Up @@ -4465,6 +4522,44 @@ test_single_tree_compute_mutation_times(void)
tsk_table_collection_free(&tables);
}

static void
test_single_tree_mutation_edges(void)
{
int ret = 0;
tsk_size_t i, j, k;
tsk_treeseq_t ts;
tsk_tree_t tree;
tsk_mutation_t mut;
tsk_site_t site;
tsk_id_t mutation_edges[] = { 2, 4, 0, 0, 1, 2, 3 };

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

for (j = 0; j < 7; j++) {
ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);
}

ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_first(&tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
i = 0;
for (j = 0; j < tree.sites_length; j++) {
site = tree.sites[j];
for (k = 0; k < site.mutations_length; k++) {
CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);
i++;
}
}
CU_ASSERT_EQUAL(i, 7);

tsk_tree_free(&tree);
tsk_treeseq_free(&ts);
}

static void
test_single_tree_is_descendant(void)
{
Expand Down Expand Up @@ -6557,6 +6652,40 @@ test_tree_errors(void)
tsk_treeseq_free(&ts);
}

static void
test_treeseq_row_access_errors(void)
{
int ret;
tsk_table_collection_t tables;
tsk_treeseq_t ts;

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_treeseq_get_individual(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_node(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_edge(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_migration(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_site(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_mutation(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_population(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret = tsk_treeseq_get_provenance(&ts, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);

tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}

static void
test_tree_copy_flags(void)
{
Expand Down Expand Up @@ -7206,6 +7335,7 @@ main(int argc, char **argv)
{ "test_simplest_multiple_root_map_mutations",
test_simplest_multiple_root_map_mutations },
{ "test_simplest_chained_map_mutations", test_simplest_chained_map_mutations },
{ "test_simplest_mutation_edges", test_simplest_mutation_edges },

/* Single tree tests */
{ "test_single_tree_good_records", test_single_tree_good_records },
Expand All @@ -7232,6 +7362,7 @@ main(int argc, char **argv)
test_single_tree_compute_mutation_parents },
{ "test_single_tree_compute_mutation_times",
test_single_tree_compute_mutation_times },
{ "test_single_tree_mutation_edges", test_single_tree_mutation_edges },
{ "test_single_tree_is_descendant", test_single_tree_is_descendant },
{ "test_single_tree_total_branch_length", test_single_tree_total_branch_length },
{ "test_single_tree_map_mutations", test_single_tree_map_mutations },
Expand All @@ -7250,6 +7381,7 @@ main(int argc, char **argv)
test_simplify_keep_input_roots_multi_tree },
{ "test_left_to_right_multi_tree", test_left_to_right_multi_tree },
{ "test_gappy_multi_tree", test_gappy_multi_tree },

{ "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records },

/* multiroot tests */
Expand Down Expand Up @@ -7304,6 +7436,7 @@ main(int argc, char **argv)

/* Misc */
{ "test_tree_errors", test_tree_errors },
{ "test_treeseq_row_access_errors", test_treeseq_row_access_errors },
{ "test_tree_copy_flags", test_tree_copy_flags },
{ "test_genealogical_nearest_neighbours_errors",
test_genealogical_nearest_neighbours_errors },
Expand Down
1 change: 1 addition & 0 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -4351,6 +4351,7 @@ tsk_mutation_table_get_row_unsafe(
row->metadata_length
= self->metadata_offset[index + 1] - self->metadata_offset[index];
row->metadata = self->metadata + self->metadata_offset[index];
row->edge = TSK_NULL;
}

int
Expand Down
15 changes: 15 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ typedef struct {
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
/** @brief The ID of the edge that this mutation lies on, or TSK_NULL
if there is no corresponding edge.*/
tsk_id_t edge;
} tsk_mutation_t;

/**
Expand Down Expand Up @@ -2394,6 +2397,12 @@ int tsk_site_table_copy(

@rst
Updates the specified site struct to reflect the values in the specified row.

This function always sets the ``mutations`` and ``mutations_length``
fields in the parameter :c:struct:`tsk_site_t` to ``NULL`` and ``0`` respectively.
To get access to the mutations for a particular site, please use the
tree sequence method, :c:func:`tsk_treeseq_get_site`.

Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
Expand Down Expand Up @@ -2728,6 +2737,12 @@ int tsk_mutation_table_copy(

@rst
Updates the specified mutation struct to reflect the values in the specified row.

This function always sets the ``edge`` field in parameter
:c:struct:`tsk_mutation_t` to ``TSK_NULL``. To determine the ID of
the edge associated with a particular mutation, please use the
tree sequence method, :c:func:`tsk_treeseq_get_mutation`.

Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
Expand Down
49 changes: 39 additions & 10 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const double *restrict site_position = self->tables->sites.position;
bool discrete_sites = true;
tsk_mutation_t *mutation;

self->site_mutations_mem
= tsk_malloc(num_mutations * sizeof(*self->site_mutations_mem));
Expand All @@ -149,7 +150,8 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
}

for (k = 0; k < (tsk_id_t) num_mutations; k++) {
ret = tsk_treeseq_get_mutation(self, k, self->site_mutations_mem + k);
mutation = self->site_mutations_mem + k;
ret = tsk_treeseq_get_mutation(self, k, mutation);
if (ret != 0) {
goto out;
}
Expand Down Expand Up @@ -242,45 +244,57 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
{
int ret = TSK_ERR_GENERIC;
tsk_size_t j, k, tree_index;
tsk_id_t site;
tsk_id_t site_id, edge_id, mutation_id;
double tree_left, tree_right;
const double sequence_length = self->tables->sequence_length;
const tsk_id_t num_sites = (tsk_id_t) self->tables->sites.num_rows;
const tsk_id_t num_mutations = (tsk_id_t) self->tables->mutations.num_rows;
const tsk_size_t num_edges = self->tables->edges.num_rows;
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
const double *restrict site_position = self->tables->sites.position;
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_right = self->tables->edges.right;
const double *restrict edge_left = self->tables->edges.left;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
tsk_size_t num_trees_alloc = self->num_trees + 1;
bool discrete_breakpoints = true;
tsk_id_t *node_edge_map = tsk_malloc(num_nodes * sizeof(*node_edge_map));
tsk_mutation_t *mutation;

self->tree_sites_length
= tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));
self->tree_sites = tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites));
self->breakpoints = tsk_malloc(num_trees_alloc * sizeof(*self->breakpoints));
if (self->tree_sites == NULL || self->tree_sites_length == NULL
|| self->breakpoints == NULL) {
if (node_edge_map == NULL || self->tree_sites == NULL
|| self->tree_sites_length == NULL || self->breakpoints == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
tsk_memset(
self->tree_sites_length, 0, self->num_trees * sizeof(*self->tree_sites_length));
tsk_memset(self->tree_sites, 0, self->num_trees * sizeof(*self->tree_sites));
tsk_memset(node_edge_map, TSK_NULL, num_nodes * sizeof(*node_edge_map));

tree_left = 0;
tree_right = sequence_length;
tree_index = 0;
site = 0;
site_id = 0;
mutation_id = 0;
j = 0;
k = 0;
while (j < num_edges || tree_left < sequence_length) {
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_left);
self->breakpoints[tree_index] = tree_left;
while (k < num_edges && edge_right[O[k]] == tree_left) {
edge_id = O[k];
node_edge_map[edge_child[edge_id]] = TSK_NULL;
k++;
}
while (j < num_edges && edge_left[I[j]] == tree_left) {
edge_id = I[j];
node_edge_map[edge_child[edge_id]] = edge_id;
j++;
}
tree_right = sequence_length;
Expand All @@ -290,21 +304,28 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
if (k < num_edges) {
tree_right = TSK_MIN(tree_right, edge_right[O[k]]);
}
self->tree_sites[tree_index] = self->tree_sites_mem + site;
while (site < num_sites && site_position[site] < tree_right) {
self->tree_sites[tree_index] = self->tree_sites_mem + site_id;
while (site_id < num_sites && site_position[site_id] < tree_right) {
self->tree_sites_length[tree_index]++;
site++;
while (
mutation_id < num_mutations && mutation_site[mutation_id] == site_id) {
mutation = self->site_mutations_mem + mutation_id;
mutation->edge = node_edge_map[mutation->node];
mutation_id++;
}
site_id++;
}
tree_left = tree_right;
tree_index++;
}
tsk_bug_assert(site == num_sites);
tsk_bug_assert(site_id == num_sites);
tsk_bug_assert(tree_index == self->num_trees);
self->breakpoints[tree_index] = tree_right;
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_right);
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
ret = 0;
out:
tsk_safe_free(node_edge_map);
return ret;
}

Expand Down Expand Up @@ -3144,7 +3165,15 @@ int TSK_WARN_UNUSED
tsk_treeseq_get_mutation(
const tsk_treeseq_t *self, tsk_id_t index, tsk_mutation_t *mutation)
{
return tsk_mutation_table_get_row(&self->tables->mutations, index, mutation);
int ret = 0;

ret = tsk_mutation_table_get_row(&self->tables->mutations, index, mutation);
if (ret != 0) {
goto out;
}
mutation->edge = self->site_mutations_mem[index].edge;
out:
return ret;
}

int TSK_WARN_UNUSED
Expand Down
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
- ``tree.mrca`` now takes 2 or more arguments and gives the common ancestor of them all.
(:user:`savitakartik`, :issue:`1340`, :pr:`2121`)

- Add a ``edge`` attribute to the ``Mutation`` class that gives the ID of the
edge that the mutation falls on.
(:user:`jeromekelleher`, :issue:`685`, :pr:`2279`).

**Breaking Changes**

- The JSON metadata codec now interprets the empty string as an empty object. This means
Expand Down
Loading