Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
**Features**

- Add ``parents`` to the individual table to enable recording of pedigrees
(:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`).
(:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`, :pr:`1199`).

- Added a ``tsk_table_collection_canonicalse`` method, that allows checking for equality between
tables that are equivalent up to reordering (:user:`petrelharp`, :user:`mufernando`, :pr:`1108`).
Expand Down
90 changes: 78 additions & 12 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -3859,6 +3859,30 @@ test_sort_tables_offsets(void)
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);

/* Individuals must either all be sorted or all skipped */
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Add a parent relation that unsorts the table */
tables.individuals.parents[0] = 5;
ret = tsk_table_collection_copy(&tables, &copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);

memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));

/* Check that sorting would have had an effect */
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tables, &copy, 0));

memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows - 1;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);

tsk_table_collection_free(&tables);
tsk_table_collection_free(&copy);
tsk_treeseq_free(ts);
Expand Down Expand Up @@ -3998,12 +4022,7 @@ test_sort_tables_errors(void)
ret = tsk_table_collection_sort(&tables, &pos, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);

/* Individual, node, population and provenance positions are ignored */
memset(&pos, 0, sizeof(pos));
pos.individuals = 1;
ret = tsk_table_collection_sort(&tables, &pos, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Node, population and provenance positions are ignored */
memset(&pos, 0, sizeof(pos));
pos.nodes = 1;
ret = tsk_table_collection_sort(&tables, &pos, 0);
Expand Down Expand Up @@ -4311,6 +4330,52 @@ test_sort_tables_migrations(void)
free(ts);
}

static void
test_sort_tables_individuals(void)
{
int ret;
tsk_table_collection_t tables, copy;
const char *individuals = "1 0.25 2,3 0\n"
"2 0.5 5,-1 1\n"
"3 0.3 -1 2\n"
"4 0.3 -1 3\n"
"5 0.3 3 4\n"
"6 0.3 4 5\n";
const char *individuals_cycle = "1 0.2 2 0\n"
"2 0.5 0 1\n"
"3 0.3 1 2\n";

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1.0;
parse_individuals(individuals, &tables.individuals);
ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);

ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDIVIDUAL_ORDERING);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Check that the sort is stable */
ret = tsk_table_collection_copy(&tables, &copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT(tsk_table_collection_equals(&tables, &copy, 0));

/* Errors on cycle */
tsk_individual_table_clear(&tables.individuals);
parse_individuals(individuals_cycle, &tables.individuals);
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);

tsk_table_collection_free(&tables);
tsk_table_collection_free(&copy);
}

static void
test_sorter_interface(void)
{
Expand Down Expand Up @@ -6083,7 +6148,6 @@ main(int argc, char **argv)
test_link_ancestors_samples_and_ancestors_overlap },
{ "test_link_ancestors_multiple_to_single_tree",
test_link_ancestors_multiple_to_single_tree },
{ "test_sort_tables_offsets", test_sort_tables_offsets },
{ "test_ibd_finder", test_ibd_finder },
{ "test_ibd_finder_multiple_trees", test_ibd_finder_multiple_trees },
{ "test_ibd_finder_empty_result", test_ibd_finder_empty_result },
Expand All @@ -6093,17 +6157,19 @@ main(int argc, char **argv)
{ "test_ibd_finder_multiple_ibd_paths", test_ibd_finder_multiple_ibd_paths },
{ "test_ibd_finder_odd_topologies", test_ibd_finder_odd_topologies },
{ "test_ibd_finder_errors", test_ibd_finder_errors },
{ "test_sorter_interface", test_sorter_interface },
{ "test_sort_tables_canonical_errors", test_sort_tables_canonical_errors },
{ "test_sort_tables_canonical", test_sort_tables_canonical },
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
{ "test_sort_tables_edge_metadata", test_sort_tables_edge_metadata },
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },
{ "test_sort_tables_errors", test_sort_tables_errors },
{ "test_sort_tables_individuals", test_sort_tables_individuals },
{ "test_sort_tables_mutation_times", test_sort_tables_mutation_times },
{ "test_sort_tables_migrations", test_sort_tables_migrations },
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },
{ "test_sort_tables_offsets", test_sort_tables_offsets },
{ "test_edge_update_invalidates_index", test_edge_update_invalidates_index },
{ "test_copy_table_collection", test_copy_table_collection },
{ "test_sort_tables_errors", test_sort_tables_errors },
{ "test_sort_tables_canonical_errors", test_sort_tables_canonical_errors },
{ "test_sort_tables_canonical", test_sort_tables_canonical },
{ "test_sorter_interface", test_sorter_interface },
{ "test_dump_unindexed", test_dump_unindexed },
{ "test_dump_load_empty", test_dump_load_empty },
{ "test_dump_load_unsorted", test_dump_load_unsorted },
Expand Down
4 changes: 4 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,10 @@ tsk_strerror_internal(int err)
case TSK_ERR_INDIVIDUAL_SELF_PARENT:
ret = "Individuals cannot be their own parents.";
break;

case TSK_ERR_INDIVIDUAL_PARENT_CYCLE:
ret = "Individuals cannot be their own ancestor.";
break;
}
return ret;
}
Expand Down
1 change: 1 addition & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ not found in the file.
/* Individual errors */
#define TSK_ERR_UNSORTED_INDIVIDUALS -1700
#define TSK_ERR_INDIVIDUAL_SELF_PARENT -1701
#define TSK_ERR_INDIVIDUAL_PARENT_CYCLE -1702

// clang-format on

Expand Down
137 changes: 137 additions & 0 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -4959,13 +4959,129 @@ tsk_table_sorter_sort_mutations_canonical(tsk_table_sorter_t *self)
return ret;
}

static int
tsk_table_sorter_sort_individuals(tsk_table_sorter_t *self)
{
int ret = 0;
tsk_id_t i;
tsk_individual_table_t copy;
tsk_individual_t individual;
tsk_individual_table_t *individuals = &self->tables->individuals;
tsk_node_table_t *nodes = &self->tables->nodes;
tsk_size_t num_individuals = individuals->num_rows;
tsk_size_t current_todo = 0;
tsk_size_t todo_insertion_point = 0;
tsk_size_t *incoming_edge_count
= malloc(num_individuals * sizeof(*incoming_edge_count));
tsk_id_t *individuals_todo
= malloc((num_individuals + 1) * sizeof(*individuals_todo));
tsk_id_t *new_id_map = malloc(num_individuals * sizeof(*new_id_map));

ret = tsk_individual_table_copy(individuals, &copy, 0);
if (ret != 0) {
goto out;
}

if (incoming_edge_count == NULL || individuals_todo == NULL || new_id_map == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}

for (i = 0; i < (tsk_id_t) num_individuals; i++) {
incoming_edge_count[i] = 0;
individuals_todo[i] = TSK_NULL;
new_id_map[i] = TSK_NULL;
}
individuals_todo[num_individuals] = TSK_NULL; /* Sentinel value */

/* First find the set of individuals that have no children by creating
* an array of incoming edge counts */
for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {
if (individuals->parents[i] != TSK_NULL) {
incoming_edge_count[individuals->parents[i]]++;
}
}
/* Use these as the starting points for checking all individuals,
* doing this in reverse makes the sort stable */
for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {
if (incoming_edge_count[i] == 0) {
individuals_todo[todo_insertion_point] = i;
todo_insertion_point++;
}
}

/* Now emit individuals from the set that have no children, removing their edges
* as we go adding new individuals to the no children set. */
while (individuals_todo[current_todo] != TSK_NULL) {
tsk_individual_table_get_row_unsafe(
individuals, individuals_todo[current_todo], &individual);
for (i = 0; i < (tsk_id_t) individual.parents_length; i++) {
if (individual.parents[i] != TSK_NULL) {
incoming_edge_count[individual.parents[i]]--;
if (incoming_edge_count[individual.parents[i]] == 0) {
individuals_todo[todo_insertion_point] = individual.parents[i];
todo_insertion_point++;
}
}
}
current_todo++;
}

/* Any edges left are parts of cycles */
for (i = 0; i < (tsk_id_t) num_individuals; i++) {
if (incoming_edge_count[i] > 0) {
ret = TSK_ERR_INDIVIDUAL_PARENT_CYCLE;
goto out;
}
}

ret = tsk_individual_table_clear(individuals);
if (ret != 0) {
goto out;
}

/* The sorted individuals are in reverse order */
for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {
tsk_individual_table_get_row_unsafe(&copy, individuals_todo[i], &individual);
ret = tsk_individual_table_add_row(individuals, individual.flags,
individual.location, individual.location_length, individual.parents,
individual.parents_length, individual.metadata, individual.metadata_length);
if (ret < 0) {
goto out;
}
new_id_map[individuals_todo[i]] = ret;
}

/* Rewrite the parent ids */
for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {
if (individuals->parents[i] != TSK_NULL) {
individuals->parents[i] = new_id_map[individuals->parents[i]];
}
}
/* Rewrite the node individual ids */
for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {
if (nodes->individual[i] != TSK_NULL) {
nodes->individual[i] = new_id_map[nodes->individual[i]];
}
}

ret = 0;
out:
tsk_safe_free(incoming_edge_count);
tsk_safe_free(individuals_todo);
tsk_safe_free(new_id_map);
tsk_individual_table_free(&copy);
return ret;
}

int
tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
{
int ret = 0;
tsk_size_t edge_start = 0;
tsk_size_t migration_start = 0;
bool skip_sites = false;
bool skip_individuals = false;

if (start != NULL) {
if (start->edges > self->tables->edges.num_rows) {
Expand All @@ -4989,6 +5105,14 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
ret = TSK_ERR_SORT_OFFSET_NOT_SUPPORTED;
goto out;
}

/* Individuals also must be all sorted or skipped entirely */
if (start->individuals == self->tables->individuals.num_rows) {
skip_individuals = true;
} else if (start->individuals != 0) {
ret = TSK_ERR_SORT_OFFSET_NOT_SUPPORTED;
goto out;
}
}
/* The indexes will be invalidated, so drop them */
ret = tsk_table_collection_drop_index(self->tables, 0);
Expand Down Expand Up @@ -5019,6 +5143,12 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
goto out;
}
}
if (!skip_individuals) {
ret = tsk_table_sorter_sort_individuals(self);
if (ret != 0) {
goto out;
}
}
out:
return ret;
}
Expand Down Expand Up @@ -9751,6 +9881,13 @@ tsk_table_collection_subset(tsk_table_collection_t *self, const tsk_id_t *nodes,
}
}

/* Rewrite the individual parent ids */
for (k = 0; k < (tsk_id_t) self->individuals.parents_length; k++) {
if (self->individuals.parents[k] != TSK_NULL) {
self->individuals.parents[k] = individual_map[self->individuals.parents[k]];
}
}

ret = 0;
out:
tsk_safe_free(node_map);
Expand Down
2 changes: 1 addition & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
label can be set (:user:`hyanwong`,:issue:`1155`, :issue:`1194`, :pr:`1182`, :pr:`1213`)

- Add ``parents`` column to the individual table to allow recording of pedigrees
(:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177` :pr:`1192`).
(:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`, :pr:`1192` :pr:`1199`).

- Added ``Tree.generate_random_binary`` static method to create random
binary trees (:user:`hyanwong`, :user:`jeromekelleher`, :pr:`1037`).
Expand Down
Loading