Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ In development.
variants for tree sequence and table collection load/dump
(:user:`jeromekelleher`, :user:`grahamgower`, :issue:`565`, :pr:`599`).

- Add low-level sorting API and ``TSK_NO_CHECK_INTEGRITY`` flag
(:user:`jeromekelleher`, :pr:`627`, :issue:`626`).

**Deprecated**

- The ``TSK_SAMPLE_COUNTS`` option is now ignored and will print out a warning
Expand Down
181 changes: 178 additions & 3 deletions c/tests/test_minimal_cpp.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* MIT License
/* * MIT License
*
* Copyright (c) 2019 Tskit Developers
* Copyright (c) 2019-2020 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -28,6 +27,9 @@
#include <iostream>
#include <cassert>
#include <sstream>
#include <vector>
#include <algorithm>
#include <cstring>

#include <tskit.h>

Expand Down Expand Up @@ -78,12 +80,185 @@ test_table_basics()
tsk_table_collection_free(&tables);
}

/* An edge-sorting callback for tsk_table_sorter_t built on C++ std::sort.
 * Because the comparison is a lambda it can be inlined into the sort, which
 * is intended to perform significantly better than the builtin method in
 * tskit.
 *
 * Only the simple case is handled: the sort must begin at row 0 and the
 * edge table must carry no metadata (both are asserted). Always returns 0.
 */
int
cpp_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    /* Plain value copy of one edge row, so rows can be shuffled as a unit. */
    struct edge_record {
        double left, right;
        tsk_id_t parent, child;

        edge_record(double l, double r, tsk_id_t p, tsk_id_t c)
            : left{ l }, right{ r }, parent{ p }, child{ c }
        {
        }
    };
    tsk_edge_table_t *edges = &sorter->tables->edges;
    const double *node_time = sorter->tables->nodes.time;
    const size_t num_edges = edges->num_rows;

    /* The ordering needs the node times, so it cannot be expressed as an
     * operator< on the record itself; a lambda that binds node_time is used
     * instead. This mirrors the comparison used in fwdpp, except for the
     * final time comparison (fwdpp table times go forwards).
     * Keys, in order: parent time, parent id, child id, left coordinate. */
    const auto edge_less = [&node_time](const edge_record &a, const edge_record &b) {
        const auto time_a = node_time[a.parent];
        const auto time_b = node_time[b.parent];
        if (!(time_a == time_b)) {
            return time_a < time_b;
        }
        if (!(a.parent == b.parent)) {
            return a.parent < b.parent;
        }
        if (!(a.child == b.child)) {
            return a.child < b.child;
        }
        return a.left < b.left;
    };

    assert(start == 0);
    /* Let's not bother with metadata */
    assert(edges->metadata_length == 0);

    /* Gather the columns into one contiguous array of records. */
    std::vector<edge_record> records;
    records.reserve(num_edges);
    for (size_t row = 0; row < num_edges; row++) {
        records.emplace_back(
            edges->left[row], edges->right[row], edges->parent[row], edges->child[row]);
    }

    std::sort(records.begin(), records.end(), edge_less);

    /* Scatter the sorted records back into the table columns. */
    size_t out = 0;
    for (const edge_record &rec : records) {
        edges->left[out] = rec.left;
        edges->right[out] = rec.right;
        edges->parent[out] = rec.parent;
        edges->child[out] = rec.child;
        out++;
    }
    return 0;
}

/* Builds a small table collection whose edges are deliberately out of order,
 * sorts it through a tsk_table_sorter_t whose sort_edges callback has been
 * replaced by cpp_sort_edges, and checks that the edge-ordering integrity
 * check passes afterwards.
 */
void
test_edge_sorting()
{
    std::cout << "test_edge_sorting" << endl;
    tsk_table_collection_t tables;
    tsk_id_t n = 10;
    tsk_id_t j;
    tsk_id_t ret = tsk_table_collection_init(&tables, 0);
    assert(ret == 0);

    tables.sequence_length = 1.0;
    /* Make a stick tree */
    /* Add nodes and edges. Node j is given time j + 1. */
    for (j = 0; j < n; j++) {
        ret = tsk_node_table_add_row(
            &tables.nodes, TSK_NODE_IS_SAMPLE, j + 1, TSK_NULL, TSK_NULL, NULL, 0);
        assert(ret == j);
    }
    /* Edges are inserted starting from the parent with the largest time, so
     * the table starts out in the wrong order for the check below. */
    for (j = n - 1; j > 0; j--) {
        ret = tsk_edge_table_add_row(&tables.edges, 0, 1, j, j - 1, NULL, 0);
        /* Do not silently ignore a failed insert. */
        assert(ret >= 0);
    }
    assert(tables.nodes.num_rows == (tsk_size_t) n);
    assert(tables.edges.num_rows == (tsk_size_t) n - 1);

    /* Make sure the edges are unsorted */
    ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);
    assert(ret == TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);

    /* Sort the tables */
    tsk_table_sorter_t sorter;
    ret = tsk_table_sorter_init(&sorter, &tables, 0);
    assert(ret == 0);
    /* Set the sort_edges to our local C++ version. We could also set some
     * persistent state in sorter.user_data if we wanted to. */
    sorter.sort_edges = cpp_sort_edges;
    ret = tsk_table_sorter_run(&sorter, NULL);
    assert(ret == 0);
    tsk_table_sorter_free(&sorter);

    /* Make sure the edges are now sorted */
    ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);
    assert(ret == 0);

    tsk_table_collection_free(&tables);
}

int
sort_edges_raises_exception(tsk_table_sorter_t *sorter, tsk_size_t start)
{
throw std::exception();
return 0;
}

int
sort_edges_raises_non_exception(tsk_table_sorter_t *sorter, tsk_size_t start)
{
throw 42;
return 0;
}

/* Edge-sort callback that wraps the two throwing callbacks so that nothing
 * thrown by them escapes to the caller. Whatever is thrown is caught and
 * converted into a negative return code.
 *
 * sorter->user_data selects the behaviour:
 *   - NULL:     call sort_edges_raises_exception; a throw becomes -12345.
 *   - non-NULL: call sort_edges_raises_non_exception; a throw becomes -123456.
 *
 * If the selected callback returns without throwing, its return value is
 * passed through unchanged.
 */
int
safe_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    /* Read the selector before entering the try block. */
    const bool use_std_exception = (sorter->user_data == NULL);
    int ret = 0;

    try {
        ret = use_std_exception ? sort_edges_raises_exception(sorter, start)
                                : sort_edges_raises_non_exception(sorter, start);
    } catch (...) {
        ret = use_std_exception ? -12345 : -123456;
    }
    return ret;
}

/* Checks that an error code produced by a user-supplied sort_edges callback
 * is what tsk_table_sorter_run returns, for both kinds of thrown value that
 * safe_sort_edges converts into error codes.
 */
void
test_edge_sorting_errors()
{
    std::cout << "test_edge_sorting_errors" << endl;

    tsk_table_collection_t tables;
    tsk_id_t ret = tsk_table_collection_init(&tables, 0);
    assert(ret == 0);
    tables.sequence_length = 1.0;

    tsk_table_sorter_t sorter;
    ret = tsk_table_sorter_init(&sorter, &tables, 0);
    assert(ret == 0);
    sorter.sort_edges = safe_sort_edges;

    /* First run: user_data has not been set by this test, and the expected
     * code is the one safe_sort_edges produces on its NULL branch. */
    ret = tsk_table_sorter_run(&sorter, NULL);
    assert(ret == -12345);

    /* The user_data pointer is our channel for talking to the callback.
     * Any non-NULL value switches safe_sort_edges to the second kind of
     * thrown value, so the other error code is expected. */
    sorter.user_data = &tables;
    ret = tsk_table_sorter_run(&sorter, NULL);
    assert(ret == -123456);

    tsk_table_sorter_free(&sorter);
    tsk_table_collection_free(&tables);
}

/* Test driver: calls each test function once, in the order listed, and
 * returns 0 after the last one has returned. */
int
main()
{
test_kas_strerror();
test_strerror();
test_load_error();
test_table_basics();
/* Tests for the low-level table sorter interface. */
test_edge_sorting();
test_edge_sorting_errors();
return 0;
}
96 changes: 95 additions & 1 deletion c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -2427,7 +2427,7 @@ test_simplify_empty_tables(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;

// ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);
ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unclear why this is in the diffs; is this just an unrelated bug fix that's along for the ride?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, exactly. I must have spotted it while I was in there.

CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 0);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 0);
Expand Down Expand Up @@ -2569,6 +2569,99 @@ test_sort_tables_errors(void)
tsk_treeseq_free(&ts);
}

/* Test helper: rewrites tables->edges so that its rows appear in the
 * opposite order. The rows are copied last-to-first into a temporary edge
 * table, which is then copied back over the original and freed.
 */
static void
reverse_edges(tsk_table_collection_t *tables)
{
    int ret;
    tsk_edge_table_t reversed;
    tsk_edge_t row;
    tsk_id_t remaining = (tsk_id_t) tables->edges.num_rows;

    ret = tsk_edge_table_init(&reversed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Walk the source table from its last row down to row 0. */
    while (remaining > 0) {
        remaining--;
        ret = tsk_edge_table_get_row(&tables->edges, remaining, &row);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_add_row(&reversed, row.left, row.right, row.parent,
            row.child, row.metadata, row.metadata_length);
        CU_ASSERT_FATAL(ret >= 0);
    }

    /* The destination is already initialised, hence TSK_NO_INIT. */
    ret = tsk_edge_table_copy(&reversed, &tables->edges, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_edge_table_free(&reversed);
}

/* Exercises the low-level tsk_table_sorter_t interface (init / run / free)
 * on a copy of the tables of a small example tree sequence. Each scenario
 * first reverses the edge table with reverse_edges() so that there is
 * something to sort, then compares the result against the original tables
 * held by ts. */
static void
test_sorter_interface(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_table_sorter_t sorter;

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Sanity check: the working copy starts out identical to the original. */
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables));

/* Nominal case: a default sorter restores the original tables and does
 * not set user_data. */
reverse_edges(&tables);
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables));
ret = tsk_table_sorter_init(&sorter, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_sorter_run(&sorter, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables));
CU_ASSERT_EQUAL(sorter.user_data, NULL);
tsk_table_sorter_free(&sorter);

/* If we set the sort_edges function to NULL then the run should still
 * succeed but leave the edge table as is (i.e. still reversed). */
reverse_edges(&tables);
CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges));
ret = tsk_table_sorter_init(&sorter, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
sorter.sort_edges = NULL;
ret = tsk_table_sorter_run(&sorter, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges));
tsk_table_sorter_free(&sorter);

/* Reversing again should make them equal */
reverse_edges(&tables);
CU_ASSERT_TRUE(tsk_edge_table_equals(&ts.tables->edges, &tables.edges));

/* Do not check integrity before sorting: with TSK_NO_CHECK_INTEGRITY the
 * sort should still produce the original tables. */
reverse_edges(&tables);
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables));
ret = tsk_table_sorter_init(&sorter, &tables, TSK_NO_CHECK_INTEGRITY);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_sorter_run(&sorter, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables));
tsk_table_sorter_free(&sorter);

/* The user_data shouldn't be touched: a pointer stored before the run
 * must still be there afterwards. */
reverse_edges(&tables);
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, &tables));
ret = tsk_table_sorter_init(&sorter, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
sorter.user_data = (void *) &ts;
ret = tsk_table_sorter_run(&sorter, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, &tables));
CU_ASSERT_EQUAL_FATAL(sorter.user_data, &ts);
tsk_table_sorter_free(&sorter);

tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}

static void
test_dump_unindexed(void)
{
Expand Down Expand Up @@ -3453,6 +3546,7 @@ main(int argc, char **argv)
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
{ "test_copy_table_collection", test_copy_table_collection },
{ "test_sort_tables_errors", test_sort_tables_errors },
{ "test_sorter_interface", test_sorter_interface },
{ "test_dump_unindexed", test_dump_unindexed },
{ "test_dump_load_empty", test_dump_load_empty },
{ "test_dump_load_unsorted", test_dump_load_unsorted },
Expand Down
Loading