Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ In development.

**Breaking changes**

- ``tsk_mutation_table_add_row`` has an extra ``time`` argument. If the time
is unknown ``TSK_UNKNOWN_TIME`` should be passed.
(:user:`benjeffery`, :pr:`672`)

- Change genotypes from unsigned to signed to accommodate missing data
(see :issue:`144` for discussion). This only affects users of the
``tsk_vargen_t`` class. Genotypes are now stored as int8_t and int16_t
Expand Down Expand Up @@ -34,6 +38,14 @@ In development.

**New features**

- Mutations now have an optional double-precision floating-point ``time`` column.
If not specified, this defaults to a particular NaN value (``TSK_UNKNOWN_TIME``)
indicating that the time is unknown. For a tree sequence to be considered valid
it must meet new criteria for mutation times, see :ref:`sec_mutation_requirements`.
Add ``tsk_table_collection_compute_mutation_times`` and new flag to
``tsk_table_collection_check_integrity``:``TSK_CHECK_MUTATION_TIME``.
(:user:`benjeffery`, :pr:`672`)

- Add ``metadata`` and ``metadata_schema`` fields to table collection, with accessors on
tree sequence. These store arbitrary bytes and are optional in the file format.
(:user: `benjeffery`, :pr:`641`)
Expand Down
12 changes: 12 additions & 0 deletions c/tests/test_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,17 @@ test_blkalloc(void)
tsk_blkalloc_free(&alloc);
}

static void
test_unknown_time(void)
{
CU_ASSERT_TRUE(isnan(TSK_UNKNOWN_TIME));
CU_ASSERT_TRUE(tsk_is_unknown_time(TSK_UNKNOWN_TIME));
CU_ASSERT_FALSE(tsk_is_unknown_time(NAN));
CU_ASSERT_FALSE(tsk_is_unknown_time(0));
CU_ASSERT_FALSE(tsk_is_unknown_time(INFINITY));
CU_ASSERT_FALSE(tsk_is_unknown_time(1));
}

int
main(int argc, char **argv)
{
Expand All @@ -343,6 +354,7 @@ main(int argc, char **argv)
{ "test_generate_uuid", test_generate_uuid },
{ "test_double_round", test_double_round },
{ "test_blkalloc", test_blkalloc },
{ "test_unknown_time", test_unknown_time },
{ NULL, NULL },
};

Expand Down
4 changes: 2 additions & 2 deletions c/tests/test_genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,8 @@ test_single_tree_many_alleles(void)
/* Add j mutations over a single node. */
for (j = 0; j < (tsk_id_t) num_alleles; j++) {
/* When j = 0 we get a parent of -1, which is the NULL_NODE */
ret = tsk_mutation_table_add_row(
&tables.mutations, 0, 0, j - 1, alleles, (tsk_size_t) j, NULL, 0);
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, j - 1,
TSK_UNKNOWN_TIME, alleles, (tsk_size_t) j, NULL, 0);
CU_ASSERT_FATAL(ret >= 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
Expand Down
159 changes: 112 additions & 47 deletions c/tests/test_tables.c

Large diffs are not rendered by default.

192 changes: 176 additions & 16 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,29 @@ verify_compute_mutation_parents(tsk_treeseq_t *ts)
tsk_table_collection_free(&tables);
}

static void
verify_compute_mutation_times(tsk_treeseq_t *ts)
{
int ret;
size_t size = tsk_treeseq_get_num_mutations(ts) * sizeof(tsk_id_t);
tsk_id_t *time = malloc(size);
tsk_table_collection_t tables;

CU_ASSERT_FATAL(time != NULL);
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
memcpy(time, tables.mutations.time, size);
/* Make sure the tables are actually updated */
memset(tables.mutations.time, 0, size);

ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(memcmp(time, tables.mutations.time, size), 0);

free(time);
tsk_table_collection_free(&tables);
}

static void
verify_individual_nodes(tsk_treeseq_t *ts)
{
Expand Down Expand Up @@ -3140,12 +3163,12 @@ test_single_tree_bad_mutations(void)
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n";
const char *mutations = "0 0 1 -1\n"
"1 1 1 -1\n"
"2 4 1 -1\n"
"2 1 0 2\n"
"2 1 1 3\n"
"2 2 1 -1\n";
const char *mutations = "0 0 1 -1 0\n"
"1 1 1 -1 0\n"
"2 4 1 -1 1\n"
"2 1 0 2 0\n"
"2 1 1 3 0\n"
"2 2 1 -1 0\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_flags_t load_flags = TSK_BUILD_INDEXES;
Expand Down Expand Up @@ -3282,6 +3305,27 @@ test_single_tree_bad_mutations(void)
tsk_treeseq_free(&ts);
tables.mutations.parent[2] = TSK_NULL;

/* time < node time */
tables.mutations.time[2] = 0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);
tsk_treeseq_free(&ts);
tables.mutations.time[2] = 1;

/* time > parent mutation */
tables.mutations.time[4] = 0.5;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);
tsk_treeseq_free(&ts);
tables.mutations.time[4] = 0;

/* time > parent node */
tables.mutations.time[0] = 1.5;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);
tsk_treeseq_free(&ts);
tables.mutations.time[0] = 0;

/* Check to make sure we've maintained legal mutations */
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, 0);
Expand Down Expand Up @@ -3714,12 +3758,12 @@ test_single_tree_compute_mutation_parents(void)
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n";
const char *mutations = "0 0 1 -1\n"
"1 1 1 -1\n"
"2 4 1 -1\n"
"2 1 0 2\n"
"2 1 1 3\n"
"2 2 1 -1\n";
const char *mutations = "0 0 1 -1 0\n"
"1 1 1 -1 0\n"
"2 4 1 -1 1\n"
"2 1 0 2 0\n"
"2 1 1 3 0\n"
"2 2 1 -1 0\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;

Expand Down Expand Up @@ -3748,9 +3792,6 @@ test_single_tree_compute_mutation_parents(void)

/* Compute the mutation parents */
verify_compute_mutation_parents(&ts);

/* Verify consistency of individuals */
verify_individual_nodes(&ts);
tsk_treeseq_free(&ts);

/* Bad site reference */
Expand Down Expand Up @@ -3815,6 +3856,123 @@ test_single_tree_compute_mutation_parents(void)
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
tsk_treeseq_free(&ts);

tsk_table_collection_free(&tables);
}

static void
test_single_tree_compute_mutation_times(void)
{
int ret = 0;
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n"
"0.3 0\n";
const char *mutations = "0 0 1 -1 3\n"
"1 1 1 -1 3\n"
"2 4 1 -1 8\n"
"2 1 0 2 4\n"
"2 1 1 3 2\n"
"2 2 1 -1 4\n"
"3 6 1 -1 10\n";
/* 6 */
/* 6 */
/* / \ */
/* / \ */
/* 2 \ */
/* / 5 */
/* 4 / \ */
/* 0 1,3,4 5 \ */
/* 0 1 2 3 */

tsk_treeseq_t ts;
tsk_table_collection_t tables;

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

tables.sequence_length = 1;
parse_nodes(single_tree_ex_nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
tables.nodes.time[4] = 6;
tables.nodes.time[5] = 8;
tables.nodes.time[6] = 10;
parse_edges(single_tree_ex_edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
parse_sites(sites, &tables.sites);
parse_mutations(mutations, &tables.mutations);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);
tables.sequence_length = 1.0;

ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

/* Check to make sure we have legal mutations */
ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);

/* Compute the mutation times */
verify_compute_mutation_times(&ts);

/* Verify consistency of individuals */
verify_individual_nodes(&ts);
tsk_treeseq_free(&ts);

/* Bad random param */
ret = tsk_table_collection_compute_mutation_times(&tables, (double *) 1, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;

/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;

/* mutation sites out of order */
tables.mutations.site[0] = 2;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[0] = 0;

/* sites out of order */
tables.sites.position[0] = 0.11;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);
tables.sites.position[0] = 0;

/* Bad node reference */
tables.mutations.node[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;

/* Bad node reference */
tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;

/* Mutations not ordered by site */
tables.mutations.site[2] = 0;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[2] = 2;

ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);
tsk_treeseq_free(&ts);

tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
Expand Down Expand Up @@ -5350,7 +5508,7 @@ test_deduplicate_sites_errors(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_add_row(&tables.sites, 2, "TT", 2, "MM", 2);
CU_ASSERT_EQUAL_FATAL(ret, 1);
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, "T", 1, NULL, 0);
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, 0, "T", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
Expand Down Expand Up @@ -5732,6 +5890,8 @@ main(int argc, char **argv)
test_single_tree_simplify_null_samples },
{ "test_single_tree_compute_mutation_parents",
test_single_tree_compute_mutation_parents },
{ "test_single_tree_compute_mutation_times",
test_single_tree_compute_mutation_times },
{ "test_single_tree_is_descendant", test_single_tree_is_descendant },
{ "test_single_tree_map_mutations", test_single_tree_map_mutations },
{ "test_single_tree_map_mutations_internal_samples",
Expand Down
11 changes: 9 additions & 2 deletions c/tests/testlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ parse_mutations(const char *text, tsk_mutation_table_t *mutation_table)
const char *whitespace = " \t";
char *p;
tsk_id_t node, site, parent;
double time;
char derived_state[MAX_LINE];

c = 0;
Expand Down Expand Up @@ -399,7 +400,12 @@ parse_mutations(const char *text, tsk_mutation_table_t *mutation_table)
if (p != NULL) {
parent = atoi(p);
}
ret = tsk_mutation_table_add_row(mutation_table, site, node, parent,
time = TSK_UNKNOWN_TIME;
p = strtok(NULL, whitespace);
if (p != NULL) {
time = atof(p);
}
ret = tsk_mutation_table_add_row(mutation_table, site, node, parent, time,
derived_state, strlen(derived_state), NULL, 0);
CU_ASSERT_FATAL(ret >= 0);
}
Expand Down Expand Up @@ -606,7 +612,8 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)
m = k % num_metadatas;
state = (state + 1) % 2;
ret = tsk_mutation_table_add_row(&tables.mutations, j, u, TSK_NULL,
states[state], strlen(states[state]), metadata[m], strlen(metadata[m]));
tables.nodes.time[u], states[state], strlen(states[state]), metadata[m],
strlen(metadata[m]));
CU_ASSERT_FATAL(ret >= 0);
u--;
}
Expand Down
24 changes: 24 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,18 @@ tsk_strerror_internal(int err)
case TSK_ERR_UNSORTED_MUTATIONS:
ret = "Mutations must be provided in non-decreasing site order";
break;
case TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE:
ret = "A mutation's time must be >= the node time, or be marked as "
"'unknown'";
break;
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION:
ret = "A mutation's time must be <= the parent mutation time (if known), or "
"be marked as 'unknown'";
break;
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE:
ret = "A mutation's time must be < the parent node of the edge on which it "
"occurs, or be marked as 'unknown'";
break;

/* Sample errors */
case TSK_ERR_DUPLICATE_SAMPLE:
Expand Down Expand Up @@ -625,3 +637,15 @@ tsk_round(double x, unsigned int ndigits)
}
return z;
}

/* As NANs are not equal, use this function to check for equality to TSK_UNKNOWN_TIME */
bool
tsk_is_unknown_time(double val)
{
union {
uint64_t i;
double f;
} nan_union;
nan_union.f = val;
return nan_union.i == TSK_UNKNOWN_TIME_HEX;
}
Loading