Skip to content

Commit 1374dbb

Browse files
authored
Merge pull request #672 from benjeffery/mutation_time
Mutation time
2 parents a9964b7 + bbef335 commit 1374dbb

31 files changed

+1285
-152
lines changed

c/CHANGELOG.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ In development.
66

77
**Breaking changes**
88

9+
- ``tsk_mutation_table_add_row`` has an extra ``time`` argument. If the time
10+
is unknown ``TSK_UNKNOWN_TIME`` should be passed.
11+
(:user:`benjeffery`, :pr:`672`)
12+
913
- Change genotypes from unsigned to signed to accommodate missing data
1014
(see :issue:`144` for discussion). This only affects users of the
1115
``tsk_vargen_t`` class. Genotypes are now stored as int8_t and int16_t
@@ -34,6 +38,14 @@ In development.
3438

3539
**New features**
3640

41+
- Mutations now have an optional double-precision floating-point ``time`` column.
42+
If not specified, this defaults to a particular NaN value (``TSK_UNKNOWN_TIME``)
43+
indicating that the time is unknown. For a tree sequence to be considered valid
44+
it must meet new criteria for mutation times, see :ref:`sec_mutation_requirements`.
45+
Add ``tsk_table_collection_compute_mutation_times`` and new flag to
46+
``tsk_table_collection_check_integrity``:``TSK_CHECK_MUTATION_TIME``.
47+
(:user:`benjeffery`, :pr:`672`)
48+
3749
- Add ``metadata`` and ``metadata_schema`` fields to table collection, with accessors on
3850
tree sequence. These store arbitrary bytes and are optional in the file format.
3951
(:user: `benjeffery`, :pr:`641`)

c/tests/test_core.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,17 @@ test_blkalloc(void)
334334
tsk_blkalloc_free(&alloc);
335335
}
336336

337+
static void
338+
test_unknown_time(void)
339+
{
340+
CU_ASSERT_TRUE(isnan(TSK_UNKNOWN_TIME));
341+
CU_ASSERT_TRUE(tsk_is_unknown_time(TSK_UNKNOWN_TIME));
342+
CU_ASSERT_FALSE(tsk_is_unknown_time(NAN));
343+
CU_ASSERT_FALSE(tsk_is_unknown_time(0));
344+
CU_ASSERT_FALSE(tsk_is_unknown_time(INFINITY));
345+
CU_ASSERT_FALSE(tsk_is_unknown_time(1));
346+
}
347+
337348
int
338349
main(int argc, char **argv)
339350
{
@@ -343,6 +354,7 @@ main(int argc, char **argv)
343354
{ "test_generate_uuid", test_generate_uuid },
344355
{ "test_double_round", test_double_round },
345356
{ "test_blkalloc", test_blkalloc },
357+
{ "test_unknown_time", test_unknown_time },
346358
{ NULL, NULL },
347359
};
348360

c/tests/test_genotypes.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -649,8 +649,8 @@ test_single_tree_many_alleles(void)
649649
/* Add j mutations over a single node. */
650650
for (j = 0; j < (tsk_id_t) num_alleles; j++) {
651651
/* When j = 0 we get a parent of -1, which is the NULL_NODE */
652-
ret = tsk_mutation_table_add_row(
653-
&tables.mutations, 0, 0, j - 1, alleles, (tsk_size_t) j, NULL, 0);
652+
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, j - 1,
653+
TSK_UNKNOWN_TIME, alleles, (tsk_size_t) j, NULL, 0);
654654
CU_ASSERT_FATAL(ret >= 0);
655655
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
656656
CU_ASSERT_EQUAL_FATAL(ret, 0);

c/tests/test_tables.c

Lines changed: 112 additions & 47 deletions
Large diffs are not rendered by default.

c/tests/test_trees.c

Lines changed: 176 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,29 @@ verify_compute_mutation_parents(tsk_treeseq_t *ts)
128128
tsk_table_collection_free(&tables);
129129
}
130130

131+
static void
132+
verify_compute_mutation_times(tsk_treeseq_t *ts)
133+
{
134+
int ret;
135+
size_t size = tsk_treeseq_get_num_mutations(ts) * sizeof(tsk_id_t);
136+
tsk_id_t *time = malloc(size);
137+
tsk_table_collection_t tables;
138+
139+
CU_ASSERT_FATAL(time != NULL);
140+
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
141+
CU_ASSERT_EQUAL_FATAL(ret, 0);
142+
memcpy(time, tables.mutations.time, size);
143+
/* Make sure the tables are actually updated */
144+
memset(tables.mutations.time, 0, size);
145+
146+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
147+
CU_ASSERT_EQUAL_FATAL(ret, 0);
148+
CU_ASSERT_EQUAL_FATAL(memcmp(time, tables.mutations.time, size), 0);
149+
150+
free(time);
151+
tsk_table_collection_free(&tables);
152+
}
153+
131154
static void
132155
verify_individual_nodes(tsk_treeseq_t *ts)
133156
{
@@ -3140,12 +3163,12 @@ test_single_tree_bad_mutations(void)
31403163
const char *sites = "0 0\n"
31413164
"0.1 0\n"
31423165
"0.2 0\n";
3143-
const char *mutations = "0 0 1 -1\n"
3144-
"1 1 1 -1\n"
3145-
"2 4 1 -1\n"
3146-
"2 1 0 2\n"
3147-
"2 1 1 3\n"
3148-
"2 2 1 -1\n";
3166+
const char *mutations = "0 0 1 -1 0\n"
3167+
"1 1 1 -1 0\n"
3168+
"2 4 1 -1 1\n"
3169+
"2 1 0 2 0\n"
3170+
"2 1 1 3 0\n"
3171+
"2 2 1 -1 0\n";
31493172
tsk_treeseq_t ts;
31503173
tsk_table_collection_t tables;
31513174
tsk_flags_t load_flags = TSK_BUILD_INDEXES;
@@ -3282,6 +3305,27 @@ test_single_tree_bad_mutations(void)
32823305
tsk_treeseq_free(&ts);
32833306
tables.mutations.parent[2] = TSK_NULL;
32843307

3308+
/* time < node time */
3309+
tables.mutations.time[2] = 0;
3310+
ret = tsk_treeseq_init(&ts, &tables, load_flags);
3311+
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);
3312+
tsk_treeseq_free(&ts);
3313+
tables.mutations.time[2] = 1;
3314+
3315+
/* time > parent mutation */
3316+
tables.mutations.time[4] = 0.5;
3317+
ret = tsk_treeseq_init(&ts, &tables, load_flags);
3318+
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);
3319+
tsk_treeseq_free(&ts);
3320+
tables.mutations.time[4] = 0;
3321+
3322+
/* time > parent node */
3323+
tables.mutations.time[0] = 1.5;
3324+
ret = tsk_treeseq_init(&ts, &tables, load_flags);
3325+
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);
3326+
tsk_treeseq_free(&ts);
3327+
tables.mutations.time[0] = 0;
3328+
32853329
/* Check to make sure we've maintained legal mutations */
32863330
ret = tsk_treeseq_init(&ts, &tables, load_flags);
32873331
CU_ASSERT_EQUAL(ret, 0);
@@ -3714,12 +3758,12 @@ test_single_tree_compute_mutation_parents(void)
37143758
const char *sites = "0 0\n"
37153759
"0.1 0\n"
37163760
"0.2 0\n";
3717-
const char *mutations = "0 0 1 -1\n"
3718-
"1 1 1 -1\n"
3719-
"2 4 1 -1\n"
3720-
"2 1 0 2\n"
3721-
"2 1 1 3\n"
3722-
"2 2 1 -1\n";
3761+
const char *mutations = "0 0 1 -1 0\n"
3762+
"1 1 1 -1 0\n"
3763+
"2 4 1 -1 1\n"
3764+
"2 1 0 2 0\n"
3765+
"2 1 1 3 0\n"
3766+
"2 2 1 -1 0\n";
37233767
tsk_treeseq_t ts;
37243768
tsk_table_collection_t tables;
37253769

@@ -3748,9 +3792,6 @@ test_single_tree_compute_mutation_parents(void)
37483792

37493793
/* Compute the mutation parents */
37503794
verify_compute_mutation_parents(&ts);
3751-
3752-
/* Verify consistency of individuals */
3753-
verify_individual_nodes(&ts);
37543795
tsk_treeseq_free(&ts);
37553796

37563797
/* Bad site reference */
@@ -3815,6 +3856,123 @@ test_single_tree_compute_mutation_parents(void)
38153856
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
38163857
tsk_treeseq_free(&ts);
38173858

3859+
tsk_table_collection_free(&tables);
3860+
}
3861+
3862+
static void
3863+
test_single_tree_compute_mutation_times(void)
3864+
{
3865+
int ret = 0;
3866+
const char *sites = "0 0\n"
3867+
"0.1 0\n"
3868+
"0.2 0\n"
3869+
"0.3 0\n";
3870+
const char *mutations = "0 0 1 -1 3\n"
3871+
"1 1 1 -1 3\n"
3872+
"2 4 1 -1 8\n"
3873+
"2 1 0 2 4\n"
3874+
"2 1 1 3 2\n"
3875+
"2 2 1 -1 4\n"
3876+
"3 6 1 -1 10\n";
3877+
/* 6 */
3878+
/* 6 */
3879+
/* / \ */
3880+
/* / \ */
3881+
/* 2 \ */
3882+
/* / 5 */
3883+
/* 4 / \ */
3884+
/* 0 1,3,4 5 \ */
3885+
/* 0 1 2 3 */
3886+
3887+
tsk_treeseq_t ts;
3888+
tsk_table_collection_t tables;
3889+
3890+
ret = tsk_table_collection_init(&tables, 0);
3891+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3892+
3893+
tables.sequence_length = 1;
3894+
parse_nodes(single_tree_ex_nodes, &tables.nodes);
3895+
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
3896+
tables.nodes.time[4] = 6;
3897+
tables.nodes.time[5] = 8;
3898+
tables.nodes.time[6] = 10;
3899+
parse_edges(single_tree_ex_edges, &tables.edges);
3900+
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
3901+
parse_sites(sites, &tables.sites);
3902+
parse_mutations(mutations, &tables.mutations);
3903+
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);
3904+
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);
3905+
tables.sequence_length = 1.0;
3906+
3907+
ret = tsk_table_collection_build_index(&tables, 0);
3908+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3909+
3910+
/* Check to make sure we have legal mutations */
3911+
ret = tsk_treeseq_init(&ts, &tables, 0);
3912+
CU_ASSERT_EQUAL_FATAL(ret, 0);
3913+
3914+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
3915+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);
3916+
3917+
/* Compute the mutation times */
3918+
verify_compute_mutation_times(&ts);
3919+
3920+
/* Verify consistency of individuals */
3921+
verify_individual_nodes(&ts);
3922+
tsk_treeseq_free(&ts);
3923+
3924+
/* Bad random param */
3925+
ret = tsk_table_collection_compute_mutation_times(&tables, (double *) 1, 0);
3926+
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
3927+
3928+
/* Bad site reference */
3929+
tables.mutations.site[0] = -1;
3930+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3931+
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
3932+
tables.mutations.site[0] = 0;
3933+
3934+
/* Bad site reference */
3935+
tables.mutations.site[0] = -1;
3936+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3937+
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
3938+
tables.mutations.site[0] = 0;
3939+
3940+
/* mutation sites out of order */
3941+
tables.mutations.site[0] = 2;
3942+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3943+
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
3944+
tables.mutations.site[0] = 0;
3945+
3946+
/* sites out of order */
3947+
tables.sites.position[0] = 0.11;
3948+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3949+
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);
3950+
tables.sites.position[0] = 0;
3951+
3952+
/* Bad node reference */
3953+
tables.mutations.node[0] = -1;
3954+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3955+
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
3956+
tables.mutations.node[0] = 0;
3957+
3958+
/* Bad node reference */
3959+
tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;
3960+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3961+
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
3962+
tables.mutations.node[0] = 0;
3963+
3964+
/* Mutations not ordered by site */
3965+
tables.mutations.site[2] = 0;
3966+
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
3967+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
3968+
tables.mutations.site[2] = 2;
3969+
3970+
ret = tsk_treeseq_init(&ts, &tables, 0);
3971+
CU_ASSERT_EQUAL(ret, 0);
3972+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
3973+
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);
3974+
tsk_treeseq_free(&ts);
3975+
38183976
tsk_treeseq_free(&ts);
38193977
tsk_table_collection_free(&tables);
38203978
}
@@ -5350,7 +5508,7 @@ test_deduplicate_sites_errors(void)
53505508
CU_ASSERT_EQUAL_FATAL(ret, 0);
53515509
ret = tsk_site_table_add_row(&tables.sites, 2, "TT", 2, "MM", 2);
53525510
CU_ASSERT_EQUAL_FATAL(ret, 1);
5353-
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, "T", 1, NULL, 0);
5511+
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, 0, "T", 1, NULL, 0);
53545512
CU_ASSERT_EQUAL_FATAL(ret, 0);
53555513
ret = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
53565514
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -5732,6 +5890,8 @@ main(int argc, char **argv)
57325890
test_single_tree_simplify_null_samples },
57335891
{ "test_single_tree_compute_mutation_parents",
57345892
test_single_tree_compute_mutation_parents },
5893+
{ "test_single_tree_compute_mutation_times",
5894+
test_single_tree_compute_mutation_times },
57355895
{ "test_single_tree_is_descendant", test_single_tree_is_descendant },
57365896
{ "test_single_tree_map_mutations", test_single_tree_map_mutations },
57375897
{ "test_single_tree_map_mutations_internal_samples",

c/tests/testlib.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ parse_mutations(const char *text, tsk_mutation_table_t *mutation_table)
369369
const char *whitespace = " \t";
370370
char *p;
371371
tsk_id_t node, site, parent;
372+
double time;
372373
char derived_state[MAX_LINE];
373374

374375
c = 0;
@@ -399,7 +400,12 @@ parse_mutations(const char *text, tsk_mutation_table_t *mutation_table)
399400
if (p != NULL) {
400401
parent = atoi(p);
401402
}
402-
ret = tsk_mutation_table_add_row(mutation_table, site, node, parent,
403+
time = TSK_UNKNOWN_TIME;
404+
p = strtok(NULL, whitespace);
405+
if (p != NULL) {
406+
time = atof(p);
407+
}
408+
ret = tsk_mutation_table_add_row(mutation_table, site, node, parent, time,
403409
derived_state, strlen(derived_state), NULL, 0);
404410
CU_ASSERT_FATAL(ret >= 0);
405411
}
@@ -606,7 +612,8 @@ caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)
606612
m = k % num_metadatas;
607613
state = (state + 1) % 2;
608614
ret = tsk_mutation_table_add_row(&tables.mutations, j, u, TSK_NULL,
609-
states[state], strlen(states[state]), metadata[m], strlen(metadata[m]));
615+
tables.nodes.time[u], states[state], strlen(states[state]), metadata[m],
616+
strlen(metadata[m]));
610617
CU_ASSERT_FATAL(ret >= 0);
611618
u--;
612619
}

c/tskit/core.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,18 @@ tsk_strerror_internal(int err)
287287
case TSK_ERR_UNSORTED_MUTATIONS:
288288
ret = "Mutations must be provided in non-decreasing site order";
289289
break;
290+
case TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE:
291+
ret = "A mutation's time must be >= the node time, or be marked as "
292+
"'unknown'";
293+
break;
294+
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION:
295+
ret = "A mutation's time must be <= the parent mutation time (if known), or "
296+
"be marked as 'unknown'";
297+
break;
298+
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE:
299+
ret = "A mutation's time must be < the parent node of the edge on which it "
300+
"occurs, or be marked as 'unknown'";
301+
break;
290302

291303
/* Sample errors */
292304
case TSK_ERR_DUPLICATE_SAMPLE:
@@ -625,3 +637,15 @@ tsk_round(double x, unsigned int ndigits)
625637
}
626638
return z;
627639
}
640+
641+
/* As NANs are not equal, use this function to check for equality to TSK_UNKNOWN_TIME */
642+
bool
643+
tsk_is_unknown_time(double val)
644+
{
645+
union {
646+
uint64_t i;
647+
double f;
648+
} nan_union;
649+
nan_union.f = val;
650+
return nan_union.i == TSK_UNKNOWN_TIME_HEX;
651+
}

0 commit comments

Comments
 (0)