Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion c/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
--------------------
[1.2.0] - 2022-XX-XX
[1.1.1] - 2022-XX-XX
--------------------

**Bug fixes**

- Fix segfault in tsk_variant_restricted_copy in tree sequences with large
numbers of alleles or very long alleles
(:user:`jeromekelleher`, :pr:`2437`, :issue:`2429`).

--------------------
[1.1.0] - 2022-07-14
Expand Down
163 changes: 133 additions & 30 deletions c/tests/test_genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -958,53 +958,154 @@ test_variant_decode_errors(void)
tsk_treeseq_free(&ts);
}

/* Checks that the data represented by the specified pair of variants exposed
* by the public API is equal. */
static void
assert_variants_equal(const tsk_variant_t *v1, const tsk_variant_t *v2)
{
tsk_size_t j;

CU_ASSERT_EQUAL(v1->num_samples, v2->num_samples);
CU_ASSERT_EQUAL(v1->num_alleles, v2->num_alleles);
for (j = 0; j < v1->num_alleles; j++) {
CU_ASSERT_EQUAL(v1->allele_lengths[j], v2->allele_lengths[j]);
CU_ASSERT_EQUAL(
0, memcmp(v1->alleles[j], v2->alleles[j], (size_t) v1->allele_lengths[j]));
}
CU_ASSERT_EQUAL(v1->has_missing_data, v2->has_missing_data);
CU_ASSERT_EQUAL(v1->num_samples, v2->num_samples);
for (j = 0; j < v1->num_samples; j++) {
CU_ASSERT_EQUAL(v1->samples[j], v2->samples[j]);
CU_ASSERT_EQUAL(v1->genotypes[j], v2->genotypes[j]);
}
CU_ASSERT_EQUAL(v1->site.id, v2->site.id);
CU_ASSERT_EQUAL(v1->site.position, v2->site.position);
CU_ASSERT_EQUAL(v1->site.ancestral_state_length, v2->site.ancestral_state_length);
CU_ASSERT_EQUAL(0, memcmp(v1->site.ancestral_state, v2->site.ancestral_state,
(size_t) v1->site.ancestral_state_length));
CU_ASSERT_EQUAL(v1->site.mutations_length, v2->site.mutations_length);
/* We're pointing back to the same memory for embedded pointers */
CU_ASSERT_EQUAL(v1->site.mutations, v2->site.mutations);
CU_ASSERT_EQUAL(v1->site.metadata, v2->site.metadata);
}

static void
test_variant_copy(void)
{
int ret = 0;
tsk_size_t j;
tsk_treeseq_t ts;
tsk_variant_t var;
tsk_variant_t var_copy;
tsk_variant_t var, var_copy;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);

ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(&var, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(&var, &var_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(&var_copy, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);

CU_ASSERT_EQUAL(
0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence)));
CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t)));
CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t)));
CU_ASSERT_EQUAL(0, memcmp(var.genotypes, var_copy.genotypes,
sizeof(int32_t) * (size_t) var.num_samples));
CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles);
CU_ASSERT_EQUAL(
0, memcmp(var.allele_lengths, var_copy.allele_lengths, sizeof(tsk_size_t)));
for (j = 0; j < var.num_alleles; j++) {
CU_ASSERT_EQUAL(0,
memcmp(var.alleles[j], var_copy.alleles[j], (size_t) var.allele_lengths[j]));
for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
ret = tsk_variant_decode(&var, (tsk_id_t) j, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(&var, &var_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(&var_copy, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);

assert_variants_equal(&var, &var_copy);
CU_ASSERT_EQUAL(
0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence)));
CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t)));
CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t)));
CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL);
CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL);
CU_ASSERT_EQUAL(var_copy.alt_samples, NULL);
CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL);
tsk_variant_free(&var_copy);
}
CU_ASSERT_EQUAL(var.has_missing_data, var_copy.has_missing_data);
CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles);
CU_ASSERT_EQUAL(var.num_samples, var_copy.num_samples);
CU_ASSERT_EQUAL(0, memcmp(var.samples, var_copy.samples,
sizeof(*var.samples) * (size_t) var.num_samples));

CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL);
CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL);
CU_ASSERT_EQUAL(var_copy.alt_samples, NULL);
CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL);
tsk_variant_free(&var);
tsk_treeseq_free(&ts);
}

static void
test_variant_copy_long_alleles(void)
{
int ret = 0;
const char *sites = "0.0 GGGG\n"
"0.125 AAAAA\n"
"0.25 CCCCCC\n"
"0.5 AAAAAAA\n";
const char *mutations = "0 0 TTT -1\n"
"1 1 CCCCCCC -1\n"
"2 0 GGGGGGG -1\n"
"2 1 AG -1\n"
"2 2 TTTTTTT -1\n"
"3 4 TGGGGGG -1\n"
"3 0 AAA 5\n";
tsk_treeseq_t ts;
tsk_variant_t var, copy, copy_of_copy;
tsk_size_t j;

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
sites, mutations, NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
ret = tsk_variant_decode(&var, (tsk_id_t) j, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(&var, &copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_variants_equal(&var, &copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_variants_equal(&var, &copy_of_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_variant_free(&copy_of_copy);
tsk_variant_free(&copy);
}
tsk_variant_free(&var);
tsk_variant_free(&var_copy);
tsk_treeseq_free(&ts);
}

static void
test_variant_copy_memory_management(void)
{
int ret = 0;
tsk_size_t j;
tsk_treeseq_t ts;
tsk_variant_t *var;
tsk_variant_t copy, copy_of_copy;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);

for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
var = tsk_malloc(sizeof(*var));
CU_ASSERT_FATAL(var != NULL);
ret = tsk_variant_init(var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(var, (tsk_id_t) j, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(var, &copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_variants_equal(var, &copy);
/* Free var to make sure we're not pointing to any of the original memory. */
tsk_variant_free(var);
free(var);
ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_variants_equal(&copy, &copy_of_copy);
ret = tsk_variant_decode(&copy, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);
ret = tsk_variant_decode(&copy_of_copy, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);

tsk_variant_free(&copy);
tsk_variant_free(&copy_of_copy);
}
tsk_treeseq_free(&ts);
}

Expand All @@ -1030,6 +1131,8 @@ main(int argc, char **argv)
{ "test_multiple_variant_decode", test_multiple_variant_decode },
{ "test_variant_decode_errors", test_variant_decode_errors },
{ "test_variant_copy", test_variant_copy },
{ "test_variant_copy_long_alleles", test_variant_copy_long_alleles },
{ "test_variant_copy_memory_management", test_variant_copy_memory_management },
{ NULL, NULL },
};

Expand Down
16 changes: 8 additions & 8 deletions c/tskit/genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,6 @@ tsk_variant_decode(
tsk_id_t allele_index;
tsk_size_t j, num_missing;
int no_longer_missing;

tsk_mutation_t mutation;
bool impute_missing = !!(self->options & TSK_ISOLATED_NOT_MISSING);
bool by_traversal = self->alt_samples != NULL;
Expand Down Expand Up @@ -582,11 +581,11 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
tsk_size_t total_len, offset, j;

/* Copy everything */
tsk_memcpy(other, self, sizeof(tsk_variant_t));
tsk_memcpy(other, self, sizeof(*other));
/* Tree sequence left as NULL and zero'd tree is a way of indicating this variant is
* fixed and cannot be further decoded. */
other->tree_sequence = NULL;
tsk_memset(&other->tree, sizeof(tsk_tree_t), 0);
tsk_memset(&other->tree, sizeof(other->tree), 0);
other->traversal_stack = NULL;
other->samples = NULL;
other->sample_index_map = NULL;
Expand All @@ -598,9 +597,9 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
for (j = 0; j < self->num_alleles; j++) {
total_len += self->allele_lengths[j];
}
other->samples = tsk_malloc(other->num_samples * sizeof(tsk_id_t));
other->genotypes = tsk_malloc(other->num_samples * sizeof(int32_t));
other->user_alleles_mem = tsk_malloc(total_len * sizeof(char *));
other->samples = tsk_malloc(other->num_samples * sizeof(*other->samples));
other->genotypes = tsk_malloc(other->num_samples * sizeof(*other->genotypes));
other->user_alleles_mem = tsk_malloc(total_len * sizeof(*other->user_alleles_mem));
other->allele_lengths
= tsk_malloc(other->num_alleles * sizeof(*other->allele_lengths));
other->alleles = tsk_malloc(other->num_alleles * sizeof(*other->alleles));
Expand All @@ -612,13 +611,14 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
}
tsk_memcpy(
other->samples, self->samples, other->num_samples * sizeof(*other->samples));
tsk_memcpy(other->genotypes, self->genotypes, other->num_samples * sizeof(int32_t));
tsk_memcpy(other->genotypes, self->genotypes,
other->num_samples * sizeof(*other->genotypes));
tsk_memcpy(other->allele_lengths, self->allele_lengths,
other->num_alleles * sizeof(*other->allele_lengths));
offset = 0;
for (j = 0; j < other->num_alleles; j++) {
tsk_memcpy(other->user_alleles_mem + offset, self->alleles[j],
other->allele_lengths[j] * sizeof(char *));
other->allele_lengths[j] * sizeof(*other->user_alleles_mem));
other->alleles[j] = other->user_alleles_mem + offset;
offset += other->allele_lengths[j];
}
Expand Down
5 changes: 4 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

**Fixes**

- Iterating over ``ts.variants()`` could cause a segfault in tree sequences
with large numbers of alleles or very long alleles
(:user:`jeromekelleher`, :pr:`2437`, :issue:`2429`).

- Various memory leaks fixed (:user:`jeromekelleher`, :pr:`2424`, :issue:`2423`,
:issue:`2427`).

Expand All @@ -24,7 +28,6 @@
- Variant objects now have a ``.num_missing`` attribute and ``.counts()`` and
``.frequencies`` methods (:user:`hyanwong`, :issue:`2390` :pr:`2393`)


- Add the `Tree.num_lineages(t)` method to return the number of lineages present
at time t in the tree (:user:`jeromekelleher`, :issue:`386`, :pr:`2422`)

Expand Down
1 change: 0 additions & 1 deletion python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2502,7 +2502,6 @@ def test_copy(self, isolated_as_missing, samples, alleles):
variant2 = variant.restricted_copy()
assert variant.site_id == variant2.site_id
assert variant.alleles == variant2.alleles
print(variant.alleles)
assert np.array_equal(variant.genotypes, variant2.genotypes)
assert np.array_equal(variant.samples, variant2.samples)
assert variant.isolated_as_missing == variant2.isolated_as_missing
Expand Down