diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index 59acd61935..2366841c0f 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -17,6 +17,9 @@ - Reduce the maximum number of rows in a table by 1. This removes edge cases so that a ``tsk_id_t`` can be used to count the number of rows. (:user:`benjeffery`, :issue:`2336`, :pr:`2337`) +- Samples are now copied by ``tsk_variant_restricted_copy``. (:user:`benjeffery`, :issue:`2400`, :pr:`XXXX`) + + -------------------- [1.0.0] - 2022-05-24 -------------------- diff --git a/c/tests/test_genotypes.c b/c/tests/test_genotypes.c index a21200f731..4dd5a80aa4 100644 --- a/c/tests/test_genotypes.c +++ b/c/tests/test_genotypes.c @@ -979,14 +979,29 @@ test_variant_copy(void) ret = tsk_variant_decode(&var_copy, 0, 0); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY); + CU_ASSERT_EQUAL( + 0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence))); + CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t))); CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t))); - CU_ASSERT_EQUAL(0, memcmp(var.genotypes, var_copy.genotypes, sizeof(int32_t))); + CU_ASSERT_EQUAL(0, memcmp(var.genotypes, var_copy.genotypes, + sizeof(int32_t) * (size_t) var.num_samples)); + CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles); CU_ASSERT_EQUAL( 0, memcmp(var.allele_lengths, var_copy.allele_lengths, sizeof(tsk_size_t))); for (j = 0; j < var.num_alleles; j++) { CU_ASSERT_EQUAL(0, memcmp(var.alleles[j], var_copy.alleles[j], (size_t) var.allele_lengths[j])); } + CU_ASSERT_EQUAL(var.has_missing_data, var_copy.has_missing_data); + CU_ASSERT_EQUAL(var.num_alleles, var_copy.num_alleles); + CU_ASSERT_EQUAL(var.num_samples, var_copy.num_samples); + CU_ASSERT_EQUAL(0, memcmp(var.samples, var_copy.samples, + sizeof(*var.samples) * (size_t) var.num_samples)); + + CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL); + CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL); + CU_ASSERT_EQUAL(var_copy.alt_samples, NULL); + CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL); tsk_variant_free(&var); tsk_variant_free(&var_copy); diff --git a/c/tskit/genotypes.c b/c/tskit/genotypes.c index 92a514c280..320e3bca8e 100644 --- a/c/tskit/genotypes.c +++ b/c/tskit/genotypes.c @@ -598,17 +598,20 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other) for (j = 0; j < self->num_alleles; j++) { total_len += self->allele_lengths[j]; } + other->samples = tsk_malloc(other->num_samples * sizeof(tsk_id_t)); other->genotypes = tsk_malloc(other->num_samples * sizeof(int32_t)); other->user_alleles_mem = tsk_malloc(total_len * sizeof(char *)); other->allele_lengths = tsk_malloc(other->num_alleles * sizeof(*other->allele_lengths)); other->alleles = tsk_malloc(other->num_alleles * sizeof(*other->alleles)); - if (other->genotypes == NULL || other->user_alleles_mem == NULL - || other->allele_lengths == NULL || other->alleles == NULL) { + if (other->samples == NULL || other->genotypes == NULL + || other->user_alleles_mem == NULL || other->allele_lengths == NULL + || other->alleles == NULL) { ret = TSK_ERR_NO_MEMORY; goto out; } - + tsk_memcpy( + other->samples, self->samples, other->num_samples * sizeof(*other->samples)); tsk_memcpy(other->genotypes, self->genotypes, other->num_samples * sizeof(int32_t)); tsk_memcpy(other->allele_lengths, self->allele_lengths, other->num_alleles * sizeof(*other->allele_lengths)); @@ -619,6 +622,7 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other) other->alleles[j] = other->user_alleles_mem + offset; offset += other->allele_lengths[j]; } + out: return ret; } diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst index 070434135a..3dd1f960d4 100644 --- a/python/CHANGELOG.rst +++ b/python/CHANGELOG.rst @@ -2,6 +2,12 @@ [0.5.1] - 2022-0X-XX -------------------- +**Fixes** + +- Copies of a `Variant` object would cause a segfault when ``.samples`` was accessed. + (:user:`benjeffery`, :issue:`2400`, :pr:`XXXX`) + + **Changes** - Tables in a table collection can be replaced using the replace_with method diff --git a/python/tests/test_genotypes.py b/python/tests/test_genotypes.py index 037f24e8ca..1ce4bc95cf 100644 --- a/python/tests/test_genotypes.py +++ b/python/tests/test_genotypes.py @@ -359,6 +359,8 @@ def test_samples(self): for var1, var2 in zip(ts.variants(), ts.variants(samples=s)): assert var1.site == var2.site assert var1.alleles == var2.alleles + assert np.array_equal(var1.samples, ts.samples()) + assert np.array_equal(var2.samples, s) assert var2.genotypes.shape == (len(s),) assert np.array_equal(var1.genotypes[s], var2.genotypes) count += 1 diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index dbadb0ef82..3d328a31bb 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -2414,13 +2414,37 @@ def test_variants_lifecycle(self): del variant assert np.array_equal(genotypes, expected) - def test_copy(self): + @pytest.mark.parametrize("isolated_as_missing", [True, False]) + @pytest.mark.parametrize("samples", [None, [], (0,)]) + @pytest.mark.parametrize("alleles", [None, ("1", "0")]) + def test_copy(self, isolated_as_missing, samples, alleles): ts = self.get_example_tree_sequence(random_seed=42) - variant = _tskit.Variant(ts) + variant = _tskit.Variant( + ts, + isolated_as_missing=isolated_as_missing, + samples=samples, + alleles=alleles, + ) + + # Test taking a copy before decode + variant2 = variant.restricted_copy() + assert variant.site_id == variant2.site_id + assert variant.alleles == variant2.alleles + print(variant.alleles) + assert np.array_equal(variant.genotypes, variant2.genotypes) + assert np.array_equal(variant.samples, variant2.samples) + assert variant.isolated_as_missing == variant2.isolated_as_missing + variant.decode(0) # Everything below should work even if the Python ts is free'd del ts variant2 = variant.restricted_copy() + assert variant.site_id == variant2.site_id + assert variant.alleles == variant2.alleles + assert np.array_equal(variant.genotypes, variant2.genotypes) + assert np.array_equal(variant.samples, variant2.samples) + assert variant.isolated_as_missing == variant2.isolated_as_missing + # Take a copy for comparison, then move the variant to check the copy # doesn't move too genotypes = variant.genotypes @@ -2434,13 +2458,18 @@ def test_copy(self): variant2.decode(1) assert site_id == variant2.site_id assert alleles == variant2.alleles + # Other properties shouldn't have changed + assert np.array_equal(variant.samples, variant2.samples) + assert variant.isolated_as_missing == variant2.isolated_as_missing # Variant should be equal to the copy we took earlier assert np.array_equal(genotypes_copy, variant2.genotypes) # But not equal to the un-copies genotypes anymore as they # have decoded a new site as a side effect of reusing the # array when decoding - assert not np.array_equal(genotypes, variant2.genotypes) + assert len(variant.samples) == 0 or not np.array_equal( + genotypes, variant2.genotypes + ) # Check the lifecycle of copies and copies of copies del variant