Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@

- Method ``tsk_individual_table_add_row`` has extra arguments ``parents`` and ``parents_length``.

**Breaking changes**

- Add an ``options`` argument to ``tsk_table_collection_subset`` (:user:`petrelharp`, :pr:`1108`),
to allow for retaining the order of populations.

**Changes**

- Allow mutations that have the same derived state as their parent mutation.
(:user:`benjeffery`, :issue:`1180`, :pr:`1233`)

**Bugfixes**

----------------------
Expand Down
99 changes: 95 additions & 4 deletions c/tests/test_genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ test_single_tree_many_alleles(void)
}

static void
test_single_tree_inconsistent_mutations(void)
test_single_tree_silent_mutations_i16(void)
{
const char *sites = "0.0 0\n"
"0.1 0\n"
Expand Down Expand Up @@ -896,14 +896,103 @@ test_single_tree_inconsistent_mutations(void)
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INCONSISTENT_MUTATIONS);
CU_ASSERT_EQUAL_FATAL(ret, 1);
tsk_vargen_free(&vargen);
}
}

tsk_treeseq_free(&ts);
}

static void
test_single_tree_silent_mutations_i8(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;

/* Add some silent mutations */
const char *silent_ex_sites = "0.125 0\n"
"0.25 0\n"
"0.5 0\n"
"0.75 0\n";
/* site, node, derived_state, [parent, time] */
const char *silent_ex_mutations
= "0 5 0 -1\n" /* Silent mutation over mutation 1 */
"0 2 1 0\n"
"1 4 1 -1\n"
"1 0 0 2\n" /* Back mutation over 0 */
"1 0 0 3\n" /* Silent mutation under back mutation */
"2 0 1 -1\n" /* recurrent mutations over samples */
"2 1 1 -1\n"
"2 2 1 -1\n"
"2 3 1 -1\n"
"3 0 0 -1\n" /* Single silent mutation at a site */
;

tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
silent_ex_sites, silent_ex_mutations, NULL, NULL, 0);

ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);

ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[2], 1);
CU_ASSERT_EQUAL(var->genotypes.i8[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site->id, 0);
CU_ASSERT_EQUAL(var->site->mutations_length, 2);

ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
CU_ASSERT_EQUAL(var->genotypes.i8[2], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site->id, 1);
CU_ASSERT_EQUAL(var->site->mutations_length, 3);

ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes.i8[0], 1);
CU_ASSERT_EQUAL(var->genotypes.i8[1], 1);
CU_ASSERT_EQUAL(var->genotypes.i8[2], 1);
CU_ASSERT_EQUAL(var->genotypes.i8[3], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site->id, 2);
CU_ASSERT_EQUAL(var->site->mutations_length, 4);

ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes.i8[0], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[1], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[2], 0);
CU_ASSERT_EQUAL(var->genotypes.i8[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site->id, 3);
CU_ASSERT_EQUAL(var->site->mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}

int
main(int argc, char **argv)
{
Expand All @@ -922,8 +1011,10 @@ main(int argc, char **argv)
{ "test_single_tree_user_alleles_errors", test_single_tree_user_alleles_errors },
{ "test_single_tree_subsample", test_single_tree_subsample },
{ "test_single_tree_many_alleles", test_single_tree_many_alleles },
{ "test_single_tree_inconsistent_mutations",
test_single_tree_inconsistent_mutations },
{ "test_single_tree_silent_mutations_i16",
test_single_tree_silent_mutations_i16 },
{ "test_single_tree_silent_mutations_i8", test_single_tree_silent_mutations_i8 },

{ NULL, NULL },
};

Expand Down
1 change: 1 addition & 0 deletions c/tests/testlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const char *single_tree_ex_edges = "0 1 4 0,1\n"
const char *single_tree_ex_sites = "0.125 0\n"
"0.25 0\n"
"0.5 0\n";
/* site, node, derived_state, [parent, time] */
const char *single_tree_ex_mutations
= "0 2 1 -1\n"
"1 4 1 -1\n"
Expand Down
3 changes: 0 additions & 3 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,9 +284,6 @@ tsk_strerror_internal(int err)
case TSK_ERR_MUTATION_PARENT_INCONSISTENT:
ret = "Mutation parent references form a loop.";
break;
case TSK_ERR_INCONSISTENT_MUTATIONS:
ret = "Inconsistent mutations: state already equal to derived state";
break;
case TSK_ERR_UNSORTED_MUTATIONS:
ret = "Mutations must be provided in non-decreasing site order and "
"non-increasing time order within each site";
Expand Down
13 changes: 6 additions & 7 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,12 @@ not found in the file.
#define TSK_ERR_MUTATION_PARENT_EQUAL -501
#define TSK_ERR_MUTATION_PARENT_AFTER_CHILD -502
#define TSK_ERR_MUTATION_PARENT_INCONSISTENT -503
#define TSK_ERR_INCONSISTENT_MUTATIONS -504
#define TSK_ERR_UNSORTED_MUTATIONS -505
#define TSK_ERR_NON_SINGLE_CHAR_MUTATION -506
#define TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE -507
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION -508
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE -509
#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN -510
#define TSK_ERR_UNSORTED_MUTATIONS -504
#define TSK_ERR_NON_SINGLE_CHAR_MUTATION -505
#define TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE -506
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION -507
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE -508
#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN -509

/* Sample errors */
#define TSK_ERR_DUPLICATE_SAMPLE -600
Expand Down
28 changes: 8 additions & 20 deletions c/tskit/genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,10 +321,7 @@ tsk_vargen_update_genotypes_i8_sample_list(
if (index != TSK_NULL) {
stop = list_right[node];
while (true) {
if (genotypes[index] == (int8_t) derived) {
ret = TSK_ERR_INCONSISTENT_MUTATIONS;
goto out;
}

ret += genotypes[index] == TSK_MISSING_DATA;
genotypes[index] = (int8_t) derived;
if (index == stop) {
Expand All @@ -333,7 +330,7 @@ tsk_vargen_update_genotypes_i8_sample_list(
index = list_next[index];
}
}
out:

return ret;
}

Expand All @@ -354,10 +351,7 @@ tsk_vargen_update_genotypes_i16_sample_list(
if (index != TSK_NULL) {
stop = list_right[node];
while (true) {
if (genotypes[index] == (int16_t) derived) {
ret = TSK_ERR_INCONSISTENT_MUTATIONS;
goto out;
}

ret += genotypes[index] == TSK_MISSING_DATA;
genotypes[index] = (int16_t) derived;
if (index == stop) {
Expand All @@ -366,7 +360,7 @@ tsk_vargen_update_genotypes_i16_sample_list(
index = list_next[index];
}
}
out:

return ret;
}

Expand Down Expand Up @@ -422,13 +416,10 @@ tsk_vargen_visit_i8(tsk_vargen_t *self, tsk_id_t sample_index, tsk_id_t derived)

tsk_bug_assert(derived < INT8_MAX);
tsk_bug_assert(sample_index != -1);
if (genotypes[sample_index] == (int8_t) derived) {
ret = TSK_ERR_INCONSISTENT_MUTATIONS;
goto out;
}

ret = genotypes[sample_index] == TSK_MISSING_DATA;
genotypes[sample_index] = (int8_t) derived;
out:

return ret;
}

Expand All @@ -440,13 +431,10 @@ tsk_vargen_visit_i16(tsk_vargen_t *self, tsk_id_t sample_index, tsk_id_t derived

tsk_bug_assert(derived < INT16_MAX);
tsk_bug_assert(sample_index != -1);
if (genotypes[sample_index] == (int16_t) derived) {
ret = TSK_ERR_INCONSISTENT_MUTATIONS;
goto out;
}

ret = genotypes[sample_index] == TSK_MISSING_DATA;
genotypes[sample_index] = (int16_t) derived;
out:

return ret;
}

Expand Down
5 changes: 5 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
population indexing and lossless node reordering with subset.
(:user:`petrelharp`, :pr:`1097`)

**Changes**

- Allow mutations that have the same derived state as their parent mutation.
(:user:`benjeffery`, :issue:`1180`, :pr:`1233`)

**Breaking changes**

- tskit now requires Python 3.6 (:user:`benjeffery`, :pr:`xxxx`)
Expand Down
11 changes: 4 additions & 7 deletions python/tests/test_genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def test_recurrent_mutations_over_samples(self):
assert variant.alleles == ("0", "1")
assert np.all(variant.genotypes == np.ones(ts.sample_size))

def test_recurrent_mutations_errors(self):
def test_silent_mutations(self):
ts = self.get_tree_sequence()
tree = next(ts.trees())
tables = ts.dump_tables()
Expand All @@ -342,8 +342,7 @@ def test_recurrent_mutations_errors(self):
tables.mutations.add_row(site=site, node=u, derived_state="1")
tables.mutations.add_row(site=site, node=sample, derived_state="1")
ts_new = tables.tree_sequence()
with pytest.raises(exceptions.LibraryError):
list(ts_new.variants())
assert all([v.genotypes[sample] == 1 for v in ts_new.variants()])

def test_zero_samples(self):
ts = self.get_tree_sequence()
Expand Down Expand Up @@ -780,7 +779,7 @@ def test_recurrent_mutations_over_samples(self):
for h in ts_new.haplotypes():
assert ones == h

def test_recurrent_mutations_errors(self):
def test_silent_mutations(self):
ts = msprime.simulate(10, random_seed=2)
tables = ts.dump_tables()
tree = next(ts.trees())
Expand All @@ -791,9 +790,7 @@ def test_recurrent_mutations_errors(self):
tables.mutations.add_row(site=site, node=u, derived_state="1")
tables.mutations.add_row(site=site, node=tree.root, derived_state="1")
ts_new = tables.tree_sequence()
with pytest.raises(exceptions.LibraryError):
list(ts_new.haplotypes())
ts_new.haplotypes()
all(h == 1 for h in ts_new.haplotypes())

def test_back_mutations(self):
base_ts = msprime.simulate(10, random_seed=2)
Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def test_tree_sequence_simulated_mutations(self):

def set_partitions(collection):
"""
Returns an ierator over all partitions of the specified set.
Returns an iterator over all partitions of the specified set.

From https://stackoverflow.com/questions/19368375/set-partitions-in-python
"""
Expand Down
1 change: 1 addition & 0 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3871,6 +3871,7 @@ def get_wf_example(self, N, T, seed):
ts = tsutil.insert_random_ploidy_individuals(ts, max_ploidy=2)
return ts

@pytest.mark.skip("Fails due to #1225")
def test_no_mutation_times(self):
ts = self.get_wf_example(10, 4, seed=925)
self.verify_subset_union(ts)
Expand Down
5 changes: 5 additions & 0 deletions python/tests/test_tree_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,11 @@ def test_single_tree_jukes_cantor(self):
ts = tsutil.jukes_cantor(ts, 20, 1, seed=10)
self.verify(ts)

def test_single_tree_single_site_many_silent(self):
    # Run the shared verification on a simulated tree sequence after
    # tsutil.jukes_cantor has placed mutations on it.
    # NOTE(review): the arguments (1, 20) presumably mean one site with a
    # high mutation rate, per the test name -- confirm against tsutil.
    simulated = msprime.simulate(6, random_seed=1)
    mutated = tsutil.jukes_cantor(simulated, 1, 20, seed=10)
    self.verify(mutated)

def test_single_tree_multichar_mutations(self):
ts = msprime.simulate(6, random_seed=1, mutation_rate=1)
ts = tsutil.insert_multichar_mutations(ts)
Expand Down
12 changes: 12 additions & 0 deletions python/tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,18 @@ def test_n50_multiroot(self):
ts = tsutil.jukes_cantor(ts, 5, 2, seed=2)
self.verify(ts)

def test_silent_mutations(self):
    # The jukes_cantor helper is expected to emit silent mutations, i.e.
    # mutations whose derived state equals that of their parent mutation.
    simulated = msprime.simulate(50, random_seed=1)
    mutated = tsutil.jukes_cantor(simulated, 5, 2, seed=2)
    silent_count = sum(
        1
        for mut in mutated.mutations()
        if mut.parent != -1
        and mutated.mutation(mut.parent).derived_state == mut.derived_state
    )
    assert silent_count > 20


class TestCaterpillarTree:
"""
Expand Down
9 changes: 7 additions & 2 deletions python/tests/test_vcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,10 @@ def verify_records(self, pyvcf_records, pysam_records):
assert pyvcf_record.CHROM == pysam_record.chrom
assert pyvcf_record.POS == pysam_record.pos
assert pyvcf_record.ID == pysam_record.id
assert pyvcf_record.ALT == list(pysam_record.alts)
if pysam_record.alts:
assert pyvcf_record.ALT == list(pysam_record.alts)
else:
assert pyvcf_record.ALT == [] or pyvcf_record.ALT == [None]
assert pyvcf_record.REF == pysam_record.ref
assert pysam_record.filter[0].name == "PASS"
assert pyvcf_record.FORMAT == "GT"
Expand Down Expand Up @@ -489,7 +492,9 @@ def verify(self, ts):
):
assert vcf_row.POS == np.round(variant.site.position)
assert variant.alleles[0] == vcf_row.REF
assert list(variant.alleles[1:]) == vcf_row.ALT
assert list(variant.alleles[1:]) == [
allele for allele in vcf_row.ALT if allele is not None
]
j = 0
for individual, sample in itertools.zip_longest(
map(ts.individual, indivs), vcf_row.samples
Expand Down
6 changes: 3 additions & 3 deletions python/tests/tsutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,14 +509,14 @@ def generate_site_mutations(
x = random.expovariate(mu)
new_state = state
while x < branch_length:
new_state = random.choice([s for s in states if s != state])
if multiple_per_node and (state != new_state):
new_state = random.choice(states)
if multiple_per_node:
mutation_table.add_row(site, u, new_state, parent)
parent = mutation_table.num_rows - 1
state = new_state
x += random.expovariate(mu)
else:
if (not multiple_per_node) and (state != new_state):
if not multiple_per_node:
mutation_table.add_row(site, u, new_state, parent)
parent = mutation_table.num_rows - 1
state = new_state
Expand Down
Loading