Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -2936,6 +2936,23 @@ test_simplest_map_mutations(void)
CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);
free(transitions);

/* Assign the ancestral_state */
genotypes[0] = 1;
genotypes[1] = 1;
ancestral_state = 0;
ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
&ancestral_state, &num_transitions, &transitions);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);
CU_ASSERT_EQUAL_FATAL(num_transitions, 2);
CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);
CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);
CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);
CU_ASSERT_EQUAL_FATAL(transitions[1].node, 1);
CU_ASSERT_EQUAL_FATAL(transitions[1].parent, TSK_NULL);
CU_ASSERT_EQUAL_FATAL(transitions[1].state, 1);
free(transitions);

tsk_tree_free(&t);
tsk_treeseq_free(&ts);
}
Expand Down Expand Up @@ -4388,6 +4405,24 @@ test_single_tree_map_mutations(void)
CU_ASSERT_EQUAL_FATAL(num_transitions, 3);
free(transitions);

ancestral_state = 5;
ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
&ancestral_state, &num_transitions, &transitions);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_transitions, 4);
CU_ASSERT_EQUAL_FATAL(ancestral_state, 5);
free(transitions);

ancestral_state = -1;
ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
&ancestral_state, &num_transitions, &transitions);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);

ancestral_state = 64;
ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
&ancestral_state, &num_transitions, &transitions);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);

genotypes[0] = 64;
ret = tsk_tree_map_mutations(
&t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,9 @@ tsk_strerror_internal(int err)
case TSK_ERR_BAD_GENOTYPE:
ret = "Bad genotype value provided";
break;
case TSK_ERR_BAD_ANCESTRAL_STATE:
ret = "Bad ancestral state specified";
break;

/* Genotype decoding errors */
case TSK_ERR_TOO_MANY_ALLELES:
Expand Down
1 change: 1 addition & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ An unsupported type was provided for a column in the file.
/* Mutation mapping errors */
#define TSK_ERR_GENOTYPES_ALL_MISSING -1000
#define TSK_ERR_BAD_GENOTYPE -1001
#define TSK_ERR_BAD_ANCESTRAL_STATE -1002

/* Genotype decoding errors */
#define TSK_ERR_MUST_IMPUTE_NON_SAMPLES -1100
Expand Down
52 changes: 32 additions & 20 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4276,9 +4276,8 @@ get_smallest_set_bit(uint64_t v)
*/
int TSK_WARN_UNUSED
tsk_tree_map_mutations(tsk_tree_t *self, int8_t *genotypes,
double *TSK_UNUSED(cost_matrix), tsk_flags_t TSK_UNUSED(options),
int8_t *r_ancestral_state, tsk_size_t *r_num_transitions,
tsk_state_transition_t **r_transitions)
double *TSK_UNUSED(cost_matrix), tsk_flags_t options, int8_t *r_ancestral_state,
tsk_size_t *r_num_transitions, tsk_state_transition_t **r_transitions)
{
int ret = 0;
struct stack_elem {
Expand Down Expand Up @@ -4335,6 +4334,17 @@ tsk_tree_map_mutations(tsk_tree_t *self, int8_t *genotypes,
}
num_alleles++;

ancestral_state = 0; /* keep compiler happy */
if (options & TSK_MM_FIXED_ANCESTRAL_STATE) {
ancestral_state = *r_ancestral_state;
if ((ancestral_state < 0) || (ancestral_state >= HARTIGAN_MAX_ALLELES)) {
ret = TSK_ERR_BAD_ANCESTRAL_STATE;
goto out;
} else if (ancestral_state >= num_alleles) {
num_alleles = (int8_t)(ancestral_state + 1);
}
}

for (root = self->left_root; root != TSK_NULL; root = self->right_sib[root]) {
/* Do a post order traversal */
postorder_stack[0] = root;
Expand Down Expand Up @@ -4374,27 +4384,29 @@ tsk_tree_map_mutations(tsk_tree_t *self, int8_t *genotypes,
}
}

optimal_root_set = 0;
/* TODO it's annoying that this is essentially the same as the
* visit function above. It would be nice if we had an extra
* node that was the parent of all roots, then the algorithm
* would work as-is */
memset(allele_count, 0, ((size_t) num_alleles) * sizeof(*allele_count));
for (root = self->left_root; root != TSK_NULL; root = right_sib[root]) {
if (!(options & TSK_MM_FIXED_ANCESTRAL_STATE)) {
optimal_root_set = 0;
/* TODO it's annoying that this is essentially the same as the
* visit function above. It would be nice if we had an extra
* node that was the parent of all roots, then the algorithm
* would work as-is */
memset(allele_count, 0, ((size_t) num_alleles) * sizeof(*allele_count));
for (root = self->left_root; root != TSK_NULL; root = right_sib[root]) {
for (allele = 0; allele < num_alleles; allele++) {
allele_count[allele] += bit_is_set(optimal_set[root], allele);
}
}
max_allele_count = 0;
for (allele = 0; allele < num_alleles; allele++) {
allele_count[allele] += bit_is_set(optimal_set[root], allele);
max_allele_count = TSK_MAX(max_allele_count, allele_count[allele]);
}
}
max_allele_count = 0;
for (allele = 0; allele < num_alleles; allele++) {
max_allele_count = TSK_MAX(max_allele_count, allele_count[allele]);
}
for (allele = 0; allele < num_alleles; allele++) {
if (allele_count[allele] == max_allele_count) {
optimal_root_set = set_bit(optimal_root_set, allele);
for (allele = 0; allele < num_alleles; allele++) {
if (allele_count[allele] == max_allele_count) {
optimal_root_set = set_bit(optimal_root_set, allele);
}
}
ancestral_state = get_smallest_set_bit(optimal_root_set);
}
ancestral_state = get_smallest_set_bit(optimal_root_set);

num_transitions = 0;
for (root = self->left_root; root != TSK_NULL; root = self->right_sib[root]) {
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ extern "C" {
#define TSK_STAT_POLARISED (1 << 10)
#define TSK_STAT_SPAN_NORMALISE (1 << 11)

/* Options for map_mutations */
#define TSK_MM_FIXED_ANCESTRAL_STATE (1 << 0)

#define TSK_DIR_FORWARD 1
#define TSK_DIR_REVERSE -1

Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

**Features**

- ``map_mutations`` now allows the ancestral state to be specified
(:user:`hyanwong`, :user:`jeromekelleher`, :issue:`1542`, :pr:`1550`)

**Fixes**

--------------------
Expand Down
19 changes: 16 additions & 3 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -8936,18 +8936,21 @@ Tree_map_mutations(Tree *self, PyObject *args, PyObject *kwds)
PyObject *ret = NULL;
PyObject *genotypes = NULL;
PyObject *py_transitions = NULL;
PyObject *py_ancestral_state = Py_None;
PyArrayObject *genotypes_array = NULL;
static char *kwlist[] = { "genotypes", NULL };
static char *kwlist[] = { "genotypes", "ancestral_state", NULL };
int8_t ancestral_state;
tsk_state_transition_t *transitions = NULL;
tsk_size_t num_transitions;
npy_intp *shape;
tsk_flags_t options = 0;
int err;

if (Tree_check_state(self) != 0) {
goto out;
}
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &genotypes)) {
if (!PyArg_ParseTupleAndKeywords(
args, kwds, "O|O", kwlist, &genotypes, &py_ancestral_state)) {
goto out;
}
genotypes_array = (PyArrayObject *) PyArray_FROMANY(
Expand All @@ -8961,9 +8964,19 @@ Tree_map_mutations(Tree *self, PyObject *args, PyObject *kwds)
PyExc_ValueError, "Genotypes array must have 1D (num_samples,) array");
goto out;
}
if (py_ancestral_state != Py_None) {
options = TSK_MM_FIXED_ANCESTRAL_STATE;
if (!PyNumber_Check(py_ancestral_state)) {
PyErr_SetString(PyExc_TypeError, "ancestral_state must be a number");
goto out;
}
/* Note this does allow large numbers to overflow, but higher levels
* should be checking for these error anyway. */
ancestral_state = (int8_t) PyLong_AsLong(py_ancestral_state);
}

err = tsk_tree_map_mutations(self->tree, (int8_t *) PyArray_DATA(genotypes_array),
NULL, 0, &ancestral_state, &num_transitions, &transitions);
NULL, options, &ancestral_state, &num_transitions, &transitions);
if (err != 0) {
handle_library_error(err);
goto out;
Expand Down
19 changes: 19 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,16 @@ def test_map_mutations(self):
assert ancestral_state == 0
assert len(transitions) == 0

def test_map_mutations_fixed_ancestral_state(self):
ts = self.get_example_tree_sequence()
tree = _tskit.Tree(ts)
tree.next()
n = ts.get_num_samples()
genotypes = np.ones(n, dtype=np.int8)
ancestral_state, transitions = tree.map_mutations(genotypes, 0)
assert ancestral_state == 0
assert len(transitions) == 1

def test_map_mutations_errors(self):
ts = self.get_example_tree_sequence()
tree = _tskit.Tree(ts)
Expand All @@ -2629,6 +2639,15 @@ def test_map_mutations_errors(self):
with pytest.raises(_tskit.LibraryError):
tree.map_mutations(genotypes)

genotypes = np.zeros(n, dtype=np.int8)
tree.map_mutations(genotypes)
for bad_type in ["d", []]:
with pytest.raises(TypeError):
tree.map_mutations(genotypes, bad_type)
for bad_state in [-2, -1, 127, 255]:
with pytest.raises(_tskit.LibraryError, match="Bad ancestral"):
tree.map_mutations(genotypes, bad_state)

@pytest.mark.parametrize("array", ARRAY_NAMES)
def test_array_read_only(self, array):
name = array + "_array"
Expand Down
Loading