Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions c/tests/test_genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1297,6 +1297,35 @@ test_alignments_partial_isolation(void)
tsk_treeseq_free(&ts);
}

static void
test_alignments_return_code_truncated_interval(void)
{
int ret = 0;
const char *nodes = "1 0 0 -1\n"
"1 0 0 -1\n"
"0 1 0 -1\n";
/* Tree over [0,5): samples 0 and 1 under root 2.
* Tree over [5,10): only sample 1 under root 2 (sample 0 isolated). */
const char *edges = "0 5 2 0\n"
"0 10 2 1\n";
tsk_treeseq_t ts;
const tsk_id_t *samples;
tsk_size_t n;
char buf[10];
const char *ref = "NNNNNNNNNN";

tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);

ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 5, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_NSTRING_EQUAL(buf + 0 * 5, "NNNNN", 5);
CU_ASSERT_NSTRING_EQUAL(buf + 1 * 5, "NNNNN", 5);

tsk_treeseq_free(&ts);
}

static void
test_alignments_invalid_allele_length(void)
{
Expand Down Expand Up @@ -1367,6 +1396,64 @@ test_alignments_discrete_genome_required(void)
tsk_treeseq_free(&ts);
}

static void
test_alignments_null_reference(void)
{
int ret = 0;
tsk_treeseq_t ts;
const tsk_id_t *samples;
tsk_size_t n;
char buf[10];

build_balanced_three_example_align(&ts);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);

ret = tsk_treeseq_decode_alignments(&ts, NULL, 10, samples, n, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
tsk_treeseq_free(&ts);
}

static void
test_alignments_null_nodes_or_buf(void)
{
int ret = 0;
tsk_treeseq_t ts;
const char *ref = "NNNNNNNNNN";
const tsk_id_t *samples;
tsk_size_t n;
char buf[30];

build_balanced_three_example_align(&ts);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);

ret = tsk_treeseq_decode_alignments(&ts, ref, 10, NULL, n, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 10, 'N', NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

tsk_treeseq_free(&ts);
}

static void
test_alignments_node_out_of_bounds(void)
{
int ret = 0;
tsk_treeseq_t ts;
const char *ref = "NNNNNNNNNN";
tsk_id_t bad_node;
char buf[10];

build_balanced_three_example_align(&ts);
bad_node = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);

ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &bad_node, 1, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
}

static void
test_alignments_isolated_as_not_missing(void)
{
Expand Down Expand Up @@ -1600,6 +1687,8 @@ main(int argc, char **argv)
{ "test_alignments_basic_default", test_alignments_basic_default },
{ "test_alignments_reference_sequence", test_alignments_reference_sequence },
{ "test_alignments_partial_isolation", test_alignments_partial_isolation },
{ "test_alignments_return_code_truncated_interval",
test_alignments_return_code_truncated_interval },
{ "test_alignments_isolated_as_not_missing",
test_alignments_isolated_as_not_missing },
{ "test_alignments_internal_node_non_sample",
Expand All @@ -1610,6 +1699,9 @@ main(int argc, char **argv)
{ "test_alignments_non_integer_bounds", test_alignments_non_integer_bounds },
{ "test_alignments_discrete_genome_required",
test_alignments_discrete_genome_required },
{ "test_alignments_null_reference", test_alignments_null_reference },
{ "test_alignments_null_nodes_or_buf", test_alignments_null_nodes_or_buf },
{ "test_alignments_node_out_of_bounds", test_alignments_node_out_of_bounds },
{ "test_alignments_missing_char_collision",
test_alignments_missing_char_collision },
{ "test_alignments_zero_nodes_ok", test_alignments_zero_nodes_ok },
Expand Down
2 changes: 2 additions & 0 deletions c/tskit/genotypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,8 @@ tsk_treeseq_decode_alignments_overlay_missing(const tsk_treeseq_t *self,
}
}

/* On success we should return 0, not TSK_TREE_OK from the last tsk_tree_next */
ret = 0;
out:
tsk_tree_free(&tree);
return ret;
Expand Down
100 changes: 100 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -6079,6 +6079,102 @@ TreeSequence_get_individuals_nodes(TreeSequence *self)
return ret;
}

static PyObject *
TreeSequence_decode_alignments(TreeSequence *self, PyObject *args, PyObject *kwds)
{
int err;
PyObject *ret = NULL;
PyObject *py_ref, *py_nodes, *py_missing;
PyArrayObject *nodes_array = NULL;
const char *ref_seq;
Py_ssize_t ref_len, missing_len;
tsk_id_t *nodes;
tsk_size_t num_nodes;
double left, right;
char missing_char;
const char *missing_utf8;
int isolated_as_missing = 1;
tsk_flags_t options = 0;
PyObject *buf_obj = NULL;
char *buf = NULL;

static char *kwlist[] = { "reference_sequence", "nodes", "left", "right",
"missing_data_character", "isolated_as_missing", NULL };

if (TreeSequence_check_state(self) != 0) {
goto out;
}

if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOddOp", kwlist, &py_ref, &py_nodes,
&left, &right, &py_missing, &isolated_as_missing)) {
goto out;
}

if (!PyBytes_Check(py_ref)) {
PyErr_SetString(PyExc_TypeError, "reference_sequence must be bytes");
goto out;
}
if (PyBytes_AsStringAndSize(py_ref, (char **) &ref_seq, &ref_len) < 0) {
goto out;
}

if (!PyUnicode_Check(py_missing)) {
PyErr_SetString(
PyExc_TypeError, "missing_data_character must be a (length 1) string");
goto out;
}
missing_utf8 = PyUnicode_AsUTF8AndSize(py_missing, &missing_len);
if (missing_utf8 == NULL) {
goto out;
}
if (missing_len != 1) {
PyErr_SetString(
PyExc_TypeError, "missing_data_character must be a single character");
goto out;
}
missing_char = missing_utf8[0];

if (!isolated_as_missing) {
options |= TSK_ISOLATED_NOT_MISSING;
}

nodes_array = (PyArrayObject *) PyArray_FROMANY(
py_nodes, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
if (nodes_array == NULL) {
goto out;
}
num_nodes = (tsk_size_t) PyArray_DIM(nodes_array, 0);
nodes = PyArray_DATA(nodes_array);

buf_obj = PyBytes_FromStringAndSize(
NULL, (Py_ssize_t)(num_nodes * (tsk_size_t)(right - left)));
if (buf_obj == NULL) {
goto out;
}
buf = PyBytes_AS_STRING(buf_obj);

// clang-format off
Py_BEGIN_ALLOW_THREADS
err = tsk_treeseq_decode_alignments(self->tree_sequence,
ref_seq, (tsk_size_t) ref_len, nodes, num_nodes, left, right, missing_char, buf,
options);
Py_END_ALLOW_THREADS
// clang-format on
if (err != 0)
{
handle_library_error(err);
goto out;
}

ret = buf_obj;
buf_obj = NULL;

out:
Py_XDECREF(nodes_array);
Py_XDECREF(buf_obj);
return ret;
}

static PyObject *
TreeSequence_get_mutations_edge(TreeSequence *self)
{
Expand Down Expand Up @@ -8660,6 +8756,10 @@ static PyMethodDef TreeSequence_methods[] = {
.ml_meth = (PyCFunction) TreeSequence_get_individuals_nodes,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns an array of the node ids for each individual" },
{ .ml_name = "decode_alignments",
.ml_meth = (PyCFunction) TreeSequence_decode_alignments,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Decode full alignments for given nodes and interval." },
{ .ml_name = "get_mutations_edge",
.ml_meth = (PyCFunction) TreeSequence_get_mutations_edge,
.ml_flags = METH_NOARGS,
Expand Down
Loading