diff --git a/.circleci/config.yml b/.circleci/config.yml index 95cf02641..7bf75306b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -12,7 +12,7 @@ jobs: - run: name: Checkout submodules command: | - git submodule update --init --recursive + git submodule update --init --recursive # Write out the status for debugging purposes. Are we checked out at tags? git submodule status --recursive - run: @@ -44,7 +44,7 @@ jobs: name: Run highlevel tests and upload coverage command: | nosetests -v --with-coverage --cover-package msprime \ - --cover-branches --cover-erase --cover-xml --cover-inclusive tests + --cover-branches --cover-erase --cover-xml --cover-inclusive tests codecov -X gcov -F python rm .coverage @@ -65,12 +65,13 @@ jobs: gcov -pb -o ./build/temp.linux*/ _msprimemodule.c cd build-gcc # TODO should be able to do this with 'find', but it's tricky and opaque. - gcov -pb ./msprime@sta/fenwick.c.gcno ../lib/fenwick.c - gcov -pb ./msprime@sta/msprime.c.gcno ../lib/msprime.c - gcov -pb ./msprime@sta/mutgen.c.gcno ../lib/mutgen.c - gcov -pb ./msprime@sta/object_heap.c.gcno ../lib/object_heap.c - gcov -pb ./msprime@sta/recomb_map.c.gcno ../lib/recomb_map.c - gcov -pb ./msprime@sta/util.c.gcno ../lib/util.c + gcov -pb ./msprime@sta/fenwick.c.gcno ../lib/fenwick.c + gcov -pb ./msprime@sta/msprime.c.gcno ../lib/msprime.c + gcov -pb ./msprime@sta/mutgen.c.gcno ../lib/mutgen.c + gcov -pb ./msprime@sta/object_heap.c.gcno ../lib/object_heap.c + gcov -pb ./msprime@sta/recomb_map.c.gcno ../lib/recomb_map.c + gcov -pb ./msprime@sta/interval_map.c.gcno ../lib/interval_map.c + gcov -pb ./msprime@sta/util.c.gcno ../lib/util.c cd .. codecov -X gcov -F C @@ -87,6 +88,6 @@ jobs: name: Install from the distribution tarball command: | python -m venv venv - source venv/bin/activate + source venv/bin/activate pip install --upgrade setuptools pip - pip install dist/*.tar.gz + pip install dist/*.tar.gz diff --git a/_msprimemodule.c b/_msprimemodule.c index 9de85241a..d01e9f865 100644 --- a/_msprimemodule.c +++ b/_msprimemodule.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "msprime.h" #include "likelihood.h" @@ -55,10 +56,16 @@ typedef struct { gsl_rng* rng; } RandomGenerator; +typedef struct { + PyObject_HEAD + interval_map_t *interval_map; +} IntervalMap; + typedef struct { PyObject_HEAD mutgen_t *mutgen; RandomGenerator *random_generator; + IntervalMap *rate_map; } MutationGenerator; typedef struct { @@ -92,6 +99,17 @@ handle_input_error(const char *section, int err) PyErr_Format(MsprimeInputError, "Input error in %s: %s", section, msp_strerror(err)); } +static int +double_PyArray_converter(PyObject *in, PyObject **out) +{ + PyObject *ret = PyArray_FROMANY(in, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY); + if (ret == NULL) { + return NPY_FAIL; + } + *out = ret; + return NPY_SUCCEED; +} + /* * Retrieves the PyObject* corresponding the specified key in the * specified dictionary. @@ -1579,8 +1597,6 @@ static PyTypeObject LightweightTableCollectionType = { (initproc)LightweightTableCollection_init, /* tp_init */ }; - - /*=================================================================== * RandomGenerator *=================================================================== @@ -1644,6 +1660,57 @@ RandomGenerator_get_seed(RandomGenerator *self) return ret; } +static PyObject * +RandomGenerator_flat(RandomGenerator *self, PyObject *args) +{ + PyObject *ret = NULL; + double a, b; + + if (RandomGenerator_check_state(self) != 0) { + goto out; + } + if (!PyArg_ParseTuple(args, "dd", &a, &b)) { + goto out; + } + ret = Py_BuildValue("d", gsl_ran_flat(self->rng, a, b)); +out: + return ret; +} + +static PyObject * +RandomGenerator_poisson(RandomGenerator *self, PyObject *args) +{ + PyObject *ret = NULL; + double mu; + + if (RandomGenerator_check_state(self) != 0) { + goto out; + } + if (!PyArg_ParseTuple(args, "d", &mu)) { + goto out; + } + ret = Py_BuildValue("I", gsl_ran_poisson(self->rng, mu)); +out: + return ret; +} + +static PyObject * +RandomGenerator_uniform_int(RandomGenerator *self, PyObject *args) +{ + PyObject *ret = NULL; + unsigned long n; + + if (RandomGenerator_check_state(self) != 0) { + goto out; + } + if (!PyArg_ParseTuple(args, "k", &n)) { + goto out; + } + ret = Py_BuildValue("k", gsl_rng_uniform_int(self->rng, n)); +out: + return ret; +} + static PyMemberDef RandomGenerator_members[] = { {NULL} /* Sentinel */ }; @@ -1651,6 +1718,12 @@ static PyMemberDef RandomGenerator_members[] = { static PyMethodDef RandomGenerator_methods[] = { {"get_seed", (PyCFunction) RandomGenerator_get_seed, METH_NOARGS, "Returns the random seed for this generator."}, + {"flat", (PyCFunction) RandomGenerator_flat, + METH_VARARGS, "Interface for gsl_ran_flat"}, + {"poisson", (PyCFunction) RandomGenerator_poisson, + METH_VARARGS, "Interface for gsl_ran_poisson"}, + {"uniform_int", (PyCFunction) RandomGenerator_uniform_int, + METH_VARARGS, "Interface for gsl_rng_uniform_int"}, {NULL} /* Sentinel */ }; @@ -1693,6 +1766,167 @@ static PyTypeObject RandomGeneratorType = { (initproc)RandomGenerator_init, /* tp_init */ }; +/*=================================================================== + * IntervalMap + *=================================================================== + */ + +static int +IntervalMap_check_state(IntervalMap *self) +{ + int ret = 0; + if (self->interval_map == NULL) { + PyErr_SetString(PyExc_SystemError, "IntervalMap not initialised"); + ret = -1; + } + return ret; +} + +static void +IntervalMap_dealloc(IntervalMap* self) +{ + if (self->interval_map != NULL) { + interval_map_free(self->interval_map); + PyMem_Free(self->interval_map); + self->interval_map = NULL; + } + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static int +IntervalMap_init(IntervalMap *self, PyObject *args, PyObject *kwds) +{ + int ret = -1; + int err; + static char *kwlist[] = {"position", "value", NULL}; + Py_ssize_t size; + PyObject *py_position = NULL; + PyObject *py_value = NULL; + + self->interval_map = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&", kwlist, + double_PyArray_converter, &py_position, + double_PyArray_converter, &py_value)) { + goto out; + } + + size = PyObject_Size(py_position); + if (size != PyObject_Size(py_value)) { + PyErr_SetString(PyExc_ValueError, + "position and list must be the same length"); + goto out; + } + self->interval_map = PyMem_Malloc(sizeof(interval_map_t)); + if (self->interval_map == NULL) { + PyErr_NoMemory(); + goto out; + } + err = interval_map_alloc(self->interval_map, size, + PyArray_DATA((PyArrayObject *) py_position), + PyArray_DATA((PyArrayObject *) py_value)); + if (err != 0) { + handle_library_error(err); + goto out; + } + ret = 0; +out: + Py_XDECREF(py_position); + Py_XDECREF(py_value); + return ret; +} + +static PyObject * +IntervalMap_get_position(IntervalMap *self, void *closure) +{ + PyObject *ret = NULL; + PyArrayObject *array; + size_t size = self->interval_map->size; + npy_intp dims = (npy_intp) size; + + array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_FLOAT64, 0); + if (array == NULL) { + goto out; + } + memcpy(PyArray_DATA(array), self->interval_map->position, size * sizeof(double)); + ret = (PyObject *) array; +out: + return ret; +} + +static PyObject * +IntervalMap_get_value(IntervalMap *self, void *closure) +{ + PyObject *ret = NULL; + PyArrayObject *array; + size_t size = self->interval_map->size; + npy_intp dims = (npy_intp) size; + + array = (PyArrayObject *) PyArray_EMPTY(1, &dims, NPY_FLOAT64, 0); + if (array == NULL) { + goto out; + } + memcpy(PyArray_DATA(array), self->interval_map->value, size * sizeof(double)); + ret = (PyObject *) array; +out: + return ret; +} + +static PyGetSetDef IntervalMap_getsetters[] = { + {"position", (getter) IntervalMap_get_position, NULL, + "A copy of the position array"}, + {"value", (getter) IntervalMap_get_value, NULL, + "A copy of the value array"}, + {NULL} /* Sentinel */ +}; + +static PyMemberDef IntervalMap_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef IntervalMap_methods[] = { + {NULL} /* Sentinel */ +}; + +static PyTypeObject IntervalMapType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_msprime.IntervalMap", /* tp_name */ + sizeof(IntervalMap), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)IntervalMap_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "IntervalMap objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + IntervalMap_methods, /* tp_methods */ + IntervalMap_members, /* tp_members */ + IntervalMap_getsetters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)IntervalMap_init, /* tp_init */ +}; + + /*=================================================================== * MutationGenerator *=================================================================== @@ -1721,6 +1955,7 @@ MutationGenerator_dealloc(MutationGenerator* self) self->mutgen = NULL; } Py_XDECREF(self->random_generator); + Py_XDECREF(self->rate_map); Py_TYPE(self)->tp_free((PyObject*)self); } @@ -1730,17 +1965,19 @@ MutationGenerator_init(MutationGenerator *self, PyObject *args, PyObject *kwds) int ret = -1; int err; int alphabet = 0; - static char *kwlist[] = {"random_generator", "mutation_rate", "alphabet", + static char *kwlist[] = {"random_generator", "rate_map", "alphabet", "start_time", "end_time", NULL}; - double mutation_rate = 0; double start_time = -DBL_MAX; double end_time = DBL_MAX; RandomGenerator *random_generator = NULL; + IntervalMap *rate_map = NULL; self->mutgen = NULL; self->random_generator = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!d|idd", kwlist, - &RandomGeneratorType, &random_generator, &mutation_rate, + self->rate_map = NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!|idd", kwlist, + &RandomGeneratorType, &random_generator, + &IntervalMapType, &rate_map, &alphabet, &start_time, &end_time)) { goto out; } @@ -1749,12 +1986,13 @@ MutationGenerator_init(MutationGenerator *self, PyObject *args, PyObject *kwds) if (RandomGenerator_check_state(self->random_generator) != 0) { goto out; } - if (alphabet != MSP_ALPHABET_BINARY && alphabet != MSP_ALPHABET_NUCLEOTIDE) { - PyErr_Format(PyExc_ValueError, "Bad mutation alphabet"); + self->rate_map = rate_map; + Py_INCREF(self->rate_map); + if (IntervalMap_check_state(self->rate_map) != 0) { goto out; } - if (mutation_rate < 0) { - PyErr_Format(PyExc_ValueError, "mutation_rate must be >= 0"); + if (alphabet != MSP_ALPHABET_BINARY && alphabet != MSP_ALPHABET_NUCLEOTIDE) { + PyErr_Format(PyExc_ValueError, "Bad mutation alphabet"); goto out; } self->mutgen = PyMem_Malloc(sizeof(mutgen_t)); @@ -1762,8 +2000,8 @@ MutationGenerator_init(MutationGenerator *self, PyObject *args, PyObject *kwds) PyErr_NoMemory(); goto out; } - err = mutgen_alloc(self->mutgen, mutation_rate, random_generator->rng, - alphabet, 0); + err = mutgen_alloc(self->mutgen, random_generator->rng, + rate_map->interval_map, alphabet, 0); if (err != 0) { handle_library_error(err); goto out; @@ -1778,32 +2016,6 @@ MutationGenerator_init(MutationGenerator *self, PyObject *args, PyObject *kwds) return ret; } -static PyObject * -MutationGenerator_get_mutation_rate(MutationGenerator *self) -{ - PyObject *ret = NULL; - - if (MutationGenerator_check_state(self) != 0) { - goto out; - } - ret = Py_BuildValue("d", self->mutgen->mutation_rate); -out: - return ret; -} - -static PyObject * -MutationGenerator_get_alphabet(MutationGenerator *self) -{ - PyObject *ret = NULL; - - if (MutationGenerator_check_state(self) != 0) { - goto out; - } - ret = Py_BuildValue("i", self->mutgen->alphabet); -out: - return ret; -} - static PyObject * MutationGenerator_generate(MutationGenerator *self, PyObject *args, PyObject *kwds) { @@ -1834,15 +2046,25 @@ MutationGenerator_generate(MutationGenerator *self, PyObject *args, PyObject *kw return ret; } -static PyMemberDef MutationGenerator_members[] = { +static PyObject * +MutationGenerator_get_alphabet(MutationGenerator *self, void *closure) +{ + PyObject *ret = NULL; + if (MutationGenerator_check_state(self) != 0) { + goto out; + } + ret = Py_BuildValue("i", self->mutgen->alphabet); +out: + return ret; +} + +static PyGetSetDef MutationGenerator_getsetters[] = { + {"alphabet", (getter) MutationGenerator_get_alphabet, NULL, + "The alphabet for this generator"}, {NULL} /* Sentinel */ }; static PyMethodDef MutationGenerator_methods[] = { - {"get_mutation_rate", (PyCFunction) MutationGenerator_get_mutation_rate, - METH_NOARGS, "Returns the mutation rate for this mutation generator."}, - {"get_alphabet", (PyCFunction) MutationGenerator_get_alphabet, - METH_NOARGS, "Returns the alphabet for this mutation generator."}, {"generate", (PyCFunction) MutationGenerator_generate, METH_VARARGS|METH_KEYWORDS, "Generate mutations and write to the specified table."}, @@ -1878,8 +2100,8 @@ static PyTypeObject MutationGeneratorType = { 0, /* tp_iter */ 0, /* tp_iternext */ MutationGenerator_methods, /* tp_methods */ - MutationGenerator_members, /* tp_members */ - 0, /* tp_getset */ + 0, /* tp_members */ + MutationGenerator_getsetters, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ @@ -1887,6 +2109,7 @@ static PyTypeObject MutationGeneratorType = { 0, /* tp_dictoffset */ (initproc)MutationGenerator_init, /* tp_init */ }; + /*=================================================================== * RecombinationMap *=================================================================== @@ -1914,17 +2137,6 @@ RecombinationMap_dealloc(RecombinationMap* self) Py_TYPE(self)->tp_free((PyObject*)self); } -static int -double_PyArray_converter(PyObject *in, PyObject **out) -{ - PyObject *ret = PyArray_FROMANY(in, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY); - if (ret == NULL) { - return NPY_FAIL; - } - *out = ret; - return NPY_SUCCEED; -} - static int RecombinationMap_init(RecombinationMap *self, PyObject *args, PyObject *kwds) { @@ -1961,8 +2173,7 @@ RecombinationMap_init(RecombinationMap *self, PyObject *args, PyObject *kwds) PyErr_NoMemory(); goto out; } - err = recomb_map_alloc(self->recomb_map, - positions[size - 1], positions, rates, size, discrete); + err = recomb_map_alloc(self->recomb_map, size, positions, rates, discrete); if (err != 0) { handle_library_error(err); goto out; @@ -4192,6 +4403,15 @@ PyInit__msprime(void) Py_INCREF(&RecombinationMapType); PyModule_AddObject(module, "RecombinationMap", (PyObject *) &RecombinationMapType); + /* IntervalMap type */ + IntervalMapType.tp_new = PyType_GenericNew; + if (PyType_Ready(&IntervalMapType) < 0) { + return NULL;; + } + Py_INCREF(&IntervalMapType); + PyModule_AddObject(module, "IntervalMap", (PyObject *) &IntervalMapType); + + /* Errors and constants */ MsprimeInputError = PyErr_NewException("_msprime.InputError", NULL, NULL); Py_INCREF(MsprimeInputError); diff --git a/lib/dev-tools/dev-cli.c b/lib/dev-tools/dev-cli.c index fb835a656..8793a92b1 100644 --- a/lib/dev-tools/dev-cli.c +++ b/lib/dev-tools/dev-cli.c @@ -602,8 +602,7 @@ read_recomb_map(uint32_t num_loci, recomb_map_t *recomb_map, config_t *config) rates[j] = config_setting_get_float(s); } // TODO Read discrete flag from recombination map - ret = recomb_map_alloc(recomb_map, coordinates[size - 1], - coordinates, rates, size, true); + ret = recomb_map_alloc(recomb_map, size, coordinates, rates, true); if (ret != 0) { fatal_msprime_error(ret, __LINE__); } @@ -762,11 +761,12 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu mutation_params_t mutation_params; msp_t msp; recomb_map_t recomb_map; + interval_map_t mut_map; mutgen_t mutgen; tsk_table_collection_t tables; tsk_treeseq_t tree_seq; - gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); + gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); if (rng == NULL) { fatal_error("No memory"); } @@ -775,8 +775,15 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu fatal_tskit_error(ret, __LINE__); } get_configuration(rng, &msp, &tables, &mutation_params, &recomb_map, conf_file); - ret = mutgen_alloc(&mutgen, mutation_params.mutation_rate, rng, - mutation_params.alphabet, 0); + ret = interval_map_alloc_single(&mut_map, + recomb_map_get_sequence_length(&recomb_map), mutation_params.mutation_rate); + if (ret != 0) { + fatal_msprime_error(ret, __LINE__); + } + ret = mutgen_alloc(&mutgen, rng, &mut_map, mutation_params.alphabet, 0); + if (ret != 0) { + fatal_msprime_error(ret, __LINE__); + } if (ret != 0) { fatal_msprime_error(ret, __LINE__); } @@ -839,6 +846,7 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu msp_free(&msp); recomb_map_free(&recomb_map); + interval_map_free(&mut_map); mutgen_free(&mutgen); gsl_rng_free(rng); tsk_table_collection_free(&tables); diff --git a/lib/interval_map.c b/lib/interval_map.c new file mode 100644 index 000000000..c3c25bc67 --- /dev/null +++ b/lib/interval_map.c @@ -0,0 +1,122 @@ +/* +** Copyright (C) 2020 University of Oxford +** +** This file is part of msprime. +** +** msprime is free software: you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation, either version 3 of the License, or +** (at your option) any later version. +** +** msprime is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with msprime. If not, see . +*/ +#include +#include +#include + +#include "util.h" +#include "msprime.h" + +void +interval_map_print_state(interval_map_t *self, FILE *out) +{ + size_t j; + + fprintf(out, "interval_map (%p):: size = %d\n", (void *) self, (int) self->size); + fprintf(out, "\tsequence_length = %f\n", interval_map_get_sequence_length(self)); + fprintf(out, "\tindex\tposition\tvalue\n"); + for (j = 0; j < self->size; j++) { + fprintf(out, "\t%d\t%f\t%f\n", (int) j, self->position[j], self->value[j]); + } +} + +int MSP_WARN_UNUSED +interval_map_alloc(interval_map_t *self, size_t size, double *position, double *value) +{ + int ret = 0; + size_t j; + + memset(self, 0, sizeof(interval_map_t)); + if (size < 2) { + ret = MSP_ERR_INSUFFICIENT_INTERVALS; + goto out; + } + /* Check the framing position */ + if (position[0] != 0.0) { + ret = MSP_ERR_INTERVAL_MAP_START_NON_ZERO; + goto out; + } + self->position = malloc(size * sizeof(*self->position)); + self->value = malloc(size * sizeof(*self->value)); + if (self->position == NULL || self->value == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } + self->size = size; + for (j = 0; j < size; j++) { + if (position[j] < 0) { + ret = MSP_ERR_NEGATIVE_INTERVAL_POSITION; + goto out; + } + if (j > 0) { + if (position[j] <= position[j - 1]) { + ret = MSP_ERR_INTERVAL_POSITIONS_UNSORTED; + goto out; + } + } + self->value[j] = value[j]; + self->position[j] = position[j]; + } +out: + return ret; +} + +int MSP_WARN_UNUSED +interval_map_alloc_single(interval_map_t *self, double sequence_length, double value) +{ + double position[2] = {0, sequence_length}; + return interval_map_alloc(self, 2, position, &value); +} + +int +interval_map_free(interval_map_t *self) +{ + msp_safe_free(self->position); + msp_safe_free(self->value); + return 0; +} + +double +interval_map_get_sequence_length(interval_map_t *self) +{ + return self->position[self->size - 1]; +} + +size_t +interval_map_get_size(interval_map_t *self) +{ + return self->size; +} + +size_t +interval_map_get_num_intervals(interval_map_t *self) +{ + return self->size - 1; +} + +size_t +interval_map_get_index(interval_map_t *self, double x) +{ + size_t index = tsk_search_sorted(self->position, self->size, x); + + if (self->position[index] > x) { + index--; + } + return index; +} diff --git a/lib/meson.build b/lib/meson.build index a349901ca..0657501b4 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -23,7 +23,7 @@ extra_c_args = [ msprime_sources =[ 'msprime.c', 'fenwick.c', 'util.c', 'mutgen.c', 'object_heap.c', - 'likelihood.c', 'recomb_map.c'] + 'likelihood.c', 'recomb_map.c', 'interval_map.c'] avl_lib = static_library('avl', sources: ['avl.c']) msprime_lib = static_library('msprime', diff --git a/lib/msprime.c b/lib/msprime.c index f26e781d0..a95b30ff7 100755 --- a/lib/msprime.c +++ b/lib/msprime.c @@ -3218,12 +3218,9 @@ msp_reset_from_samples(msp_t *self) node_id_t u; tsk_id_t tsk_ind; - tsk_population_table_clear(&self->tables->populations); - tsk_edge_table_clear(&self->tables->edges); - tsk_node_table_clear(&self->tables->nodes); - tsk_migration_table_clear(&self->tables->migrations); + tsk_table_collection_clear(self->tables); - self->tables->sequence_length = self->recomb_map.sequence_length; + self->tables->sequence_length = recomb_map_get_sequence_length(&self->recomb_map); for (j = 0; j < self->num_populations; j++) { ret = tsk_population_table_add_row(&self->tables->populations, NULL, 0); if (ret < 0) { diff --git a/lib/msprime.h b/lib/msprime.h index e3370f784..301060371 100644 --- a/lib/msprime.h +++ b/lib/msprime.h @@ -1,5 +1,5 @@ /* -** Copyright (C) 2015-2018 University of Oxford +** Copyright (C) 2015-2020 University of Oxford ** ** This file is part of msprime. ** @@ -97,7 +97,6 @@ typedef struct segment_t_t { struct segment_t_t *next; } segment_t; - typedef struct { double left; /* TODO CHANGE THIS - not a good name! */ uint32_t value; @@ -203,14 +202,16 @@ typedef struct _simulation_model_t { double (*model_rate_to_generation_rate)(struct _simulation_model_t *model, double rm); } simulation_model_t; -/* Recombination map */ +typedef struct { + double *position; + double *value; + size_t size; +} interval_map_t; +/* Recombination map */ typedef struct { - double sequence_length; /* size of the physical coordinate space */ + interval_map_t map; double total_recombination_rate; - size_t size; /* the total number of values in the map */ - double *positions; - double *rates; double *cumulative; bool discrete; } recomb_map_t; @@ -337,9 +338,9 @@ typedef struct { typedef struct { int alphabet; gsl_rng *rng; + interval_map_t *rate_map; double start_time; double end_time; - double mutation_rate; size_t block_size; avl_tree_t sites; tsk_blkalloc_t allocator; @@ -445,24 +446,28 @@ size_t msp_get_num_rejected_common_ancestor_events(msp_t *self); size_t msp_get_num_recombination_events(msp_t *self); size_t msp_get_num_gene_conversion_events(msp_t *self); +int interval_map_alloc(interval_map_t *self, size_t size, + double *position, double *value); +int interval_map_alloc_single(interval_map_t *self, + double sequence_length, double value); +int interval_map_free(interval_map_t *self); +void interval_map_print_state(interval_map_t *self, FILE *out); +double interval_map_get_sequence_length(interval_map_t *self); +size_t interval_map_get_size(interval_map_t *self); +size_t interval_map_get_num_intervals(interval_map_t *self); +size_t interval_map_get_index(interval_map_t *self, double x); + typedef double (*msp_convert_func) (void *obj, double rate); int recomb_map_alloc_uniform(recomb_map_t *self, double sequence_length, double rate, bool discrete); int recomb_map_alloc(recomb_map_t *self, - double sequence_length, double *positions, double *rates, - size_t size, bool discrete); + size_t size, double *position, double *rate, bool discrete); int recomb_map_copy(recomb_map_t *to, recomb_map_t *from); int recomb_map_free(recomb_map_t *self); -uint32_t recomb_map_get_num_loci(recomb_map_t *self); double recomb_map_get_sequence_length(recomb_map_t *self); bool recomb_map_get_discrete(recomb_map_t *self); -double recomb_map_get_per_locus_recombination_rate(recomb_map_t *self); double recomb_map_get_total_recombination_rate(recomb_map_t *self); -double recomb_map_genetic_to_phys(recomb_map_t *self, double genetic_x); -double recomb_map_phys_to_genetic(recomb_map_t *self, double phys_x); -int recomb_map_phys_to_discrete_genetic(recomb_map_t *self, double phys_x, - uint32_t *locus); void recomb_map_convert_rates(recomb_map_t *self, msp_convert_func convert, void *obj); size_t recomb_map_get_size(recomb_map_t *self); int recomb_map_get_positions(recomb_map_t *self, double *positions); @@ -477,8 +482,10 @@ double recomb_map_position_to_mass(recomb_map_t *self, double position); double recomb_map_shift_by_mass(recomb_map_t *self, double pos, double mass); double recomb_map_sample_poisson(recomb_map_t *self, gsl_rng *rng, double start); -int mutgen_alloc(mutgen_t *self, double mutation_rate, gsl_rng *rng, +int mutgen_alloc(mutgen_t *self, gsl_rng *rng, interval_map_t *rate_map, int alphabet, size_t mutation_block_size); +int mutgen_set_rate(mutgen_t *self, double rate, double sequence_length); +int mutgen_set_map(mutgen_t *self, size_t size, double *position, double *rate); int mutgen_set_time_interval(mutgen_t *self, double start_time, double end_time); int mutgen_free(mutgen_t *self); int mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags); diff --git a/lib/mutgen.c b/lib/mutgen.c index 688a7928a..a541441ea 100644 --- a/lib/mutgen.c +++ b/lib/mutgen.c @@ -1,5 +1,5 @@ /* -** Copyright (C) 2015-2018 University of Oxford +** Copyright (C) 2015-2020 University of Oxford ** ** This file is part of msprime. ** @@ -65,7 +65,8 @@ mutgen_print_state(mutgen_t *self, FILE *out) tsk_size_t j; fprintf(out, "Mutgen state\n"); - fprintf(out, "\tmutation_rate = %f\n", self->mutation_rate); + fprintf(out, "\trate_map:\n"); + interval_map_print_state(self->rate_map, out); fprintf(out, "\tstart_time = %f\n", self->start_time); fprintf(out, "\tend_time = %f\n", self->end_time); tsk_blkalloc_print_state(&self->allocator, out); @@ -85,10 +86,11 @@ mutgen_print_state(mutgen_t *self, FILE *out) } int MSP_WARN_UNUSED -mutgen_alloc(mutgen_t *self, double mutation_rate, gsl_rng *rng, int alphabet, - size_t block_size) +mutgen_alloc(mutgen_t *self, gsl_rng *rng, interval_map_t *rate_map, + int alphabet, size_t block_size) { int ret = 0; + size_t j; assert(rng != NULL); memset(self, 0, sizeof(mutgen_t)); @@ -97,13 +99,29 @@ mutgen_alloc(mutgen_t *self, double mutation_rate, gsl_rng *rng, int alphabet, goto out; } self->alphabet = alphabet; - self->mutation_rate = mutation_rate; self->rng = rng; + self->rate_map = rate_map; self->start_time = -DBL_MAX; self->end_time = DBL_MAX; self->block_size = block_size; avl_init_tree(&self->sites, cmp_site, NULL); + if (block_size == 0) { + block_size = 8192; + } + /* In practice this is the minimum we can support */ + block_size = GSL_MAX(block_size, 128); + ret = tsk_blkalloc_init(&self->allocator, block_size); + if (ret != 0) { + ret = msp_set_tsk_error(ret); + goto out; + } + for (j = 0; j < rate_map->size - 1; j++) { + if (rate_map->value[j] < 0) { + ret = MSP_ERR_BAD_MUTATION_MAP_RATE; + goto out; + } + } out: return ret; } @@ -355,8 +373,10 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags) int ret = 0; tsk_node_table_t *nodes = &tables->nodes; tsk_edge_table_t *edges = &tables->edges; - size_t j, l, branch_mutations; - double left, right, branch_length, distance, mu, position; + const double *map_position = self->rate_map->position; + const double *map_rate = self->rate_map->value; + size_t j, l, branch_mutations, map_index; + double left, right, edge_right, branch_length, mu, position; node_id_t parent, child; const mutation_type_t *mutation_types; unsigned long num_mutation_types; @@ -373,22 +393,23 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags) if (ret != 0) { goto out; } - if (flags & MSP_KEEP_SITES) { - ret = mutget_initialise_sites(self, tables); - if (ret != 0) { - goto out; - } - } - ret = tsk_site_table_clear(&tables->sites); + ret = tsk_table_collection_check_integrity(tables, TSK_CHECK_OFFSETS); if (ret != 0) { ret = msp_set_tsk_error(ret); goto out; } - ret = tsk_mutation_table_clear(&tables->mutations); - if (ret != 0) { - ret = msp_set_tsk_error(ret); + if (interval_map_get_sequence_length(self->rate_map) != tables->sequence_length) { + ret = MSP_ERR_INCOMPATIBLE_MUTATION_MAP; goto out; } + if (flags & MSP_KEEP_SITES) { + ret = mutget_initialise_sites(self, tables); + if (ret != 0) { + goto out; + } + } + tsk_site_table_clear(&tables->sites); + tsk_mutation_table_clear(&tables->mutations); if (self->alphabet == 0) { mutation_types = binary_mutation_types; @@ -400,32 +421,38 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags) for (j = 0; j < edges->num_rows; j++) { left = edges->left[j]; - right = edges->right[j]; - distance = right - left; + edge_right = edges->right[j]; parent = edges->parent[j]; child = edges->child[j]; assert(child >= 0 && child < (node_id_t) nodes->num_rows); branch_start = GSL_MAX(start_time, nodes->time[child]); branch_end = GSL_MIN(end_time, nodes->time[parent]); branch_length = branch_end - branch_start; - mu = branch_length * distance * self->mutation_rate; - branch_mutations = gsl_ran_poisson(self->rng, mu); - for (l = 0; l < branch_mutations; l++) { - /* Rejection sample positions until we get one we haven't seen before. */ - /* TODO add a maximum number of rejections here */ - do { - position = gsl_ran_flat(self->rng, left, right); - search.position = position; - avl_node = avl_search(&self->sites, &search); - } while (avl_node != NULL); - assert(left <= position && position < right); - type = gsl_rng_uniform_int(self->rng, num_mutation_types); - ret = mutgen_add_mutation(self, child, position, - mutation_types[type].ancestral_state, - mutation_types[type].derived_state); - if (ret != 0) { - goto out; + + map_index = interval_map_get_index(self->rate_map, left); + right = 0; + while (right != edge_right) { + right = GSL_MIN(edge_right, map_position[map_index + 1]); + mu = branch_length * (right - left) * map_rate[map_index]; + branch_mutations = gsl_ran_poisson(self->rng, mu); + for (l = 0; l < branch_mutations; l++) { + /* Rejection sample positions until we get one we haven't seen before. */ + /* TODO add a maximum number of rejections here */ + do { + position = gsl_ran_flat(self->rng, left, right); + search.position = position; + avl_node = avl_search(&self->sites, &search); + } while (avl_node != NULL); + assert(left <= position && position < right); + type = gsl_rng_uniform_int(self->rng, num_mutation_types); + ret = mutgen_add_mutation(self, child, position, + mutation_types[type].ancestral_state, + mutation_types[type].derived_state); + if (ret != 0) { + goto out; + } } + map_index++; } } ret = mutgen_populate_tables(self, &tables->sites, &tables->mutations); diff --git a/lib/recomb_map.c b/lib/recomb_map.c index 459636dc3..bb06b43be 100644 --- a/lib/recomb_map.c +++ b/lib/recomb_map.c @@ -1,5 +1,5 @@ /* -** Copyright (C) 2015 University of Oxford +** Copyright (C) 2015-2020 University of Oxford ** ** This file is part of msprime. ** @@ -29,26 +29,30 @@ void recomb_map_print_state(recomb_map_t *self, FILE *out) { - size_t j; - - fprintf(out, "recombination_map (%p):: size = %d\n", (void *) self, (int) self->size); - fprintf(out, "\tsequence_length = %f\n", recomb_map_get_sequence_length(self)); - fprintf(out, "\tindex\tlocation\trate\n"); - for (j = 0; j < self->size; j++) { - fprintf(out, "\t%d\t%f\t%f\n", (int) j, self->positions[j], self->rates[j]); - } + fprintf(out, "recombination_map (%p)\n", (void *) self); + interval_map_print_state(&self->map, out); } -static void +static int recomb_map_init_cumulative_recomb_mass(recomb_map_t *self) { + int ret = 0; size_t j; double s = 0; + const double *position = self->map.position; + const double *rate = self->map.value; + self->cumulative[0] = 0; - for (j = 1; j < self->size; j++) { - s += (self->positions[j] - self->positions[j - 1]) * self->rates[j - 1]; + for (j = 1; j < self->map.size; j++) { + if (rate[j - 1] < 0) { + ret = MSP_ERR_BAD_RECOMBINATION_MAP; + goto out; + } + s += (position[j] - position[j - 1]) * rate[j - 1]; self->cumulative[j] = s; } +out: + return ret; } int MSP_WARN_UNUSED @@ -58,57 +62,32 @@ recomb_map_alloc_uniform(recomb_map_t *self, double sequence_length, double positions[] = {0.0, sequence_length}; double rates[] = {rate, 0.0}; - return recomb_map_alloc(self, sequence_length, positions, rates, 2, discrete); + return recomb_map_alloc(self, 2, positions, rates, discrete); } int MSP_WARN_UNUSED -recomb_map_alloc(recomb_map_t *self, double sequence_length, - double *positions, double *rates, size_t size, bool discrete) +recomb_map_alloc(recomb_map_t *self, size_t size, double *position, double *rate, + bool discrete) { - int ret = MSP_ERR_BAD_RECOMBINATION_MAP; - double length; - size_t j; + int ret = 0; memset(self, 0, sizeof(recomb_map_t)); - if (size < 2) { - goto out; - } - /* Check the framing positions */ - if (positions[0] != 0.0 || positions[size - 1] != sequence_length) { + self->discrete = discrete; + + ret = interval_map_alloc(&self->map, size, position, rate); + if (ret != 0) { goto out; } - if (sequence_length < 1 && discrete) { + if (interval_map_get_sequence_length(&self->map) < 1 && discrete) { + ret = MSP_ERR_BAD_RECOMBINATION_MAP; goto out; } - self->positions = malloc(size * sizeof(double)); - self->rates = malloc(size * sizeof(double)); self->cumulative = malloc(size * sizeof(double)); - - if (self->positions == NULL || self->rates == NULL || self->cumulative == NULL) { + if (self->cumulative == NULL) { ret = MSP_ERR_NO_MEMORY; goto out; } - self->total_recombination_rate = 0.0; - self->size = size; - self->sequence_length = sequence_length; - self->discrete = discrete; - for (j = 0; j < size; j++) { - if (rates[j] < 0 || positions[j] < 0) { - goto out; - } - if (j > 0) { - /* Coordinates must be sorted */ - if (positions[j] <= positions[j - 1]) { - goto out; - } - length = positions[j] - positions[j - 1]; - self->total_recombination_rate += length * rates[j - 1]; - } - self->rates[j] = rates[j]; - self->positions[j] = positions[j]; - } - ret = 0; - recomb_map_init_cumulative_recomb_mass(self); + ret = recomb_map_init_cumulative_recomb_mass(self); out: return ret; } @@ -116,32 +95,22 @@ recomb_map_alloc(recomb_map_t *self, double sequence_length, int recomb_map_copy(recomb_map_t *to, recomb_map_t *from) { - return recomb_map_alloc(to, from->sequence_length, - from->positions, from->rates, from->size, from->discrete); + return recomb_map_alloc(to, from->map.size, + from->map.position, from->map.value, from->discrete); } int recomb_map_free(recomb_map_t *self) { - if (self->positions != NULL) { - free(self->positions); - } - if (self->rates != NULL) { - free(self->rates); - } - if (self->cumulative != NULL) { - free(self->cumulative); - } + interval_map_free(&self->map); + msp_safe_free(self->cumulative); return 0; } -/* Returns the equivalent recombination rate between pairs of - * adjacent loci. - */ double recomb_map_get_total_recombination_rate(recomb_map_t *self) { - return self->total_recombination_rate; + return self->cumulative[self->map.size - 1]; } /* Returns the total physical length of the sequence. @@ -149,7 +118,7 @@ recomb_map_get_total_recombination_rate(recomb_map_t *self) double recomb_map_get_sequence_length(recomb_map_t *self) { - return self->sequence_length; + return interval_map_get_sequence_length(&self->map); } /* Returns a boolean indicating whether the recombination map is discrete. @@ -181,21 +150,24 @@ recomb_map_mass_between_left_exclusive(recomb_map_t *self, double left, double r double recomb_map_position_to_mass(recomb_map_t *self, double pos) { + const double *position = self->map.position; + const double *rate = self->map.value; double offset; size_t index; - if (pos == self->positions[0]) { + if (pos == position[0]) { return 0; } - if (pos >= self->positions[self->size - 1]) { - return self->cumulative[self->size - 1]; + if (pos >= position[self->map.size - 1]) { + return self->cumulative[self->map.size - 1]; } - index = msp_binary_interval_search(pos, self->positions, self->size); + /* TODO replace this with tsk_search_sorted */ + index = msp_binary_interval_search(pos, position, self->map.size); assert(index > 0); index--; - offset = pos - self->positions[index]; + offset = pos - position[index]; - return self->cumulative[index] + offset * self->rates[index]; + return self->cumulative[index] + offset * rate[index]; } /* Finds the physical coordinate such that the sequence up to (but not @@ -204,17 +176,19 @@ recomb_map_position_to_mass(recomb_map_t *self, double pos) double recomb_map_mass_to_position(recomb_map_t *self, double mass) { + const double *position = self->map.position; + const double *rate = self->map.value; double mass_in_interval, pos; size_t index; if (mass == 0.0) { - return self->positions[0]; + return position[0]; } - index = msp_binary_interval_search(mass, self->cumulative, self->size); + index = msp_binary_interval_search(mass, self->cumulative, self->map.size); assert(index > 0); index--; mass_in_interval = mass - self->cumulative[index]; - pos = self->positions[index] + mass_in_interval / self->rates[index]; + pos = position[index] + mass_in_interval / rate[index]; return self->discrete ? floor(pos) : pos; } @@ -245,8 +219,8 @@ void recomb_map_convert_rates(recomb_map_t *self, msp_convert_func convert, void *obj) { size_t i; - for (i = 0; i < self->size; i++) { - self->rates[i] = convert(obj, self->rates[i]); + for (i = 0; i < self->map.size; i++) { + self->map.value[i] = convert(obj, self->map.value[i]); } recomb_map_init_cumulative_recomb_mass(self); } @@ -254,19 +228,18 @@ recomb_map_convert_rates(recomb_map_t *self, msp_convert_func convert, void *obj size_t recomb_map_get_size(recomb_map_t *self) { - return self->size; + return interval_map_get_size(&self->map); } int -recomb_map_get_positions(recomb_map_t *self, double *positions) +recomb_map_get_positions(recomb_map_t *self, double *position) { - memcpy(positions, self->positions, sizeof(double) * self->size); + memcpy(position, self->map.position, sizeof(double) * self->map.size); return 0; } -int recomb_map_get_rates(recomb_map_t *self, double *rates) +int recomb_map_get_rates(recomb_map_t *self, double *rate) { - memcpy(rates, self->rates, sizeof(double) * self->size); + memcpy(rate, self->map.value, sizeof(double) * self->map.size); return 0; - } diff --git a/lib/tests/tests.c b/lib/tests/tests.c index 3d7827882..22dbd20ca 100644 --- a/lib/tests/tests.c +++ b/lib/tests/tests.c @@ -1,5 +1,5 @@ /* -** Copyright (C) 2016-2018 University of Oxford +** Copyright (C) 2016-2020 University of Oxford ** ** This file is part of msprime. ** @@ -2762,12 +2762,15 @@ test_simulation_replicates(void) mutgen_t mutgen; tsk_table_collection_t tables; recomb_map_t recomb_map; + interval_map_t mut_map; CU_ASSERT_FATAL(samples != NULL); CU_ASSERT_FATAL(rng != NULL); ret = recomb_map_alloc_uniform(&recomb_map, m / 2, 1.0, true); CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = interval_map_alloc_single(&mut_map, m / 2, mutation_rate); + CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -2792,16 +2795,20 @@ test_simulation_replicates(void) CU_ASSERT_EQUAL(ret, 0); ret = msp_initialise(&msp); CU_ASSERT_EQUAL(ret, 0); - ret = mutgen_alloc(&mutgen, mutation_rate, rng, 0, 3); + ret = mutgen_alloc(&mutgen, rng, &mut_map, 0, 3); CU_ASSERT_EQUAL(ret, 0); for (j = 0; j < num_replicates; j++) { + CU_ASSERT_EQUAL(ret, 0); ret = msp_run(&msp, DBL_MAX, SIZE_MAX); CU_ASSERT_EQUAL(ret, 0); msp_verify(&msp, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = msp_finalise_tables(&msp); + CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables, 0); + /* printf("ret = %s\n", msp_strerror(ret)); */ + CU_ASSERT_EQUAL_FATAL(ret, 0); tables.sequence_length = m; ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES); @@ -2822,6 +2829,7 @@ test_simulation_replicates(void) free(samples); tsk_table_collection_free(&tables); recomb_map_free(&recomb_map); + interval_map_free(&mut_map); } static void @@ -3365,8 +3373,7 @@ test_simple_recomb_map(void) for (j = 0; j < 3; j++) { seq_length = positions[j][1]; - ret = recomb_map_alloc( - &recomb_map, seq_length, positions[j], rates, 2, true); + ret = recomb_map_alloc(&recomb_map, 2, positions[j], rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); recomb_map_print_state(&recomb_map, _devnull); CU_ASSERT_EQUAL(recomb_map_get_size(&recomb_map), 2); @@ -3381,12 +3388,12 @@ verify_recomb_maps_equal(recomb_map_t *actual, recomb_map_t *expected) { size_t i; - CU_ASSERT_EQUAL(actual->size, expected->size); + CU_ASSERT_EQUAL(actual->map.size, expected->map.size); CU_ASSERT_EQUAL(actual->discrete, expected->discrete); - CU_ASSERT_EQUAL(actual->sequence_length, expected->sequence_length); - for (i = 0; i < actual->size; i++) { - CU_ASSERT_DOUBLE_EQUAL(actual->positions[i], expected->positions[i], 0.0); - CU_ASSERT_DOUBLE_EQUAL(actual->rates[i], expected->rates[i], 0.0); + for (i = 0; i < actual->map.size; i++) { + CU_ASSERT_DOUBLE_EQUAL(actual->map.position[i], + expected->map.position[i], 0.0); + CU_ASSERT_DOUBLE_EQUAL(actual->map.value[i], expected->map.value[i], 0.0); CU_ASSERT_DOUBLE_EQUAL(actual->cumulative[i], expected->cumulative[i], 1e-6); } } @@ -3406,7 +3413,7 @@ test_recomb_map_copy(void) double rates[] = {1.0, 2.0, 0.0}; double other_rates[] = {2.0, 4.0, 0.0}; - ret = recomb_map_alloc(&map, 2.0, positions, rates, 3, true); + ret = recomb_map_alloc(&map, 3, positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = recomb_map_copy(©, &map); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -3414,9 +3421,9 @@ test_recomb_map_copy(void) recomb_map_free(&map); recomb_map_free(©); - ret = recomb_map_alloc(&map, 2.0, positions, rates, 3, true); + ret = recomb_map_alloc(&map, 3, positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = recomb_map_alloc(&other, 2.0, positions, other_rates, 3, true); + ret = recomb_map_alloc(&other, 3, positions, other_rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); recomb_map_convert_rates(&map, double_rate, NULL); verify_recomb_maps_equal(&map, &other); @@ -3433,51 +3440,47 @@ test_recomb_map_errors(void) double rates[] = {1.0, 2.0, 0.0}; double short_positions[] = {0.0, 0.25, 0.5}; - ret = recomb_map_alloc(&recomb_map, 1.0, positions, rates, 0, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); + ret = recomb_map_alloc(&recomb_map, 0, positions, rates, true); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INSUFFICIENT_INTERVALS); recomb_map_free(&recomb_map); - ret = recomb_map_alloc(&recomb_map, 0.5, short_positions, rates, 3, true); + ret = recomb_map_alloc(&recomb_map, 3, short_positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); recomb_map_free(&recomb_map); - ret = recomb_map_alloc(&recomb_map, 1.0, positions, rates, 1, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); - recomb_map_free(&recomb_map); - - ret = recomb_map_alloc(&recomb_map, 2.0, positions, rates, 2, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); + ret = recomb_map_alloc(&recomb_map, 1, positions, rates, true); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INSUFFICIENT_INTERVALS); recomb_map_free(&recomb_map); positions[0] = 1; - ret = recomb_map_alloc(&recomb_map, 1.0, positions, rates, 2, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); + ret = recomb_map_alloc(&recomb_map, 2, positions, rates, true); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INTERVAL_MAP_START_NON_ZERO); recomb_map_free(&recomb_map); positions[0] = 0; positions[1] = 3.0; - ret = recomb_map_alloc(&recomb_map, 2.0, positions, rates, 3, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); + ret = recomb_map_alloc(&recomb_map, 3, positions, rates, true); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INTERVAL_POSITIONS_UNSORTED); recomb_map_free(&recomb_map); positions[1] = 1.0; positions[0] = -1; - ret = recomb_map_alloc(&recomb_map, 2.0, positions, rates, 3, true); - CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); + ret = recomb_map_alloc(&recomb_map, 3, positions, rates, true); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INTERVAL_MAP_START_NON_ZERO); recomb_map_free(&recomb_map); positions[0] = 0.0; rates[0] = -1; - ret = recomb_map_alloc(&recomb_map, 2.0, positions, rates, 3, true); + ret = recomb_map_alloc(&recomb_map, 3, positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_RECOMBINATION_MAP); recomb_map_free(&recomb_map); rates[0] = 1.0; - ret = recomb_map_alloc(&recomb_map, 2.0, positions, rates, 3, true); + ret = recomb_map_alloc(&recomb_map, 3, positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); recomb_map_free(&recomb_map); - ret = recomb_map_alloc(&recomb_map, 0.5, short_positions, rates, 3, false); + ret = recomb_map_alloc(&recomb_map, 3, short_positions, rates, false); CU_ASSERT_EQUAL_FATAL(ret, 0); recomb_map_free(&recomb_map); } @@ -3497,8 +3500,7 @@ verify_recomb_map(double length, double *positions, double *rates, size_t size) CU_ASSERT_FATAL(ret_rates != NULL); CU_ASSERT_FATAL(ret_positions != NULL); - ret = recomb_map_alloc(&recomb_map, length, - positions, rates, size, true); + ret = recomb_map_alloc(&recomb_map, size, positions, rates, true); CU_ASSERT_EQUAL_FATAL(ret, 0); recomb_map_print_state(&recomb_map, _devnull); CU_ASSERT_EQUAL(recomb_map_get_size(&recomb_map), size); @@ -3538,10 +3540,9 @@ static void test_translate_position_and_recomb_mass(void) { recomb_map_t map; - double seq_length = 20; double p1[] = {0, 6, 13, 20}; double r1[] = {3, 0, 1, 0}; - recomb_map_alloc(&map, seq_length, p1, r1, 4, true); + recomb_map_alloc(&map, 4, p1, r1, true); /* interval edges */ CU_ASSERT_EQUAL(recomb_map_position_to_mass(&map, 0), 0); @@ -3571,13 +3572,13 @@ test_recomb_map_mass_between(void) { recomb_map_t discrete_map; recomb_map_t cont_map; - double seq_length = 20; double p1[] = {0, 6, 13, 20}; double r1[] = {3, 0, 1, 0}; - recomb_map_alloc(&discrete_map, seq_length, p1, r1, 4, true); - recomb_map_alloc(&cont_map, seq_length, p1, r1, 4, false); double tol = 1e-9; + recomb_map_alloc(&discrete_map, 4, p1, r1, true); + recomb_map_alloc(&cont_map, 4, p1, r1, false); + CU_ASSERT_DOUBLE_EQUAL_FATAL(recomb_map_mass_between(&discrete_map, 0, 2), 6, tol); CU_ASSERT_DOUBLE_EQUAL_FATAL(recomb_map_mass_between(&cont_map, 0, 2), 6, tol); CU_ASSERT_DOUBLE_EQUAL_FATAL( @@ -4091,6 +4092,7 @@ insert_single_tree(tsk_table_collection_t *tables) "0 1 6 4,5\n"; */ int ret; + tables->sequence_length = 1.0; ret = tsk_node_table_add_row(&tables->nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, TSK_NULL, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4123,11 +4125,103 @@ insert_single_tree(tsk_table_collection_t *tables) ret = tsk_edge_table_add_row(&tables->edges, 0, 1, 6, 5); CU_ASSERT_FATAL(ret >= 0); + ret = tsk_population_table_add_row(&tables->populations, NULL, 0); + CU_ASSERT_FATAL(ret == 0); + /* Add a site and a mutation */ ret = tsk_site_table_add_row(&tables->sites, 0.1, "A", 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); ret = tsk_mutation_table_add_row(&tables->mutations, 0, 0, -1, "C", 1, NULL, 0); CU_ASSERT_FATAL(ret >= 0); + ret = tsk_table_collection_check_integrity(tables, TSK_CHECK_OFFSETS); + + CU_ASSERT_FATAL(ret == 0); +} + +static void +test_mutgen_simple_map(void) +{ + int ret = 0; + mutgen_t mutgen; + gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); + tsk_table_collection_t tables; + interval_map_t rate_map; + double pos[] = {0, 0.1, 0.2, 0.3, 0.4, 1.0}; + double rate[] = {0, 0.01, 0.02, 0.03, 0.04, 0.0}; + + CU_ASSERT_FATAL(rng != NULL); + ret = tsk_table_collection_init(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + insert_single_tree(&tables); + + ret = interval_map_alloc(&rate_map, 6, pos, rate); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->size, 6); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[0], 0); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[1], 0.1); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[2], 0.2); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[3], 0.3); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[4], 0.4); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->position[5], 1.0); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[0], 0); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[1], 0.01); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[2], 0.02); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[3], 0.03); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[4], 0.04); + CU_ASSERT_EQUAL_FATAL(mutgen.rate_map->value[5], 0); + + ret = mutgen_generate(&mutgen, &tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + tables.sequence_length = 2.0; + ret = mutgen_generate(&mutgen, &tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INCOMPATIBLE_MUTATION_MAP); + + mutgen_free(&mutgen); + interval_map_free(&rate_map); + tsk_table_collection_free(&tables); + gsl_rng_free(rng); +} + +static void +test_mutgen_errors(void) +{ + int ret = 0; + mutgen_t mutgen; + gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); + tsk_table_collection_t tables; + interval_map_t rate_map; + + CU_ASSERT_FATAL(rng != NULL); + ret = tsk_table_collection_init(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + insert_single_tree(&tables); + + ret = interval_map_alloc_single(&rate_map, 1, -1); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 1); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_BAD_MUTATION_MAP_RATE); + interval_map_free(&rate_map); + mutgen_free(&mutgen); + + ret = interval_map_alloc_single(&rate_map, 5, 1); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 1); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_generate(&mutgen, &tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INCOMPATIBLE_MUTATION_MAP); + + tables.sequence_length = 0.1; + ret = mutgen_generate(&mutgen, &tables, 0); + CU_ASSERT_FATAL(msp_is_tsk_error(ret)); + + mutgen_free(&mutgen); + interval_map_free(&rate_map); + tsk_table_collection_free(&tables); + gsl_rng_free(rng); } static void @@ -4138,6 +4232,7 @@ test_single_tree_mutgen(void) mutgen_t mutgen; gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); tsk_table_collection_t tables1, tables2; + interval_map_t rate_map; CU_ASSERT_FATAL(rng != NULL); ret = tsk_table_collection_init(&tables1, 0); @@ -4147,7 +4242,9 @@ test_single_tree_mutgen(void) insert_single_tree(&tables1); insert_single_tree(&tables2); - ret = mutgen_alloc(&mutgen, 0.0, rng, 0, 100); + ret = interval_map_alloc_single(&rate_map, 1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 100); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables1, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4157,7 +4254,9 @@ test_single_tree_mutgen(void) CU_ASSERT_EQUAL_FATAL(ret, 0); gsl_rng_set(rng, 1); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 100); + /* Cheat a bit to avoid reallocating a new rate_map */ + rate_map.value[0] = 10; + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 100); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables1, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4178,7 +4277,7 @@ test_single_tree_mutgen(void) * block size. */ gsl_rng_set(rng, 1); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 1); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 1); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables2, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4188,6 +4287,7 @@ test_single_tree_mutgen(void) tsk_table_collection_free(&tables1); tsk_table_collection_free(&tables2); + interval_map_free(&rate_map); gsl_rng_free(rng); } @@ -4198,6 +4298,7 @@ test_single_tree_mutgen_keep_sites(void) gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); tsk_table_collection_t tables; tsk_table_collection_t copy; + interval_map_t rate_map; mutgen_t mutgen; ret = tsk_table_collection_init(&tables, 0); @@ -4210,15 +4311,19 @@ test_single_tree_mutgen_keep_sites(void) /* With a mutation rate of 0, we should keep exactly the same set * of mutations */ - ret = mutgen_alloc(&mutgen, 0.0, rng, 0, 0); + ret = interval_map_alloc_single(&rate_map, 1, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 1); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables, MSP_KEEP_SITES); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, ©)); mutgen_free(&mutgen); + /* Turn up the mutation rate */ + rate_map.value[0] = 10; gsl_rng_set(rng, 2); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables, MSP_KEEP_SITES); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4229,7 +4334,7 @@ test_single_tree_mutgen_keep_sites(void) /* If we run precisely the same mutations again we should rejection * sample away all of the original positions */ gsl_rng_set(rng, 2); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables, MSP_KEEP_SITES); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4246,6 +4351,7 @@ test_single_tree_mutgen_keep_sites(void) tsk_table_collection_free(&tables); tsk_table_collection_free(©); gsl_rng_free(rng); + interval_map_free(&rate_map); } static void @@ -4256,6 +4362,7 @@ test_single_tree_mutgen_keep_sites_many_mutations(void) gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); tsk_table_collection_t tables; mutgen_t mutgen; + interval_map_t rate_map; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4266,8 +4373,10 @@ test_single_tree_mutgen_keep_sites_many_mutations(void) CU_ASSERT_EQUAL_FATAL(ret, j + 1); } + ret = interval_map_alloc_single(&rate_map, 1, 10); + CU_ASSERT_EQUAL_FATAL(ret, 0); gsl_rng_set(rng, 2); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 1); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 1); CU_ASSERT_EQUAL_FATAL(ret, 0); for (j = 0; j < 10; j++) { @@ -4278,6 +4387,7 @@ test_single_tree_mutgen_keep_sites_many_mutations(void) CU_ASSERT_TRUE(tables.sites.num_rows > 2); mutgen_free(&mutgen); + interval_map_free(&rate_map); tsk_table_collection_free(&tables); gsl_rng_free(rng); } @@ -4289,6 +4399,7 @@ test_single_tree_mutgen_interval(void) mutgen_t mutgen; gsl_rng *rng = gsl_rng_alloc(gsl_rng_default); tsk_table_collection_t tables1; + interval_map_t rate_map; size_t j; tsk_id_t node; @@ -4298,7 +4409,9 @@ test_single_tree_mutgen_interval(void) CU_ASSERT_EQUAL_FATAL(ret, 0); insert_single_tree(&tables1); - ret = mutgen_alloc(&mutgen, 10.0, rng, 0, 100); + ret = interval_map_alloc_single(&rate_map, 1, 10); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = mutgen_alloc(&mutgen, rng, &rate_map, 0, 100); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = mutgen_generate(&mutgen, &tables1, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -4340,6 +4453,7 @@ test_single_tree_mutgen_interval(void) ret = mutgen_free(&mutgen); CU_ASSERT_EQUAL_FATAL(ret, 0); + interval_map_free(&rate_map); tsk_table_collection_free(&tables1); gsl_rng_free(rng); } @@ -4661,6 +4775,46 @@ test_sweep_genic_selection_time_change(void) recomb_map_free(&recomb_map); } +static void +test_interval_map(void) +{ + int ret; + size_t j; + interval_map_t imap; + double position[] = {0, 1, 2, 3, 4, 5}; + double value[] = {0.1, 1.1, 2.1, 3.1, 4.1, 5.1}; + + ret = interval_map_alloc(&imap, 0, NULL, NULL); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INSUFFICIENT_INTERVALS); + interval_map_free(&imap); + + ret = interval_map_alloc(&imap, 1, NULL, NULL); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INSUFFICIENT_INTERVALS); + interval_map_free(&imap); + + for (j = 2; j < 6; j++) { + ret = interval_map_alloc(&imap, j, position, value); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_EQUAL(interval_map_get_size(&imap), j); + CU_ASSERT_EQUAL(interval_map_get_num_intervals(&imap), j - 1); + CU_ASSERT_EQUAL(interval_map_get_sequence_length(&imap), j - 1); + interval_map_print_state(&imap, _devnull); + interval_map_free(&imap); + } + + position[1] = -1; + ret = interval_map_alloc(&imap, 6, position, value); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_NEGATIVE_INTERVAL_POSITION); + interval_map_free(&imap); + position[1] = 0; + + position[0] = 1; + ret = interval_map_alloc(&imap, 6, position, value); + CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_INTERVAL_MAP_START_NON_ZERO); + interval_map_free(&imap); + +} + static void test_strerror(void) { @@ -4814,6 +4968,8 @@ main(int argc, char **argv) {"test_simulate_from_incompatible", test_simulate_from_incompatible}, {"test_simulate_init_errors", test_simulate_init_errors}, + {"test_mutgen_simple_map", test_mutgen_simple_map}, + {"test_mutgen_errors", test_mutgen_errors}, {"test_single_tree_mutgen", test_single_tree_mutgen}, {"test_single_tree_mutgen_keep_sites", test_single_tree_mutgen_keep_sites}, {"test_single_tree_mutgen_keep_sites_many_mutations", @@ -4830,6 +4986,8 @@ main(int argc, char **argv) {"test_sweep_genic_selection_time_change", test_sweep_genic_selection_time_change}, + {"test_interval_map", test_interval_map}, + {"test_strerror", test_strerror}, {"test_strerror_tskit", test_strerror_tskit}, CU_TEST_INFO_NULL, diff --git a/lib/util.c b/lib/util.c index 4d7a8bd1d..e7328ec6c 100644 --- a/lib/util.c +++ b/lib/util.c @@ -1,5 +1,5 @@ /* -** Copyright (C) 2015-2019 University of Oxford +** Copyright (C) 2015-2020 University of Oxford ** ** This file is part of msprime. ** @@ -169,6 +169,21 @@ msp_strerror_internal(int err) case MSP_ERR_BAD_TRUNCATION_POINT: ret = "Bad truncation_point. Must have 0 < truncation_point <= 1"; break; + case MSP_ERR_BAD_MUTATION_MAP_RATE: + ret = "Bad mutation rate; must be >= 0."; + break; + case MSP_ERR_INCOMPATIBLE_MUTATION_MAP: + ret = "Mutation map is not compatible with specified tables."; + break; + case MSP_ERR_INSUFFICIENT_INTERVALS: + ret = "At least one interval must be specified."; + break; + case MSP_ERR_INTERVAL_MAP_START_NON_ZERO: + ret = "The first interval must start with zero"; + break; + case MSP_ERR_INTERVAL_POSITIONS_UNSORTED: + ret = "Interval positions must be listed in increasing order"; + break; default: ret = "Error occurred generating error string. Please file a bug " "report!"; @@ -178,7 +193,6 @@ msp_strerror_internal(int err) return ret; } - int msp_set_tsk_error(int err) { @@ -219,7 +233,7 @@ __msp_safe_free(void **ptr) { * Assumes `values` are sorted */ size_t -msp_binary_interval_search(double query, double *values, size_t n_values) +msp_binary_interval_search(double query, const double *values, size_t n_values) { if (n_values == 0) { return 0; diff --git a/lib/util.h b/lib/util.h index e1bc4de87..9c108f46f 100644 --- a/lib/util.h +++ b/lib/util.h @@ -1,5 +1,5 @@ /* -** Copyright (C) 2015-2018 University of Oxford +** Copyright (C) 2015-2020 University of Oxford ** ** This file is part of msprime. ** @@ -78,6 +78,12 @@ #define MSP_ERR_BAD_PEDIGREE_ID -40 #define MSP_ERR_BAD_ALPHA -41 #define MSP_ERR_BAD_TRUNCATION_POINT -42 +#define MSP_ERR_BAD_MUTATION_MAP_RATE -43 +#define MSP_ERR_INCOMPATIBLE_MUTATION_MAP -44 +#define MSP_ERR_INSUFFICIENT_INTERVALS -45 +#define MSP_ERR_INTERVAL_MAP_START_NON_ZERO -46 +#define MSP_ERR_NEGATIVE_INTERVAL_POSITION -47 +#define MSP_ERR_INTERVAL_POSITIONS_UNSORTED -48 /* This bit is 0 for any errors originating from tskit */ #define MSP_TSK_ERR_BIT 13 @@ -89,6 +95,6 @@ void __msp_safe_free(void **ptr); #define msp_safe_free(pointer) __msp_safe_free((void **) &(pointer)) -size_t msp_binary_interval_search(double query, double *values, size_t n_values); +size_t msp_binary_interval_search(double query, const double *values, size_t n_values); #endif /*__UTIL_H__*/ diff --git a/msprime/cli.py b/msprime/cli.py index ce492d4a8..6303c0119 100644 --- a/msprime/cli.py +++ b/msprime/cli.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2015-2018 University of Oxford +# Copyright (C) 2015-2020 University of Oxford # # This file is part of msprime. # @@ -213,8 +213,13 @@ def __init__( self._random_generator = msprime.RandomGenerator(seed) self._ms_random_seeds = ms_seeds self._simulator.random_generator = self._random_generator + rate_map = msprime.IntervalMap( + position=[0, self._simulator.sequence_length], + value=[self._mutation_rate, 0] + ) self._mutation_generator = msprime.MutationGenerator( - self._random_generator, self._mutation_rate) + self._random_generator, rate_map + ) def get_num_replicates(self): """ diff --git a/msprime/mutations.py b/msprime/mutations.py index 1d30172ae..3c82a015e 100644 --- a/msprime/mutations.py +++ b/msprime/mutations.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2018 University of Oxford +# Copyright (C) 2018-2020 University of Oxford # # This file is part of msprime. # @@ -63,6 +63,15 @@ def asdict(self): self.__init__).parameters.keys() if hasattr(self, key)} +class MutationMap(object): + """ + TODO document + """ + def __init__(self, position, rate): + self.position = position + self.rate = rate + + def mutate( tree_sequence, rate=None, random_seed=None, model=None, keep=False, start_time=None, end_time=None): @@ -160,8 +169,12 @@ def mutate( encoded_provenance = provenance.json_encode_provenance( provenance.get_provenance_dict(parameters)) + rate_map = simulations.IntervalMap( + position=[0, tree_sequence.sequence_length], + value=[rate, 0] + ) mutation_generator = _msprime.MutationGenerator( - rng, rate, alphabet=alphabet, start_time=start_time, end_time=end_time) + rng, rate_map, alphabet=alphabet, start_time=start_time, end_time=end_time) lwt = _msprime.LightweightTableCollection() lwt.fromdict(tables.asdict()) mutation_generator.generate(lwt, keep=keep) diff --git a/msprime/simulations.py b/msprime/simulations.py index bf18554ab..efed20420 100644 --- a/msprime/simulations.py +++ b/msprime/simulations.py @@ -48,6 +48,7 @@ # and may be removed in future versions. from _msprime import RandomGenerator from _msprime import MutationGenerator +from _msprime import IntervalMap # Make sure the GSL error handler is turned off so that we can be sure that # we don't abort on errors. This can be reset by using the function @@ -549,7 +550,11 @@ def simulate( "Cannot specify mutation rate combined with a non-zero " "start_time. Please use msprime.mutate on the returned " "tree sequence instead") - mutation_generator = MutationGenerator(rng, mutation_rate) + rate_map = IntervalMap( + position=[0, sim.sequence_length], + value=[mutation_rate, 0] + ) + mutation_generator = MutationGenerator(rng, rate_map) if replicate_index is not None and random_seed is None: raise ValueError("Cannot specify replicate_index without random_seed as this " @@ -806,7 +811,8 @@ def create_ll_instance(self): ll_demographic_events = [ event.get_ll_representation(d) for event in self.demographic_events] ll_recomb_map = self.recombination_map.get_ll_recombination_map() - self.ll_tables = _msprime.LightweightTableCollection() + self.ll_tables = _msprime.LightweightTableCollection( + self.recombination_map.get_sequence_length()) if self.from_ts is not None: from_ts_tables = self.from_ts.tables.asdict() # Clear the provenance as it's included in the new ts's provenance diff --git a/setup.py b/setup.py index 116d1ae53..5766dd8a2 100644 --- a/setup.py +++ b/setup.py @@ -87,7 +87,7 @@ def finalize_options(self): msp_source_files = [ "msprime.c", "fenwick.c", "avl.c", "util.c", "object_heap.c", "recomb_map.c", "mutgen.c", - "likelihood.c" + "likelihood.c", "interval_map.c" ] tsk_source_files = ["core.c", "tables.c", "trees.c"] kas_source_files = ["kastore.c"] diff --git a/tests/test_highlevel.py b/tests/test_highlevel.py index a266524d1..2fa62993d 100644 --- a/tests/test_highlevel.py +++ b/tests/test_highlevel.py @@ -797,14 +797,16 @@ def test_no_mutations_with_start_time(self): def test_mutation_generator_unsupported(self): n = 10 - mutgen = msprime.MutationGenerator(msprime.RandomGenerator(1), 1) + rate_map = msprime.IntervalMap([0, 1], [1, 0]) + mutgen = msprime.MutationGenerator(msprime.RandomGenerator(1), rate_map) with self.assertRaises(ValueError): msprime.simulate(n, mutation_generator=mutgen) def test_mutation_interface(self): - for bad_type in ["x", [], {}]: - self.assertRaises( - TypeError, msprime.simulate, 10, mutation_rate=bad_type) + for bad_type in [{}, self]: + self.assertRaises(TypeError, msprime.simulate, 10, mutation_rate=bad_type) + for bad_value in ["x", [], [[], []]]: + self.assertRaises(ValueError, msprime.simulate, 10, mutation_rate=bad_value) def test_recombination(self): n = 10 diff --git a/tests/test_lowlevel.py b/tests/test_lowlevel.py index e02f5f604..3b7e91e9d 100644 --- a/tests/test_lowlevel.py +++ b/tests/test_lowlevel.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 University of Oxford +# Copyright (C) 2015-2020 University of Oxford # # This file is part of msprime. # @@ -1979,6 +1979,85 @@ def test_seed(self): rng = _msprime.RandomGenerator(s) self.assertEqual(rng.get_seed(), s) + def test_flat_errors(self): + rng = _msprime.RandomGenerator(1) + self.assertRaises(TypeError, rng.flat) + self.assertRaises(TypeError, rng.flat, 0) + for bad_type in ["as", [], None]: + self.assertRaises(TypeError, rng.flat, bad_type, 1) + self.assertRaises(TypeError, rng.flat, 0, bad_type) + + def test_flat(self): + for seed in [1, 2, 2**32 - 1]: + rng1 = _msprime.RandomGenerator(seed) + rng2 = _msprime.RandomGenerator(seed) + values = [0, 1, 10, -10, 1e200, -1e200] + for a, b in itertools.product(values, repeat=2): + self.assertEqual(rng1.flat(a, b), rng2.flat(a, b)) + + def test_poisson_errors(self): + rng = _msprime.RandomGenerator(1) + self.assertRaises(TypeError, rng.poisson) + for bad_type in ["as", [], None]: + self.assertRaises(TypeError, rng.poisson, bad_type) + + def test_poisson(self): + for seed in [1, 2, 2**32 - 1]: + rng1 = _msprime.RandomGenerator(seed) + rng2 = _msprime.RandomGenerator(seed) + values = [0.001, 1e-6, 0, 1, 10, -10, 100] + for mu in values: + self.assertEqual(rng1.poisson(mu), rng2.poisson(mu)) + + def test_uniform_int_errors(self): + rng = _msprime.RandomGenerator(1) + self.assertRaises(TypeError, rng.uniform_int) + for bad_type in ["as", [], None]: + self.assertRaises(TypeError, rng.uniform_int, bad_type) + + def test_uniform_int(self): + for seed in [1, 2, 2**32 - 1]: + rng1 = _msprime.RandomGenerator(seed) + rng2 = _msprime.RandomGenerator(seed) + values = [-1, 0, 1, 2, 10, 100, 2**31] + for n in values: + self.assertEqual(rng1.uniform_int(n), rng2.uniform_int(n)) + + +class TestIntervalMap(unittest.TestCase): + """ + Tests for the IntervalMap class. + """ + def test_basic_constructor(self): + self.assertRaises(TypeError, _msprime.IntervalMap) + self.assertRaises(TypeError, _msprime.IntervalMap, []) + im = _msprime.IntervalMap([0, 1], [0, 0]) + self.assertTrue(np.array_equal(im.position, [0, 1])) + self.assertTrue(np.array_equal(im.value, [0, 0])) + + def test_input_errors(self): + for bad_value in [-1, 0]: + self.assertRaises( + _msprime.LibraryError, _msprime.IntervalMap, [0, bad_value], [0, 0]) + with self.assertRaises(_msprime.LibraryError): + _msprime.IntervalMap([], []) + with self.assertRaises(_msprime.LibraryError): + _msprime.IntervalMap([0], [0]) + with self.assertRaises(ValueError): + _msprime.IntervalMap([0], [1, 2]) + with self.assertRaises(ValueError): + _msprime.IntervalMap([0, 1], [1]) + + def test_good_inputs(self): + good_values = [ + ([0, 1], [0, 2]), ([0, 0.1, 10], [0, 2, 0]), + (np.arange(10), np.arange(10)), + (np.arange(100), np.zeros(100))] + for pos, value in good_values: + im = _msprime.IntervalMap(pos, value) + self.assertTrue(np.array_equal(pos, im.position)) + self.assertTrue(np.array_equal(value, im.value)) + class TestMutationGenerator(unittest.TestCase): """ @@ -1986,65 +2065,68 @@ class TestMutationGenerator(unittest.TestCase): """ def test_basic_constructor(self): self.assertRaises(TypeError, _msprime.MutationGenerator) - mg = _msprime.MutationGenerator(_msprime.RandomGenerator(1), 0) - self.assertEqual(mg.get_mutation_rate(), 0) + self.assertRaises(TypeError, _msprime.MutationGenerator, []) + imap = _msprime.IntervalMap([0, 1], [0, 0]) + mg = _msprime.MutationGenerator(_msprime.RandomGenerator(1), imap) + self.assertEqual(mg.alphabet, 0) def test_rng(self): for bad_type in ["x", {}, None]: self.assertRaises( - TypeError, _msprime.MutationGenerator, random_generator=bad_type) + TypeError, _msprime.MutationGenerator, random_generator=bad_type, + rate=[0], position=[0]) - def test_mutation_rate(self): + def test_mutation_map(self): rng = _msprime.RandomGenerator(1) - for bad_type in ["x", {}, None]: - self.assertRaises(TypeError, _msprime.MutationGenerator, rng, bad_type) - for bad_value in [-1, -1e-3]: - self.assertRaises(ValueError, _msprime.MutationGenerator, rng, bad_value) - for rate in [0, 1e-12, 1e12, 1000, 0.01]: - mutgen = _msprime.MutationGenerator(rng, rate) - self.assertEqual(mutgen.get_mutation_rate(), rate) + for bad_type in ["x", {}, None, [[], []]]: + self.assertRaises( + TypeError, _msprime.MutationGenerator, rng, rate_map=bad_type) def test_time_interval(self): rng = _msprime.RandomGenerator(1) + imap = _msprime.IntervalMap([0, 1], [0, 0]) for bad_type in ["x", {}, None]: with self.assertRaises(TypeError): - _msprime.MutationGenerator(rng, 0, start_time=bad_type) + _msprime.MutationGenerator(rng, imap, start_time=bad_type) with self.assertRaises(TypeError): - _msprime.MutationGenerator(rng, 0, end_time=bad_type) + _msprime.MutationGenerator(rng, imap, end_time=bad_type) for start_time, end_time in [(1, 0), (-1, -2), (200, 100)]: with self.assertRaises(_msprime.LibraryError): _msprime.MutationGenerator( - rng, 0, start_time=start_time, end_time=end_time) + rng, imap, start_time=start_time, end_time=end_time) def test_alphabet(self): rng = _msprime.RandomGenerator(1) + rate_map = _msprime.IntervalMap([0, 1], [2, 0]) for bad_type in ["x", {}, None]: self.assertRaises( TypeError, _msprime.MutationGenerator, random_generator=rng, - mutation_rate=0, alphabet=bad_type) + rate_map=rate_map, alphabet=bad_type) for bad_value in [-1, 2, 10**6]: self.assertRaises( ValueError, _msprime.MutationGenerator, random_generator=rng, - mutation_rate=0, alphabet=bad_value) + rate_map=rate_map, alphabet=bad_value) for alphabet in [0, 1]: mg = _msprime.MutationGenerator( - random_generator=rng, mutation_rate=0, alphabet=alphabet) - self.assertEqual(alphabet, mg.get_alphabet()) + random_generator=rng, rate_map=rate_map, alphabet=alphabet) + self.assertEqual(alphabet, mg.alphabet) def test_generate_interface(self): rng = _msprime.RandomGenerator(1) - mutgen = _msprime.MutationGenerator(rng, 2) - tables = _msprime.LightweightTableCollection() + imap = _msprime.IntervalMap([0, 1], [1, 0]) + mutgen = _msprime.MutationGenerator(rng, imap) + tables = _msprime.LightweightTableCollection(1) for bad_type in [None, [], {}]: self.assertRaises(TypeError, mutgen.generate, bad_type) self.assertRaises(TypeError, mutgen.generate, tables, bad_type) for _ in range(10): - tables = _msprime.LightweightTableCollection() + tables = _msprime.LightweightTableCollection(1) mutgen.generate(tables) def verify_block_size(self, tables): rng = _msprime.RandomGenerator(1) - mutgen = _msprime.MutationGenerator(rng, 2) + rate_map = _msprime.IntervalMap([0, tables.sequence_length], [2, 0]) + mutgen = _msprime.MutationGenerator(rng, rate_map) ll_tables = _msprime.LightweightTableCollection() ll_tables.fromdict(tables.asdict()) mutgen.generate(ll_tables, keep=True) @@ -2052,28 +2134,32 @@ def verify_block_size(self, tables): self.assertEqual(tables, after_tables) def test_keep_mutations_block_size_mutations(self): - tables = tskit.TableCollection() + tables = tskit.TableCollection(1) + tables.nodes.add_row(0) tables.sites.add_row(0, "a") for j in range(8192): tables.mutations.add_row(0, node=0, derived_state="b") self.verify_block_size(tables) def test_keep_mutations_block_size_ancestral_state(self): - tables = tskit.TableCollection() + tables = tskit.TableCollection(1) big_alloc = 64 * 1024 + tables.nodes.add_row(0) tables.sites.add_row(0, "a" * big_alloc) self.verify_block_size(tables) def test_keep_mutations_block_size_derived_state(self): - tables = tskit.TableCollection() + tables = tskit.TableCollection(1) + tables.nodes.add_row(0) tables.sites.add_row(0, "a") big_alloc = 64 * 1024 tables.mutations.add_row(0, node=0, derived_state="b" * big_alloc) self.verify_block_size(tables) def test_keep_mutations_block_size_metadata(self): - tables = tskit.TableCollection() + tables = tskit.TableCollection(1) big_alloc = 64 * 1024 + tables.nodes.add_row(0) tables.sites.add_row(0, "a", metadata=b"x" * big_alloc) self.verify_block_size(tables) tables.mutations.add_row(0, node=0, derived_state="b" * 2 * big_alloc) @@ -2113,13 +2199,15 @@ class TestLikelihood(unittest.TestCase): Tests for the low-level likelihood calculation interface. """ def get_arg(self): + L = 20 rng = _msprime.RandomGenerator(1) - tables = _msprime.LightweightTableCollection() + tables = _msprime.LightweightTableCollection(L) sim = _msprime.Simulator( - get_samples(5), uniform_recombination_map(L=20, rate=2), + get_samples(5), uniform_recombination_map(L=L, rate=2), rng, tables, store_full_arg=True) sim.run() - mutgen = _msprime.MutationGenerator(rng, 2) + rate_map = _msprime.IntervalMap(position=[0, L], value=[2, 0]) + mutgen = _msprime.MutationGenerator(rng, rate_map) mutgen.generate(tables) t = tskit.TableCollection.fromdict(tables.asdict()) self.assertGreater(len(t.edges), 10) diff --git a/tests/test_mutations.py b/tests/test_mutations.py index 7e3059763..ae2475656 100644 --- a/tests/test_mutations.py +++ b/tests/test_mutations.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2018 University of Oxford +# Copyright (C) 2018-2020 University of Oxford # # This file is part of msprime. # @@ -25,6 +25,7 @@ import numpy as np import msprime +import _msprime from tests import tsutil import tests.wright_fisher as wf @@ -140,8 +141,10 @@ def test_bad_rates(self): ts = msprime.simulate(2, random_seed=2) for bad_type in [{}, [], ts]: self.assertRaises(TypeError, msprime.mutate, ts, rate=bad_type) - for bad_rate in ["abc", -1, -1e-6, -1e7]: + for bad_rate in ["abc", "xxx"]: self.assertRaises(ValueError, msprime.mutate, ts, bad_rate) + for bad_rate in [-1, -1e-6, -1e7]: + self.assertRaises(_msprime.LibraryError, msprime.mutate, ts, bad_rate) def test_bad_models(self): ts = msprime.simulate(2, random_seed=2) @@ -467,3 +470,59 @@ def test_keep_mutation_parent_zero_rate(self): t1.provenances.clear() t2.provenances.clear() self.assertEqual(t1, t2) + + +def mutate_map(tables, mutation_map, seed): + """ + Parallel implementation of the C code which should produce precisely the same + results. + """ + # Insert a sentinel into the map for convenience. + map_position = np.hstack([mutation_map.position, [tables.sequence_length]]) + rng = msprime.RandomGenerator(seed) + time = tables.nodes.time + for edge in tables.edges: + branch_length = time[edge.parent] - time[edge.child] + index = np.searchsorted(map_position, edge.left) + if map_position[index] > edge.left: + index -= 1 + left = edge.left + right = 0 + while right != edge.right: + right = min(edge.right, map_position[index + 1]) + assert left < right + assert mutation_map.position[index] <= left + assert right <= map_position[index + 1] + assert right <= edge.right + # Generate the mutations. + mu = mutation_map.rate[index] * (right - left) * branch_length + for _ in range(rng.poisson(mu)): + position = rng.flat(left, right) + site_id = tables.sites.add_row(position, ancestral_state="0") + tables.mutations.add_row(site_id, node=edge.child, derived_state="1") + # Sample from this to duplicate the behaviour in the C implementation. + rng.uniform_int(1) + index += 1 + left = right + tables.sort() + return tables.tree_sequence() + + +class TestMap(unittest.TestCase): + """ + Tests for the mutation map. + """ + + def test_single_rate(self): + random_seed = 2 + rate = 0.5 + for random_seed in range(1, 5): + for rate in [0, 0.1, 0.5, 1.0]: + ts = msprime.simulate(10, recombination_rate=2, random_seed=random_seed) + mutmap = msprime.MutationMap([0], [rate]) + tables1 = ts.dump_tables() + mutate_map(tables1, mutmap, random_seed) + ts = msprime.mutate(ts, rate=rate, random_seed=random_seed) + tables2 = ts.dump_tables() + self.assertEqual(tables1.sites, tables2.sites) + self.assertEqual(tables1.mutations, tables2.mutations) diff --git a/tests/test_provenance.py b/tests/test_provenance.py index 2a91a8a75..1687f7c20 100644 --- a/tests/test_provenance.py +++ b/tests/test_provenance.py @@ -315,7 +315,6 @@ def test_from_ts(self): start_time = from_ts.tables.nodes.time.max() ts = msprime.simulate( from_ts=from_ts, start_time=start_time, random_seed=2) - print(list(ts.provenances())) self.verify(ts)