Skip to content

Commit

Permalink
Added the IntervalMap & update high-level code.
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Mar 23, 2020
1 parent bf90d56 commit 8373905
Show file tree
Hide file tree
Showing 13 changed files with 404 additions and 332 deletions.
303 changes: 203 additions & 100 deletions _msprimemodule.c

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions lib/dev-tools/dev-cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -761,11 +761,12 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu
mutation_params_t mutation_params;
msp_t msp;
recomb_map_t recomb_map;
interval_map_t mut_map;
mutgen_t mutgen;
tsk_table_collection_t tables;
tsk_treeseq_t tree_seq;
gsl_rng *rng = gsl_rng_alloc(gsl_rng_default);

gsl_rng *rng = gsl_rng_alloc(gsl_rng_default);
if (rng == NULL) {
fatal_error("No memory");
}
Expand All @@ -774,13 +775,15 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu
fatal_tskit_error(ret, __LINE__);
}
get_configuration(rng, &msp, &tables, &mutation_params, &recomb_map, conf_file);
ret = mutgen_alloc(&mutgen, rng, mutation_params.alphabet, 0);
ret = interval_map_alloc_single(&mut_map,
recomb_map_get_sequence_length(&recomb_map), mutation_params.mutation_rate);
if (ret != 0) {
fatal_msprime_error(ret, __LINE__);
}
ret = mutgen_alloc(&mutgen, rng, &mut_map, mutation_params.alphabet, 0);
if (ret != 0) {
fatal_msprime_error(ret, __LINE__);
}
assert(false); /* FIXME */
/* ret = mutgen_set_rate(&mutgen, mutation_params.mutation_rate, */
/* recomb_map.sequence_length); */
if (ret != 0) {
fatal_msprime_error(ret, __LINE__);
}
Expand Down Expand Up @@ -843,6 +846,7 @@ run_simulate(const char *conf_file, const char *output_file, int verbose, int nu

msp_free(&msp);
recomb_map_free(&recomb_map);
interval_map_free(&mut_map);
mutgen_free(&mutgen);
gsl_rng_free(rng);
tsk_table_collection_free(&tables);
Expand Down
33 changes: 25 additions & 8 deletions lib/interval_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@
#include "util.h"
#include "msprime.h"

void
interval_map_print_state(interval_map_t *self, FILE *out)
{
size_t j;

fprintf(out, "interval_map (%p):: size = %d\n", (void *) self, (int) self->size);
fprintf(out, "\tsequence_length = %f\n", interval_map_get_sequence_length(self));
fprintf(out, "\tindex\tposition\tvalue\n");
for (j = 0; j < self->size; j++) {
fprintf(out, "\t%d\t%f\t%f\n", (int) j, self->position[j], self->value[j]);
}
}

int MSP_WARN_UNUSED
interval_map_alloc(interval_map_t *self, size_t size, double *position, double *value)
Expand Down Expand Up @@ -65,6 +77,13 @@ interval_map_alloc(interval_map_t *self, size_t size, double *position, double *
return ret;
}

int MSP_WARN_UNUSED
interval_map_alloc_single(interval_map_t *self, double sequence_length, double value)
{
double position[2] = {0, sequence_length};
return interval_map_alloc(self, 2, position, &value);
}

int
interval_map_free(interval_map_t *self)
{
Expand All @@ -91,15 +110,13 @@ interval_map_get_num_intervals(interval_map_t *self)
return self->size - 1;
}

void
interval_map_print_state(interval_map_t *self, FILE *out)
size_t
interval_map_get_index(interval_map_t *self, double x)
{
size_t j;
size_t index = tsk_search_sorted(self->position, self->size, x);

fprintf(out, "interval_map (%p):: size = %d\n", (void *) self, (int) self->size);
fprintf(out, "\tsequence_length = %f\n", interval_map_get_sequence_length(self));
fprintf(out, "\tindex\tposition\tvalue\n");
for (j = 0; j < self->size; j++) {
fprintf(out, "\t%d\t%f\t%f\n", (int) j, self->position[j], self->value[j]);
if (self->position[index] > x) {
index--;
}
return index;
}
25 changes: 6 additions & 19 deletions lib/msprime.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ typedef struct segment_t_t {
struct segment_t_t *next;
} segment_t;


typedef struct {
double left; /* TODO CHANGE THIS - not a good name! */
uint32_t value;
Expand Down Expand Up @@ -210,14 +209,9 @@ typedef struct {
} interval_map_t;

/* Recombination map */

typedef struct {
interval_map_t map;
double total_recombination_rate;
/* double sequence_length; /1* size of the physical coordinate space *1/ */
/* size_t size; /1* the total number of values in the map *1/ */
/* double *positions; */
/* double *rates; */
double *cumulative;
bool discrete;
} recomb_map_t;
Expand Down Expand Up @@ -344,20 +338,14 @@ typedef struct {
typedef struct {
int alphabet;
gsl_rng *rng;
interval_map_t *rate_map;
double start_time;
double end_time;
double mutation_rate;
size_t block_size;
avl_tree_t sites;
tsk_blkalloc_t allocator;
struct {
size_t size;
double *position;
double *rate;
} map;
} mutgen_t;


int msp_alloc(msp_t *self,
size_t num_samples, sample_t *samples,
recomb_map_t *recomb_map, tsk_table_collection_t *from_ts_tables, gsl_rng *rng);
Expand Down Expand Up @@ -460,23 +448,21 @@ size_t msp_get_num_gene_conversion_events(msp_t *self);

int interval_map_alloc(interval_map_t *self, size_t size,
double *position, double *value);
int interval_map_alloc_single(interval_map_t *self,
double sequence_length, double value);
int interval_map_free(interval_map_t *self);
void interval_map_print_state(interval_map_t *self, FILE *out);
double interval_map_get_sequence_length(interval_map_t *self);
size_t interval_map_get_size(interval_map_t *self);
size_t interval_map_get_num_intervals(interval_map_t *self);
size_t interval_map_get_index(interval_map_t *self, double x);

typedef double (*msp_convert_func) (void *obj, double rate);

int recomb_map_alloc_uniform(recomb_map_t *self,
double sequence_length, double rate, bool discrete);

int recomb_map_alloc(recomb_map_t *self,
size_t size, double *position, double *rate, bool discrete);

/* double sequence_length, double *positions, double *rates, */
/* size_t size, bool discrete); */

int recomb_map_copy(recomb_map_t *to, recomb_map_t *from);
int recomb_map_free(recomb_map_t *self);
double recomb_map_get_sequence_length(recomb_map_t *self);
Expand All @@ -496,7 +482,8 @@ double recomb_map_position_to_mass(recomb_map_t *self, double position);
double recomb_map_shift_by_mass(recomb_map_t *self, double pos, double mass);
double recomb_map_sample_poisson(recomb_map_t *self, gsl_rng *rng, double start);

int mutgen_alloc(mutgen_t *self, gsl_rng *rng, int alphabet, size_t mutation_block_size);
int mutgen_alloc(mutgen_t *self, gsl_rng *rng, interval_map_t *rate_map,
int alphabet, size_t mutation_block_size);
int mutgen_set_rate(mutgen_t *self, double rate, double sequence_length);
int mutgen_set_map(mutgen_t *self, size_t size, double *position, double *rate);
int mutgen_set_time_interval(mutgen_t *self, double start_time, double end_time);
Expand Down
77 changes: 14 additions & 63 deletions lib/mutgen.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,8 @@ mutgen_print_state(mutgen_t *self, FILE *out)
tsk_size_t j;

fprintf(out, "Mutgen state\n");
fprintf(out, "\tmutation_map: size = %d\n", (int) self->map.size);
for (j = 0; j < self->map.size; j++) {
fprintf(out, "\t\t%f\t%f\n", self->map.position[j], self->map.rate[j]);
}
fprintf(out, "\trate_map:\n");
interval_map_print_state(self->rate_map, out);
fprintf(out, "\tstart_time = %f\n", self->start_time);
fprintf(out, "\tend_time = %f\n", self->end_time);
tsk_blkalloc_print_state(&self->allocator, out);
Expand All @@ -88,9 +86,11 @@ mutgen_print_state(mutgen_t *self, FILE *out)
}

int MSP_WARN_UNUSED
mutgen_alloc(mutgen_t *self, gsl_rng *rng, int alphabet, size_t block_size)
mutgen_alloc(mutgen_t *self, gsl_rng *rng, interval_map_t *rate_map,
int alphabet, size_t block_size)
{
int ret = 0;
size_t j;

assert(rng != NULL);
memset(self, 0, sizeof(mutgen_t));
Expand All @@ -100,6 +100,7 @@ mutgen_alloc(mutgen_t *self, gsl_rng *rng, int alphabet, size_t block_size)
}
self->alphabet = alphabet;
self->rng = rng;
self->rate_map = rate_map;
self->start_time = -DBL_MAX;
self->end_time = DBL_MAX;
self->block_size = block_size;
Expand All @@ -115,58 +116,11 @@ mutgen_alloc(mutgen_t *self, gsl_rng *rng, int alphabet, size_t block_size)
ret = msp_set_tsk_error(ret);
goto out;
}
out:
return ret;
}

/* FIXME remove these two methods and change to use an interval_map
* as an argument to mutgen_alloc. */

/* Sets a single rate over the whole region. */
int MSP_WARN_UNUSED
mutgen_set_rate(mutgen_t *self, double rate, double sequence_length)
{
double positions[] = {0, sequence_length};
double rates[] = {rate, 0};

return mutgen_set_map(self, 2, positions, rates);
}

int MSP_WARN_UNUSED
mutgen_set_map(mutgen_t *self, size_t size, double *position, double *rate)
{
int ret = 0;
size_t j;

msp_safe_free(self->map.position);
msp_safe_free(self->map.rate);
/* Leave space for the sentinel */
self->map.position = malloc((size + 1) * sizeof(*self->map.position));
self->map.rate = malloc(size * sizeof(*self->map.rate));
if (self->map.position == NULL || self->map.rate == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
if (size < 1) {
ret = MSP_ERR_BAD_MUTATION_MAP_SIZE;
goto out;
}
if (position[0] != 0) {
ret = MSP_ERR_BAD_MUTATION_MAP_POSITION;
goto out;
}
self->map.size = size;
for (j = 0; j < size; j++) {
if (j > 0 && position[j - 1] >= position[j]) {
ret = MSP_ERR_BAD_MUTATION_MAP_POSITION;
goto out;
}
if (rate[j] < 0) {
for (j = 0; j < rate_map->size - 1; j++) {
if (rate_map->value[j] < 0) {
ret = MSP_ERR_BAD_MUTATION_MAP_RATE;
goto out;
}
self->map.position[j] = position[j];
self->map.rate[j] = rate[j];
}
out:
return ret;
Expand All @@ -176,8 +130,6 @@ int
mutgen_free(mutgen_t *self)
{
tsk_blkalloc_free(&self->allocator);
msp_safe_free(self->map.position);
msp_safe_free(self->map.rate);
return 0;
}

Expand Down Expand Up @@ -421,6 +373,8 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags)
int ret = 0;
tsk_node_table_t *nodes = &tables->nodes;
tsk_edge_table_t *edges = &tables->edges;
const double *map_position = self->rate_map->position;
const double *map_rate = self->rate_map->value;
size_t j, l, branch_mutations, map_index;
double left, right, edge_right, branch_length, mu, position;
node_id_t parent, child;
Expand All @@ -444,7 +398,7 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags)
ret = msp_set_tsk_error(ret);
goto out;
}
if (self->map.position[self->map.size - 1] != tables->sequence_length) {
if (interval_map_get_sequence_length(self->rate_map) != tables->sequence_length) {
ret = MSP_ERR_INCOMPATIBLE_MUTATION_MAP;
goto out;
}
Expand Down Expand Up @@ -475,14 +429,11 @@ mutgen_generate(mutgen_t *self, tsk_table_collection_t *tables, int flags)
branch_end = GSL_MIN(end_time, nodes->time[parent]);
branch_length = branch_end - branch_start;

map_index = tsk_search_sorted(self->map.position, self->map.size, left);
if (self->map.position[map_index] > left) {
map_index--;
}
map_index = interval_map_get_index(self->rate_map, left);
right = 0;
while (right != edge_right) {
right = GSL_MIN(edge_right, self->map.position[map_index + 1]);
mu = branch_length * (right - left) * self->map.rate[map_index];
right = GSL_MIN(edge_right, map_position[map_index + 1]);
mu = branch_length * (right - left) * map_rate[map_index];
branch_mutations = gsl_ran_poisson(self->rng, mu);
for (l = 0; l < branch_mutations; l++) {
/* Rejection sample positions until we get one we haven't seen before. */
Expand Down

0 comments on commit 8373905

Please sign in to comment.