Skip to content

Commit

Permalink
new additional_nodes API
Browse files Browse the repository at this point in the history
  • Loading branch information
GertjanBisschop committed Apr 19, 2023
1 parent 3d31227 commit 80cce06
Show file tree
Hide file tree
Showing 15 changed files with 760 additions and 115 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Expand Up @@ -14,6 +14,10 @@
sequence that has no recombination nodes (({issue}`2123`, {pr}`2124`,
{user}`hyanwong`)

- Add ability to record specific node types with the `additional_nodes`
flag as well as record edges for coalescing nodes along non-coalescing segments. ({issue}`2128`, {issue}`2132`, {pr}`2162`,
{user}`GertjanBisschop`)

## [1.2.0] - 2022-05-18

**New features**
Expand Down
60 changes: 36 additions & 24 deletions algorithms.py
Expand Up @@ -543,8 +543,8 @@ def __init__(
max_segments=100,
num_labels=1,
sweep_trajectory=None,
full_arg=False,
store_unary=False,
coalescing_segments_only=True,
additional_nodes=None,
time_slice=None,
gene_conversion_rate=0.0,
gene_conversion_length=1,
Expand All @@ -571,8 +571,10 @@ def __init__(
self.num_labels = num_labels
self.num_populations = N
self.max_segments = max_segments
self.full_arg = full_arg
self.store_unary = store_unary
self.coalescing_segments_only = coalescing_segments_only
self.additional_nodes = msprime.NodeType(additional_nodes)
if self.additional_nodes.value > 0:
assert not self.coalescing_segments_only
self.pedigree = None
self.segment_stack = []
self.segments = [None for j in range(self.max_segments + 1)]
Expand Down Expand Up @@ -1276,7 +1278,7 @@ def migration_event(self, j, k):
index = random.randint(0, source.get_num_ancestors(label) - 1)
x = source.remove(index, label)
dest.add(x, label)
if self.full_arg:
if self.additional_nodes.value & msprime.NODE_IS_MIG_EVENT > 0:
self.store_node(k, flags=msprime.NODE_IS_MIG_EVENT)
self.store_arg_edges(x)
# Set the population id for each segment also.
Expand Down Expand Up @@ -1383,7 +1385,7 @@ def hudson_recombination_event(self, label, return_heads=False):
lhs_tail = x
self.set_segment_mass(alpha)
self.P[alpha.population].add(alpha, label)
if self.full_arg:
if self.additional_nodes.value & msprime.NODE_IS_RE_EVENT > 0:
self.store_node(lhs_tail.population, flags=msprime.NODE_IS_RE_EVENT)
self.store_arg_edges(lhs_tail)
self.store_node(alpha.population, flags=msprime.NODE_IS_RE_EVENT)
Expand Down Expand Up @@ -1791,7 +1793,9 @@ def merge_ancestors(self, H, pop_id, label, new_node_id=-1):
pop.add(alpha, label)
merged_head = alpha
else:
if self.full_arg:
if (coalescence and not self.coalescing_segments_only) or (
self.additional_nodes.value & msprime.NODE_IS_CA_EVENT > 0
):
defrag_required |= z.right == alpha.left
else:
defrag_required |= (
Expand All @@ -1801,10 +1805,14 @@ def merge_ancestors(self, H, pop_id, label, new_node_id=-1):
alpha.prev = z
self.set_segment_mass(alpha)
z = alpha
if self.full_arg:
if not coalescence:
if coalescence:
if not self.coalescing_segments_only:
self.store_arg_edges(z)
else:
if self.additional_nodes.value & msprime.NODE_IS_CA_EVENT > 0:
self.store_node(pop_id, flags=msprime.NODE_IS_CA_EVENT)
self.store_arg_edges(z)
self.store_arg_edges(z)

if defrag_required:
self.defrag_segment_chain(z)
if coalescence:
Expand Down Expand Up @@ -1927,7 +1935,9 @@ def merge_two_ancestors(self, population_index, label, x, y):
if z is None:
pop.add(alpha, label)
else:
if self.full_arg:
if (coalescence and not self.coalescing_segments_only) or (
self.additional_nodes.value & msprime.NODE_IS_CA_EVENT > 0
):
defrag_required |= z.right == alpha.left
else:
defrag_required |= (
Expand All @@ -1938,12 +1948,14 @@ def merge_two_ancestors(self, population_index, label, x, y):
self.set_segment_mass(alpha)
z = alpha

if self.full_arg:
if not coalescence:
if coalescence:
if not self.coalescing_segments_only:
self.store_arg_edges(z, u)
else:
if self.additional_nodes.value & msprime.NODE_IS_CA_EVENT > 0:
u = self.store_node(population_index, flags=msprime.NODE_IS_CA_EVENT)
self.store_arg_edges(z, u)
elif self.store_unary and coalescence:
self.store_arg_edges(z, u)
self.store_arg_edges(z, u)

if defrag_required:
self.defrag_segment_chain(z)
if coalescence:
Expand Down Expand Up @@ -2200,8 +2212,8 @@ def run_simulate(args):
census_times=args.census_time,
max_segments=100000,
num_labels=num_labels,
full_arg=args.full_arg,
store_unary=args.store_unary,
coalescing_segments_only=not args.all_segments,
additional_nodes=args.additional_nodes,
sweep_trajectory=sweep_trajectory,
time_slice=args.time_slice,
gene_conversion_rate=gc_rate,
Expand Down Expand Up @@ -2275,16 +2287,16 @@ def add_simulator_arguments(parser):
help="Parameters for the allele frequency trajectory simulation",
)
parser.add_argument(
"--full-arg",
"--all-segments",
action="store_true",
default=False,
help="Store the full ARG with all recombination and common ancestor nodes",
help="Only record edges along coalescing segments.",
)
parser.add_argument(
"--store-unary",
action="store_true",
default=False,
help="Store unary nodes.",
"--additional_nodes",
type=int,
default=0,
help="Record edges along all segments for coalescing nodes.",
)
parser.add_argument(
"--time-slice",
Expand Down
20 changes: 20 additions & 0 deletions docs/api.md
Expand Up @@ -87,6 +87,26 @@ for discussion and examples of individual features.
:members:
```

```{eval-rst}
.. autoclass:: msprime.NodeType
:members:
.. autoattribute:: RECOMBINANT
:annotation: = NodeType(1<<17)
.. autoattribute:: COMMON_ANCESTOR
:annotation: = NodeType(1<<18)
.. autoattribute:: MIGRANT
:annotation: = NodeType(1<<19)
.. autoattribute:: GENE_CONVERSION
:annotation: = NodeType(1<<21)
.. autoattribute:: PASS_THROUGH
:annotation: = NodeType(1<<22)
```

#### Models

```{eval-rst}
Expand Down
80 changes: 50 additions & 30 deletions lib/msprime.c
Expand Up @@ -256,14 +256,25 @@ msp_set_store_migrations(msp_t *self, bool store_migrations)
int
msp_set_store_full_arg(msp_t *self, bool store_full_arg)
{
self->store_full_arg = store_full_arg;
if (store_full_arg) {
self->additional_nodes = MSP_NODE_IS_RE_EVENT | MSP_NODE_IS_CA_EVENT
| MSP_NODE_IS_MIG_EVENT | MSP_NODE_IS_GC_EVENT;
self->coalescing_segments_only = false;
}
return 0;
}

int
msp_set_store_unary(msp_t *self, bool store_unary)
msp_set_additional_nodes(msp_t *self, uint32_t additional_nodes)
{
self->store_unary = store_unary;
self->additional_nodes = additional_nodes;
return 0;
}

int
msp_set_coalescing_segments_only(msp_t *self, bool coalescing_segments_only)
{
self->coalescing_segments_only = coalescing_segments_only;
return 0;
}

Expand Down Expand Up @@ -770,8 +781,8 @@ msp_alloc(msp_t *self, tsk_table_collection_t *tables, gsl_rng *rng)
self->start_time = -DBL_MAX;
/* Set the memory defaults */
self->store_migrations = false;
self->store_full_arg = false;
self->store_unary = false;
self->additional_nodes = 0;
self->coalescing_segments_only = true;
self->avl_node_block_size = 1024;
self->node_mapping_block_size = 1024;
self->segment_block_size = 1024;
Expand Down Expand Up @@ -1889,7 +1900,7 @@ msp_move_individual(msp_t *self, avl_node_t *node, avl_tree_t *source,
avl_unlink_node(source, node);
msp_free_avl_node(self, node);

if (self->store_full_arg) {
if (self->additional_nodes & MSP_NODE_IS_MIG_EVENT) {
ret = msp_store_node(
self, MSP_NODE_IS_MIG_EVENT, self->time, dest_pop, TSK_NULL);
if (ret < 0) {
Expand Down Expand Up @@ -2217,7 +2228,7 @@ msp_pedigree_initialise(msp_t *self)
label_id_t label = 0;
tsk_size_t j;

if (self->next_demographic_event != NULL || self->store_full_arg) {
if (self->next_demographic_event != NULL || self->additional_nodes > 0) {
ret = MSP_ERR_UNSUPPORTED_OPERATION;
goto out;
}
Expand Down Expand Up @@ -2520,7 +2531,7 @@ msp_recombination_event(msp_t *self, label_id_t label, segment_t **lhs, segment_
if (ret != 0) {
goto out;
}
if (self->store_full_arg) {
if (self->additional_nodes & MSP_NODE_IS_RE_EVENT) {
ret = msp_store_arg_recombination(self, lhs_tail, alpha);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -2731,7 +2742,7 @@ msp_gene_conversion_event(msp_t *self, label_id_t label)
} else {
self->num_noneffective_gc_events++;
}
if (self->store_full_arg) {
if (self->additional_nodes & MSP_NODE_IS_GC_EVENT) {
ret = msp_store_arg_gene_conversion(self, tail, alpha, head);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -2922,7 +2933,8 @@ msp_merge_two_ancestors(msp_t *self, population_id_t population_id, label_id_t l
}
merged_head = alpha;
} else {
if (self->store_full_arg) {
if ((self->additional_nodes & MSP_NODE_IS_CA_EVENT)
|| (!self->coalescing_segments_only && coalescence)) {
// we pre-empt the fact that values will be set equal later
defrag_required |= z->right == alpha->left;
} else {
Expand All @@ -2937,27 +2949,27 @@ msp_merge_two_ancestors(msp_t *self, population_id_t population_id, label_id_t l
z = alpha;
}
}
if (self->store_full_arg) {
if (!coalescence) {
ret = msp_store_node(
self, MSP_NODE_IS_CA_EVENT, self->time, population_id, TSK_NULL);
if (coalescence) {
if (!self->coalescing_segments_only) {
ret = msp_store_arg_edges(self, z, new_node_id);
if (ret < 0) {
goto out;
}
}
// is specified new_node_id impossible when recording full_arg?
ret = msp_store_arg_edges(self, z, TSK_NULL);
if (ret != 0) {
goto out;
}
} else {
if (self->store_unary && coalescence) {
ret = msp_store_arg_edges(self, z, new_node_id);
if (self->additional_nodes & MSP_NODE_IS_CA_EVENT) {
ret = msp_store_node(
self, MSP_NODE_IS_CA_EVENT, self->time, population_id, TSK_NULL);
if (ret < 0) {
goto out;
}
ret = msp_store_arg_edges(self, z, TSK_NULL);
if (ret != 0) {
goto out;
}
}
}

if (defrag_required) {
ret = msp_defrag_segment_chain(self, z);
if (ret != 0) {
Expand Down Expand Up @@ -3165,7 +3177,8 @@ msp_merge_ancestors(msp_t *self, avl_tree_t *Q, population_id_t population_id,
goto out;
}
} else {
if (self->store_full_arg) {
if ((self->additional_nodes & MSP_NODE_IS_CA_EVENT)
|| (!self->coalescing_segments_only && coalescence)) {
// we pre-empt the fact that values will be set equal later
defrag_required |= z->right == alpha->left;
} else {
Expand All @@ -3179,17 +3192,24 @@ msp_merge_ancestors(msp_t *self, avl_tree_t *Q, population_id_t population_id,
z = alpha;
}
}
if (self->store_full_arg) {
if (!coalescence) {
if (coalescence) {
if (!self->coalescing_segments_only) {
ret = msp_store_arg_edges(self, z, new_node_id);
if (ret < 0) {
goto out;
}
}
} else {
if (self->additional_nodes & MSP_NODE_IS_CA_EVENT) {
ret = msp_store_node(
self, MSP_NODE_IS_CA_EVENT, self->time, population_id, individual);
if (ret < 0) {
goto out;
}
}
ret = msp_store_arg_edges(self, z, TSK_NULL);
if (ret != 0) {
goto out;
ret = msp_store_arg_edges(self, z, TSK_NULL);
if (ret != 0) {
goto out;
}
}
}
if (defrag_required) {
Expand Down Expand Up @@ -4118,7 +4138,7 @@ msp_gene_conversion_left_event(msp_t *self, label_id_t label)
msp_set_segment_mass(self, alpha);
tsk_bug_assert(alpha->prev == NULL);
ret = msp_insert_individual(self, alpha);
if (self->store_full_arg) {
if (self->additional_nodes & MSP_NODE_IS_GC_EVENT) {
ret = msp_store_arg_gene_conversion(self, NULL, y, alpha);
if (ret != 0) {
goto out;
Expand Down Expand Up @@ -5112,7 +5132,7 @@ msp_run(msp_t *self, double max_time, unsigned long max_events)
ret = MSP_ERR_BAD_STATE;
goto out;
}
if (self->store_full_arg
if (self->additional_nodes > 0
&& !(self->model.type == MSP_MODEL_HUDSON || self->model.type == MSP_MODEL_SMC
|| self->model.type == MSP_MODEL_SMC_PRIME
|| self->model.type == MSP_MODEL_BETA
Expand Down
7 changes: 5 additions & 2 deletions lib/msprime.h
Expand Up @@ -57,6 +57,7 @@
#define MSP_NODE_IS_MIG_EVENT (1u << 19)
#define MSP_NODE_IS_CEN_EVENT (1u << 20)
#define MSP_NODE_IS_GC_EVENT (1u << 21)
#define MSP_NODE_IS_PASS_THROUGH (1u << 22)

/* Flags for verify */
#define MSP_VERIFY_BREAKPOINTS (1 << 1)
Expand Down Expand Up @@ -195,7 +196,8 @@ typedef struct _msp_t {
simulation_model_t model;
bool store_migrations;
bool store_full_arg;
bool store_unary;
uint32_t additional_nodes;
bool coalescing_segments_only;
double sequence_length;
bool discrete_genome;
rate_map_t recomb_map;
Expand Down Expand Up @@ -430,7 +432,8 @@ int msp_set_simulation_model_sweep_genic_selection(msp_t *self, double position,
int msp_set_start_time(msp_t *self, double start_time);
int msp_set_store_migrations(msp_t *self, bool store_migrations);
int msp_set_store_full_arg(msp_t *self, bool store_full_arg);
int msp_set_store_unary(msp_t *self, bool store_unary);
int msp_set_additional_nodes(msp_t *self, uint32_t additional_nodes);
int msp_set_coalescing_segments_only(msp_t *self, bool coalescing_segments_only);
int msp_set_ploidy(msp_t *self, int ploidy);
int msp_set_recombination_map(msp_t *self, size_t size, double *position, double *rate);
int msp_set_recombination_rate(msp_t *self, double rate);
Expand Down

0 comments on commit 80cce06

Please sign in to comment.