Skip to content

Commit

Permalink
Remove tree_pos iterator from KC distance and haplotype matching
Browse files Browse the repository at this point in the history
  • Loading branch information
duncanMR committed Feb 21, 2024
1 parent caaa00b commit d88aff8
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 83 deletions.
21 changes: 6 additions & 15 deletions c/tskit/haplotype_matching.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ int
tsk_ls_hmm_free(tsk_ls_hmm_t *self)
{
tsk_tree_free(&self->tree);
tsk_tree_position_free(&self->tree_pos);
tsk_safe_free(self->recombination_rate);
tsk_safe_free(self->mutation_rate);
tsk_safe_free(self->recombination_rate);
Expand Down Expand Up @@ -247,11 +246,6 @@ tsk_ls_hmm_reset(tsk_ls_hmm_t *self, double value)
tsk_memset(self->transition_parent, 0xff,
self->max_transitions * sizeof(*self->transition_parent));

tsk_tree_position_free(&self->tree_pos);
ret = tsk_tree_position_init(&self->tree_pos, self->tree_sequence, 0);
if (ret != 0) {
goto out;
}
samples = tsk_treeseq_get_samples(self->tree_sequence);
for (j = 0; j < self->num_samples; j++) {
u = samples[j];
Expand All @@ -260,7 +254,6 @@ tsk_ls_hmm_reset(tsk_ls_hmm_t *self, double value)
self->transition_index[u] = (tsk_id_t) j;
}
self->num_transitions = self->num_samples;
out:
return ret;
}

Expand Down Expand Up @@ -309,13 +302,11 @@ tsk_ls_hmm_update_tree(tsk_ls_hmm_t *self, int direction)
tsk_value_transition_t *restrict T = self->transitions;
tsk_id_t u, c, p, j, e;
tsk_value_transition_t *vt;
tsk_tree_position_t tree_pos;

tsk_tree_position_step(&self->tree_pos, direction);
tsk_bug_assert(self->tree_pos.index != -1);
tsk_bug_assert(self->tree_pos.index == self->tree.index);

for (j = self->tree_pos.out.start; j != self->tree_pos.out.stop; j += direction) {
e = self->tree_pos.out.order[j];
tree_pos = self->tree.tree_pos;
for (j = tree_pos.out.start; j != tree_pos.out.stop; j += direction) {
e = tree_pos.out.order[j];
c = edges_child[e];
u = c;
if (T_index[u] == TSK_NULL) {
Expand All @@ -333,8 +324,8 @@ tsk_ls_hmm_update_tree(tsk_ls_hmm_t *self, int direction)
parent[c] = TSK_NULL;
}

for (j = self->tree_pos.in.start; j != self->tree_pos.in.stop; j += direction) {
e = self->tree_pos.in.order[j];
for (j = tree_pos.in.start; j != tree_pos.in.stop; j += direction) {
e = tree_pos.in.order[j];
c = edges_child[e];
p = edges_parent[e];
parent[c] = p;
Expand Down
6 changes: 1 addition & 5 deletions c/tskit/haplotype_matching.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2023 Tskit Developers
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -98,10 +98,6 @@ typedef struct _tsk_ls_hmm_t {
tsk_size_t num_nodes;
/* state */
tsk_tree_t tree;
/* NOTE: this tree_position will be redundant once we integrate the top-level
* tree class with this.
*/
tsk_tree_position_t tree_pos;
tsk_id_t *parent;
/* The probability value transitions on the tree */
tsk_value_transition_t *transitions;
Expand Down
91 changes: 30 additions & 61 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -4566,19 +4566,6 @@ tsk_tree_position_prev(tsk_tree_position_t *self)
return self->index != -1;
}

bool
tsk_tree_position_step(tsk_tree_position_t *self, int direction)
{
bool ret = false;

if (direction == TSK_DIR_FORWARD) {
ret = tsk_tree_position_next(self);
} else if (direction == TSK_DIR_REVERSE) {
ret = tsk_tree_position_prev(self);
}
return ret;
}

int TSK_WARN_UNUSED
tsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index)
{
Expand Down Expand Up @@ -5686,18 +5673,20 @@ tsk_tree_next(tsk_tree_t *self)
const tsk_id_t *restrict edge_parent = tables->edges.parent;
const tsk_id_t *restrict edge_child = tables->edges.child;
tsk_id_t j, e;
tsk_tree_position_t tree_pos;
bool valid;

valid = tsk_tree_position_next(&self->tree_pos);
tree_pos = self->tree_pos;

if (valid) {
for (j = self->tree_pos.out.start; j != self->tree_pos.out.stop; j++) {
e = self->tree_pos.out.order[j];
for (j = tree_pos.out.start; j != tree_pos.out.stop; j++) {
e = tree_pos.out.order[j];
tsk_tree_remove_edge(self, edge_parent[e], edge_child[e]);
}

for (j = self->tree_pos.in.start; j != self->tree_pos.in.stop; j++) {
e = self->tree_pos.in.order[j];
for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
ret = TSK_TREE_OK;
Expand All @@ -5716,18 +5705,20 @@ tsk_tree_prev(tsk_tree_t *self)
const tsk_id_t *restrict edge_parent = tables->edges.parent;
const tsk_id_t *restrict edge_child = tables->edges.child;
tsk_id_t j, e;
tsk_tree_position_t tree_pos;
bool valid;

valid = tsk_tree_position_prev(&self->tree_pos);
tree_pos = self->tree_pos;

if (valid) {
for (j = self->tree_pos.out.start; j != self->tree_pos.out.stop; j--) {
e = self->tree_pos.out.order[j];
for (j = tree_pos.out.start; j != tree_pos.out.stop; j--) {
e = tree_pos.out.order[j];
tsk_tree_remove_edge(self, edge_parent[e], edge_child[e]);
}

for (j = self->tree_pos.in.start; j != self->tree_pos.in.stop; j--) {
e = self->tree_pos.in.order[j];
for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {
e = tree_pos.in.order[j];
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
ret = TSK_TREE_OK;
Expand Down Expand Up @@ -5759,6 +5750,7 @@ tsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(optio
const tsk_size_t num_trees = self->tree_sequence->num_trees;
const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);
tsk_id_t j, e, index;
tsk_tree_position_t tree_pos;

index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, x);
if (breakpoints[index] > x) {
Expand All @@ -5770,9 +5762,10 @@ tsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(optio
if (ret != 0) {
goto out;

Check warning on line 5763 in c/tskit/trees.c

View check run for this annotation

Codecov / codecov/patch

c/tskit/trees.c#L5763

Added line #L5763 was not covered by tests
}
interval_left = self->tree_pos.interval.left;
for (j = self->tree_pos.in.start; j != self->tree_pos.in.stop; j++) {
e = self->tree_pos.in.order[j];
tree_pos = self->tree_pos;
interval_left = tree_pos.interval.left;
for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
if (edge_left[e] <= interval_left && interval_left < edge_right[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
Expand All @@ -5782,9 +5775,10 @@ tsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(optio
if (ret != 0) {
goto out;

Check warning on line 5776 in c/tskit/trees.c

View check run for this annotation

Codecov / codecov/patch

c/tskit/trees.c#L5776

Added line #L5776 was not covered by tests
}
interval_right = self->tree_pos.interval.right;
for (j = self->tree_pos.in.start; j != self->tree_pos.in.stop; j--) {
e = self->tree_pos.in.order[j];
tree_pos = self->tree_pos;
interval_right = tree_pos.interval.right;
for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {
e = tree_pos.in.order[j];
if (edge_right[e] >= interval_right && interval_right > edge_left[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
Expand Down Expand Up @@ -6974,23 +6968,19 @@ update_kc_subtree_state(
}

static int
update_kc_incremental(
tsk_tree_t *tree, kc_vectors *kc, tsk_tree_position_t *tree_pos, tsk_size_t *depths)
update_kc_incremental(tsk_tree_t *tree, kc_vectors *kc, tsk_size_t *depths)
{
int ret = 0;
tsk_id_t u, v, e, j;
double root_time, time;
const double *restrict times = tree->tree_sequence->tables->nodes.time;
const tsk_id_t *restrict edges_child = tree->tree_sequence->tables->edges.child;
const tsk_id_t *restrict edges_parent = tree->tree_sequence->tables->edges.parent;

tsk_bug_assert(tree_pos->index == tree->index);
tsk_bug_assert(tree_pos->interval.left == tree->interval.left);
tsk_bug_assert(tree_pos->interval.right == tree->interval.right);
tsk_tree_position_t tree_pos = tree->tree_pos;

/* Update state of detached subtrees */
for (j = tree_pos->out.stop - 1; j >= tree_pos->out.start; j--) {
e = tree_pos->out.order[j];
for (j = tree_pos.out.stop - 1; j >= tree_pos.out.start; j--) {
e = tree_pos.out.order[j];
u = edges_child[e];
depths[u] = 0;

Expand All @@ -7004,8 +6994,8 @@ update_kc_incremental(
}

/* Propagate state change down into reattached subtrees. */
for (j = tree_pos->in.stop - 1; j >= tree_pos->in.start; j--) {
e = tree_pos->in.order[j];
for (j = tree_pos.in.stop - 1; j >= tree_pos.in.start; j--) {
e = tree_pos.in.order[j];
u = edges_child[e];
v = edges_parent[e];

Expand Down Expand Up @@ -7038,17 +7028,11 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
const tsk_treeseq_t *treeseqs[2] = { self, other };
tsk_tree_t trees[2];
kc_vectors kcs[2];
/* TODO the tree_pos here is redundant because we should be using this interally
* in the trees to do the advancing. Once we have converted the tree over to using
* tree_pos internally, we can get rid of these tree_pos variables and use
* the values stored in the trees themselves */
tsk_tree_position_t tree_pos[2];
tsk_size_t *depths[2];
int ret = 0;

for (i = 0; i < 2; i++) {
tsk_memset(&trees[i], 0, sizeof(trees[i]));
tsk_memset(&tree_pos[i], 0, sizeof(tree_pos[i]));
tsk_memset(&kcs[i], 0, sizeof(kcs[i]));
depths[i] = NULL;
}
Expand All @@ -7064,10 +7048,6 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
if (ret != 0) {
goto out;
}
ret = tsk_tree_position_init(&tree_pos[i], treeseqs[i], 0);
if (ret != 0) {
goto out;
}
ret = kc_vectors_alloc(&kcs[i], n);
if (ret != 0) {
goto out;
Expand All @@ -7091,10 +7071,8 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
if (ret != 0) {
goto out;
}
tsk_tree_position_next(&tree_pos[0]);
tsk_bug_assert(tree_pos[0].index == 0);

ret = update_kc_incremental(&trees[0], &kcs[0], &tree_pos[0], depths[0]);
ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);
if (ret != 0) {
goto out;
}
Expand All @@ -7103,17 +7081,11 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
if (ret != 0) {
goto out;
}
tsk_tree_position_next(&tree_pos[1]);
tsk_bug_assert(tree_pos[1].index != -1);

ret = update_kc_incremental(&trees[1], &kcs[1], &tree_pos[1], depths[1]);
ret = update_kc_incremental(&trees[1], &kcs[1], depths[1]);
if (ret != 0) {
goto out;
}
tsk_bug_assert(trees[0].interval.left == tree_pos[0].interval.left);
tsk_bug_assert(trees[0].interval.right == tree_pos[0].interval.right);
tsk_bug_assert(trees[1].interval.left == tree_pos[1].interval.left);
tsk_bug_assert(trees[1].interval.right == tree_pos[1].interval.right);
while (trees[0].interval.right < trees[1].interval.right) {
span = trees[0].interval.right - left;
total += norm_kc_vectors(&kcs[0], &kcs[1], lambda_) * span;
Expand All @@ -7125,9 +7097,7 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
if (ret != 0) {
goto out;
}
tsk_tree_position_next(&tree_pos[0]);
tsk_bug_assert(tree_pos[0].index != -1);
ret = update_kc_incremental(&trees[0], &kcs[0], &tree_pos[0], depths[0]);
ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);
if (ret != 0) {
goto out;
}
Expand All @@ -7144,7 +7114,6 @@ tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
out:
for (i = 0; i < 2; i++) {
tsk_tree_free(&trees[i]);
tsk_tree_position_free(&tree_pos[i]);
kc_vectors_free(&kcs[i]);
tsk_safe_free(depths[i]);
}
Expand Down
2 changes: 0 additions & 2 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -1850,10 +1850,8 @@ int tsk_tree_position_free(tsk_tree_position_t *self);
int tsk_tree_position_print_state(const tsk_tree_position_t *self, FILE *out);
bool tsk_tree_position_next(tsk_tree_position_t *self);
bool tsk_tree_position_prev(tsk_tree_position_t *self);
bool tsk_tree_position_step(tsk_tree_position_t *self, int direction);
int tsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index);
int tsk_tree_position_seek_backward(tsk_tree_position_t *self, tsk_id_t index);
int tsk_tree_position_seek(tsk_tree_position_t *self, double position);

#ifdef __cplusplus
}
Expand Down

0 comments on commit d88aff8

Please sign in to comment.