Skip to content

Commit

Permalink
merge contiguous rgfa fragments
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Mar 27, 2024
1 parent f23e65a commit 6ebd95f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 12 deletions.
70 changes: 58 additions & 12 deletions src/rgfa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ void RGFACover::compute(const PathHandleGraph* graph,
vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals = rgfa_intervals_vector[omp_get_thread_num()];
unordered_map<nid_t, int64_t>& thread_node_to_interval = node_to_interval_vector[omp_get_thread_num()];

vector<const Snarl*> queue = {snarl};
vector<const Snarl*> queue = {snarl};

cerr << "top level snarl " << pb2json(*snarl) << endl;

while(!queue.empty()) {
const Snarl* cur_snarl = queue.back();
Expand Down Expand Up @@ -576,19 +578,13 @@ void RGFACover::compute_snarl(const Snarl& snarl, PathTraversalFinder& path_trav
}
continue;
}

// add the interval to the local (thread safe) structures
step_handle_t step = trav[uncovered_interval.first];
int64_t interval_length = uncovered_interval.second - uncovered_interval.first;
pair<step_handle_t, step_handle_t> new_interval = make_pair(trav.at(uncovered_interval.first),
graph->get_next_step(trav.at(uncovered_interval.second - 1)));
#ifdef debug
int64_t interval_length = uncovered_interval.second - uncovered_interval.first;
cerr << "adding interval with length " << interval_length << endl;
#endif
for (int64_t i = 0; i < interval_length; ++i) {
thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = thread_rgfa_intervals.size();
step = graph->get_next_step(step);
}
thread_rgfa_intervals.push_back(make_pair(trav.at(uncovered_interval.first),
graph->get_next_step(trav.at(uncovered_interval.second - 1))));
add_interval(thread_rgfa_intervals, thread_node_to_interval, new_interval);
}
}

Expand Down Expand Up @@ -621,6 +617,54 @@ vector<pair<int64_t, int64_t>> RGFACover::get_uncovered_intervals(const vector<s
return intervals;
}

// Add new_interval to the thread-local rGFA cover, merging it into an existing
// interval on the same path when the two are exactly contiguous.
//
// thread_rgfa_intervals   : this thread's intervals, each a half-open step range
//                           [first, second) along a single path
// thread_node_to_interval : maps a node id to the index (in thread_rgfa_intervals)
//                           of the interval covering it
// new_interval            : half-open step range [first, second) to add
//
// Every node of new_interval is recorded in thread_node_to_interval, whether the
// interval was appended or merged, so later coverage/adjacency lookups see it.
//
// Returns true if a new interval was appended, false if an existing interval
// was extended instead.
bool RGFACover::add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
                             unordered_map<nid_t, int64_t>& thread_node_to_interval,
                             const pair<step_handle_t, step_handle_t>& new_interval) {

    const path_handle_t path_handle = graph->get_path_handle_of_step(new_interval.first);

    // index of the existing interval that absorbed new_interval (-1 if none did)
    int64_t merged_idx = -1;

    // check the before-first step. if its node is in an interval on this path that
    // ends exactly where the new interval begins, extend that interval to the right.
    // the map is keyed by node id rather than by step, so contiguity is verified
    // explicitly instead of assumed (an assert would vanish in NDEBUG builds and
    // let a non-adjacent interval be corrupted)
    const step_handle_t before_first_step = graph->get_previous_step(new_interval.first);
    if (before_first_step != graph->path_front_end(path_handle)) {
        const nid_t prev_node_id = graph->get_id(graph->get_handle_of_step(before_first_step));
        auto prev_it = thread_node_to_interval.find(prev_node_id);
        if (prev_it != thread_node_to_interval.end()) {
            pair<step_handle_t, step_handle_t>& prev_interval = thread_rgfa_intervals[prev_it->second];
            if (graph->get_path_handle_of_step(prev_interval.first) == path_handle &&
                prev_interval.second == new_interval.first) {
                prev_interval.second = new_interval.second;
                merged_idx = prev_it->second;
            }
        }
    }

    // check the end step. if its node is in an interval on this path that begins
    // exactly where the new interval ends, extend that interval to the left.
    // this is skipped when we already merged to the left: extending both neighbours
    // over new_interval would leave two overlapping intervals in the cover.
    // NOTE(review): in that bridging case the two neighbours end up adjacent but
    // unmerged; fusing them would require removing an entry from the vector and
    // re-indexing the map.
    if (merged_idx < 0 && new_interval.second != graph->path_end(path_handle)) {
        const nid_t next_node_id = graph->get_id(graph->get_handle_of_step(new_interval.second));
        auto next_it = thread_node_to_interval.find(next_node_id);
        if (next_it != thread_node_to_interval.end()) {
            pair<step_handle_t, step_handle_t>& next_interval = thread_rgfa_intervals[next_it->second];
            if (graph->get_path_handle_of_step(next_interval.first) == path_handle &&
                next_interval.first == new_interval.second) {
                next_interval.first = new_interval.first;
                merged_idx = next_it->second;
            }
        }
    }

    // register every node of the new interval in the local (thread safe) map,
    // pointing at the interval that now contains it
    const bool merged = merged_idx >= 0;
    const int64_t interval_idx = merged ? merged_idx : static_cast<int64_t>(thread_rgfa_intervals.size());
    for (step_handle_t step = new_interval.first; step != new_interval.second; step = graph->get_next_step(step)) {
        thread_node_to_interval[graph->get_id(graph->get_handle_of_step(step))] = interval_idx;
    }

    if (!merged) {
        thread_rgfa_intervals.push_back(new_interval);
    }

    return !merged;
}

int64_t RGFACover::get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval) {
path_handle_t path_handle = graph->get_path_handle_of_step(trav.front());
int64_t coverage = 0;
Expand All @@ -635,7 +679,9 @@ int64_t RGFACover::get_coverage(const vector<step_handle_t>& trav, const pair<in

return coverage;
}




// copied pretty much verbatim from
// https://github.com/ComparativeGenomicsToolkit/hal2vg/blob/v1.1.2/clip-vg.cpp#L809-L880
void RGFACover::forwardize_rgfa_paths(MutablePathMutableHandleGraph* mutable_graph) {
Expand Down
7 changes: 7 additions & 0 deletions src/rgfa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ class RGFACover {
vector<pair<int64_t, int64_t>> get_uncovered_intervals(const vector<step_handle_t>& trav,
const unordered_map<nid_t, int64_t>& thread_node_to_interval);

// add a new interval into the rgfa_intervals vector and update the node_to_interval map
// if the interval can be merged into an existing, contiguous interval, do that instead
// new_interval is a half-open range of steps: [first, second)
// returns true if a new interval was added, false if an existing interval was updated
bool add_interval(vector<pair<step_handle_t, step_handle_t>>& thread_rgfa_intervals,
unordered_map<nid_t, int64_t>& thread_node_to_interval,
const pair<step_handle_t, step_handle_t>& new_interval);

// get the total coverage of a traversal (sum of step lengths)
int64_t get_coverage(const vector<step_handle_t>& trav, const pair<int64_t, int64_t>& uncovered_interval);

Expand Down

1 comment on commit 6ebd95f

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch rgfa2. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 16991 seconds

Please sign in to comment.