Skip to content

Commit

Permalink
Pre-index non-alt paths to fix #3054
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Oct 28, 2020
1 parent fac2dd9 commit 63f1b51
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 11 deletions.
19 changes: 16 additions & 3 deletions src/graph_synchronizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,17 @@ namespace vg {
using namespace std;

GraphSynchronizer::GraphSynchronizer(VG& graph) : graph(graph) {
// Nothing to do!
// Because in general paths can overlap each other, and because we can't
// build a path index after a path has been modified (since we don't keep
// the ranks up to date internally), we need to build all the indexes up
// front, even if we're just working on a single path.
graph.for_each_path_handle([&](const path_handle_t& path) {
string name = graph.get_path_name(path);
if (!Paths::is_alt(name)) {
// We only care about reference paths.
get_path_index(name);
}
});
}

void GraphSynchronizer::with_path_index(const string& path_name, const function<void(const PathIndex&)>& to_run) {
Expand All @@ -29,7 +39,10 @@ const string& GraphSynchronizer::get_path_sequence(const string& path_name) {

// We need a function to grab the index for a path
PathIndex& GraphSynchronizer::get_path_index(const string& path_name) {


// We don't work on alt paths; there could be too many to pre-index.
assert(!Paths::is_alt(path_name));

if (!indexes.count(path_name)) {
// Not already made. Generate it.
indexes.emplace(piecewise_construct,
Expand Down Expand Up @@ -115,7 +128,7 @@ void GraphSynchronizer::Lock::lock() {
cerr << endl;
}
#endif

// Make them into pos_ts that point left to right, the way Jordan thinks.
pos_t left_pos = make_pos_t(start_left.node, start_left.is_end, 0);
pos_t right_pos = make_pos_t(end_right.node, !end_right.is_end,
Expand Down
12 changes: 7 additions & 5 deletions src/variant_adder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ namespace vg {
using namespace std;
using namespace vg::io;

VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
VariantAdder::VariantAdder(VG& graph) : graph(graph), sync([&](VG& g) -> VG& {
// Dice nodes in the graph for GCSA indexing *before* constructing the synchronizer.
g.dice_nodes(max_node_size);
return g;
}(this->graph)) {


graph.paths.for_each_name([&](const string& name) {
// Save the names of all the graph paths, so we don't need to lock the
// graph to check them.
Expand All @@ -18,10 +24,6 @@ VariantAdder::VariantAdder(VG& graph) : graph(graph), sync(graph) {
// Show progress if the graph does.
show_progress = graph.show_progress;

// Make sure to dice nodes to 1024 or smaller, the max size that GCSA2
// supports, in case we need to GCSA-index part of the graph.
graph.dice_nodes(max_node_size);

// Configure the aligner to use a full length bonus
aligner.full_length_bonus = 5;
}
Expand Down
4 changes: 2 additions & 2 deletions src/vg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4004,7 +4004,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
#ifdef debug_divide

#pragma omp critical (cerr)
cerr << omp_get_thread_num() << ": dividing mapping " << pb2json(*m) << endl;
cerr << omp_get_thread_num() << ": dividing mapping " << *m << endl;
#endif

string path_name = paths.mapping_path_name(m);
Expand Down Expand Up @@ -4077,7 +4077,7 @@ void VG::divide_node(Node* node, vector<int>& positions, vector<Node*>& parts) {
#pragma omp critical (cerr)
cerr << omp_get_thread_num() << ": produced mappings:" << endl;
for(auto mapping : mapping_parts) {
cerr << "\t" << pb2json(mapping) << endl;
cerr << "\t" << mapping << endl;
}
#endif
}
Expand Down
24 changes: 24 additions & 0 deletions test/add/multi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"node": [
{"id": 1, "sequence": "CTTAAAATGATCGGGACTTTTCAAATCTTATTT"}
],
"edge": [
],
"path": [
{"name": "ref", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
]},
{"name": "ref2", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0}}
]},
{"name": "ref3", "mapping": [
{"rank": 1, "edit": [
{"from_length": 33, "to_length": 33}
], "position": {"node_id": 1, "offset": 0, "is_reverse": true}}
]}
]
}
15 changes: 15 additions & 0 deletions test/add/multi.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
##fileformat=VCFv4.0
##fileDate=20090805
##source=myImputationProgramV3.1
##reference=1000GenomesPilot-NCBI36
##phasing=partial
##FILTER=<ID=q10,Description="Quality below 10">
##FILTER=<ID=s50,Description="Less than 50% of samples have data">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2 SAMPLE3 SAMPLE4
ref 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
ref 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
ref 23 . A AC 100 PASS . GT 0/0 1/0 . ./0
ref3 18 . TC T 100 PASS . GT 1/0 0/0 0|0 ././1
ref3 21 . CGA GAC 100 PASS . GT 0/1 0/0 ./1 ./1/.
ref3 23 . A AC 100 PASS . GT 0/0 1/0 . ./0
4 changes: 3 additions & 1 deletion test/t/31_vg_add.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 11
plan tests 12

vg construct -r add/ref.fa > ref.vg
vg add -v add/benedict.vcf ref.vg > benedict.vg
Expand Down Expand Up @@ -44,6 +44,8 @@ is "$(vg view -c x.vg | jq -c '.path[].mapping[] | select(.rank | not)' | wc -l)

is "$(vg view -Jv add/backward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward nodes can be added to"

is "$(vg view -Jv add/multi.json | vg add -v add/multi.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with multiple overlapping paths nodes can be added to"

is "$(vg view -Jv add/backward_and_forward.json | vg add -v add/benedict.vcf - | vg mod --unchop - | vg stats -N -)" "5" "graphs with backward and forward nodes can be added to"

rm -rf ref.vg ref.pg benedict.vg benedict2.vg benedict3.vg x-ref.vg x.vg refN.vg no-n.vg with-n.vg ngap.vg ngap-add.vg
Expand Down

1 comment on commit 63f1b51

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch allow-add-overlap. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 14285 seconds

Please sign in to comment.