Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into rna-gbwt-anno
Browse files Browse the repository at this point in the history
  • Loading branch information
jonassibbesen committed Sep 20, 2021
2 parents 2929d90 + 9e02cca commit 2db683d
Show file tree
Hide file tree
Showing 46 changed files with 2,053 additions and 627 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -674,7 +674,7 @@ $(LIB_DIR)/libfml.a: $(FERMI_DIR)/*.h $(FERMI_DIR)/*.c
$(LIB_DIR)/libsublinearLS.a: $(LINLS_DIR)/src/*.cpp $(LINLS_DIR)/src/*.hpp $(LIB_DIR)/libhts.a
. ./source_me.sh && cd $(LINLS_DIR) && $(MAKE) clean && INCLUDE_FLAGS="-I$(CWD)/$(INC_DIR)" $(MAKE) libs $(FILTER) && cp lib/libsublinearLS.a $(CWD)/$(LIB_DIR)/ && mkdir -p $(CWD)/$(INC_DIR)/sublinearLS && cp src/*.hpp $(CWD)/$(INC_DIR)/sublinearLS/

$(LIB_DIR)/libbdsg.a: $(INC_DIR)/BooPHF.h $(LIBBDSG_DIR)/Makefile $(LIBBDSG_DIR)/bdsg/src/*.cpp $(LIBBDSG_DIR)/bdsg/include/bdsg/*.hpp $(LIBBDSG_DIR)/bdsg/include/bdsg/internal/*.hpp $(LIBBDSG_DIR)/bdsg/include/bdsg/overlays/*.hpp $(LIB_DIR)/libhandlegraph.a $(LIB_DIR)/libsdsl.a $(LIB_DIR)/libdivsufsort.a $(LIB_DIR)/libdivsufsort64.a $(INC_DIR)/sparsepp/spp.h $(INC_DIR)/dynamic/dynamic.hpp
$(LIB_DIR)/libbdsg.a: $(INC_DIR)/BooPHF.h $(LIBBDSG_DIR)/Makefile $(LIBBDSG_DIR)/bdsg/src/*.cpp $(LIBBDSG_DIR)/bdsg/include/bdsg/*.hpp $(LIBBDSG_DIR)/bdsg/include/bdsg/internal/*.hpp $(LIBBDSG_DIR)/bdsg/include/bdsg/overlays/*.hpp $(LIB_DIR)/libhandlegraph.a $(LIB_DIR)/libsdsl.a $(LIB_DIR)/libdivsufsort.a $(LIB_DIR)/libdivsufsort64.a $(INC_DIR)/sparsepp/spp.h $(INC_DIR)/dynamic/dynamic.hpp $(INC_DIR)/mio/mmap.hpp
+. ./source_me.sh && rm -Rf $(CWD)/$(INC_DIR)/bdsg && cd $(LIBBDSG_DIR) && $(MAKE) clean && CPLUS_INCLUDE_PATH=$(CWD)/$(INC_DIR):$(CWD)/$(INC_DIR)/dynamic:$(CPLUS_INCLUDE_PATH) CXXFLAGS="$(INCLUDE_FLAGS) $(CXXFLAGS)" $(MAKE) $(FILTER) && cp lib/libbdsg.a $(CWD)/$(LIB_DIR) && cp -r bdsg/include/* $(CWD)/$(INC_DIR)

$(INC_DIR)/mio/mmap.hpp: $(MIO_DIR)/include/mio/*
Expand Down
10 changes: 6 additions & 4 deletions README.md
Expand Up @@ -241,8 +241,10 @@ Call only variants that are present in the graph (use `-g`):

<!-- !test check Pack and call -->
```sh
# Compute the read support from the gam (ignoring mapping and base qualitiy < 5)
vg pack -x x.xg -g aln.gam -Q 5 -o aln.pack
# Compute the read support from the gam
# -Q 5: ignore mapping and base qualitiy < 5
# -s 5: ignore first and last 5bp from each read
vg pack -x x.xg -g aln.gam -Q 5 -s 5 -o aln.pack

# Generate a VCF from the support.
vg call x.xg -k aln.pack > graph_calls.vcf
Expand All @@ -261,8 +263,8 @@ In order to also consider *novel* variants from the reads, use the augmented gra
# Index our augmented graph
vg index aug.vg -x aug.xg

# Compute the read support from the augmented gam (with ignoring qualitiy < 5)
vg pack -x aug.xg -g aug.gam -Q 5 -o aln_aug.pack
# Compute the read support from the augmented gam (ignoring qualitiy < 5, and 1st and last 5bp of each read)
vg pack -x aug.xg -g aug.gam -Q 5 -s 5 -o aln_aug.pack

# Generate a VCF from the support
vg call aug.xg -k aln_aug.pack > calls.vcf
Expand Down
2 changes: 1 addition & 1 deletion deps/libvgio
Submodule libvgio updated 1 files
+13 −51 CMakeLists.txt
58 changes: 49 additions & 9 deletions src/aligner.cpp
Expand Up @@ -443,21 +443,34 @@ int32_t GSSWAligner::score_gap(size_t gap_length) const {
return gap_length ? -gap_open - (gap_length - 1) * gap_extension : 0;
}

double GSSWAligner::first_mapping_quality_exact(const vector<double>& scaled_scores,
const vector<double>* multiplicities) {
return maximum_mapping_quality_exact(scaled_scores, nullptr, multiplicities);
}

double GSSWAligner::first_mapping_quality_approx(const vector<double>& scaled_scores,
const vector<double>* multiplicities) {
return maximum_mapping_quality_approx(scaled_scores, nullptr, multiplicities);
}

double GSSWAligner::maximum_mapping_quality_exact(const vector<double>& scaled_scores, size_t* max_idx_out,
const vector<double>* multiplicities) {

// TODO: this isn't very well-named now that it also supports computing non-maximum
// mapping qualities

// work in log transformed values to avoid risk of overflow
double log_sum_exp = numeric_limits<double>::lowest();
double max_score = numeric_limits<double>::lowest();
double to_score = numeric_limits<double>::lowest();

// go in reverse order because this has fewer numerical problems when the scores are sorted (as usual)
for (int64_t i = scaled_scores.size() - 1; i >= 0; i--) {
// get the value of one copy of the score and check if it's the max
double score = scaled_scores.at(i);
if (score >= max_score) {
if (max_idx_out && score >= to_score) {
// Since we are going in reverse order, make sure to break ties in favor of the earlier item.
*max_idx_out = i;
max_score = score;
to_score = score;
}

// add all copies of the score
Expand All @@ -479,7 +492,11 @@ double GSSWAligner::maximum_mapping_quality_exact(const vector<double>& scaled_s
}
}

double direct_mapq = -quality_scale_factor * subtract_log(0.0, max_score - log_sum_exp);
if (!max_idx_out) {
to_score = scaled_scores.empty() ? 0.0 : scaled_scores.front();
}

double direct_mapq = -quality_scale_factor * subtract_log(0.0, to_score - log_sum_exp);
return std::isinf(direct_mapq) ? (double) numeric_limits<int32_t>::max() : direct_mapq;
}

Expand Down Expand Up @@ -522,6 +539,9 @@ double GSSWAligner::maximum_mapping_quality_approx(const vector<double>& scaled_
const vector<double>* multiplicities) {
assert(!scaled_scores.empty());

// TODO: this isn't very well-named now that it also supports computing non-maximum
// mapping qualities

// determine the maximum score and the count of the next highest score
double max_score = scaled_scores.at(0);
size_t max_idx = 0;
Expand Down Expand Up @@ -571,9 +591,19 @@ double GSSWAligner::maximum_mapping_quality_approx(const vector<double>& scaled_
}

// record the index of the highest score
*max_idx_out = max_idx;

return max(0.0, quality_scale_factor * (max_score - next_score - (next_count > 1.0 ? log(next_count) : 0.0)));
if (max_idx_out) {
*max_idx_out = max_idx;
}
if (max_idx_out || max_idx == 0) {
// we're either returning the mapping quality of whichever was the best, or we're
// returning the mapping quality of the first, which also is the best
return max(0.0, quality_scale_factor * (max_score - next_score - (next_count > 1.0 ? log(next_count) : 0.0)));
}
else {
// we're returning the mapping quality of the first, which is not the best. the approximation
// gets complicated here, so lets just fall back on the exact computation
return maximum_mapping_quality_exact(scaled_scores, nullptr, multiplicities);
}
}

double GSSWAligner::group_mapping_quality_exact(const vector<double>& scaled_scores, const vector<size_t>& group,
Expand Down Expand Up @@ -696,8 +726,8 @@ void GSSWAligner::compute_mapping_quality(vector<Alignment>& alignments,
}
}

int32_t GSSWAligner::compute_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {
int32_t GSSWAligner::compute_max_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {

vector<double> scaled_scores(scores.size());
for (size_t i = 0; i < scores.size(); i++) {
Expand All @@ -708,6 +738,16 @@ int32_t GSSWAligner::compute_mapping_quality(const vector<double>& scores, bool
: maximum_mapping_quality_exact(scaled_scores, &idx, multiplicities));
}

int32_t GSSWAligner::compute_first_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities) const {
vector<double> scaled_scores(scores.size());
for (size_t i = 0; i < scores.size(); i++) {
scaled_scores[i] = log_base * scores[i];
}
return (int32_t) (fast_approximation ? first_mapping_quality_approx(scaled_scores, multiplicities)
: first_mapping_quality_exact(scaled_scores, multiplicities));
}

int32_t GSSWAligner::compute_group_mapping_quality(const vector<double>& scores, const vector<size_t>& group,
const vector<double>* multiplicities) const {

Expand Down
19 changes: 17 additions & 2 deletions src/aligner.hpp
Expand Up @@ -98,6 +98,15 @@ namespace vg {
/// the mapping quality is always calculated as if its multiplicity is 1.
static double maximum_mapping_quality_approx(const vector<double>& scaled_scores, size_t* max_idx_out,
const vector<double>* multiplicities = nullptr);

/// Same as maximum_mapping_quality_exact except alway s computes mapping
/// quality for the first score
static double first_mapping_quality_exact(const vector<double>& scaled_scores,
const vector<double>* multiplicities = nullptr);
/// Same as maximum_mapping_quality_approx except alway s computes mapping
/// quality for the first score
static double first_mapping_quality_approx(const vector<double>& scaled_scores,
const vector<double>* multiplicities = nullptr);
protected:
double group_mapping_quality_exact(const vector<double>& scaled_scores, const vector<size_t>& group,
const vector<double>* multiplicities = nullptr) const;
Expand Down Expand Up @@ -217,11 +226,17 @@ namespace vg {
double maybe_mq_threshold,
double identity_weight) const;

/// Computes mapping quality for the first score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the first mapping quality is always calculated as if it multiplicity is 1.
int32_t compute_first_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;

/// Computes mapping quality for the optimal score in a vector of scores.
/// Optionally includes a vector of implicit counts >= 1 for the scores, but
/// the mapping quality is always calculated as if it multiplicity is 1.
int32_t compute_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;
int32_t compute_max_mapping_quality(const vector<double>& scores, bool fast_approximation,
const vector<double>* multiplicities = nullptr) const;

/// Computes mapping quality for a group of scores in a vector of scores (group given by indexes).
/// Optionally includes a vector of implicit counts >= 1 for the score, but the mapping quality is always
Expand Down

1 comment on commit 2db683d

@adamnovak
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch rna-gbwt-anno. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 10465 seconds

Please sign in to comment.