Skip to content

Commit

Permalink
fix: fix invalid identification of homopolymer error (#405)
Browse files Browse the repository at this point in the history
### Description

Fixing a bug where the insertion of a base was falsely identified as
homopolymer error in case there is a single identical base in front of
the insertion.

### QC
<!-- Make sure that you can tick the boxes below. -->

* [x] The PR contains a test case for the changes or the changes are
already covered by an existing test case.
* [ ] The documentation at
https://github.com/varlociraptor/varlociraptor.github.io is updated in a
separate PR to reflect the changes or this is not necessary (e.g. if the
change does neither modify the calling grammar nor the behavior or
functionalities of Varlociraptor).

---------

Co-authored-by: Johannes Koester <johannes.koester@uk-essen.de>
  • Loading branch information
FelixMoelder and johanneskoester committed Jan 22, 2024
1 parent 201f91f commit e1b66cc
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 5 deletions.
20 changes: 20 additions & 0 deletions src/estimation/alignment_properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,26 @@ pub(crate) struct AlignmentProperties {
epsilon_gap: f64,
}

impl AlignmentProperties {
pub(crate) fn max_homopolymer_insertion_len(&self) -> u16 {
self.wildtype_homopolymer_error_model
.keys()
.filter(|i| **i >= 0)
.map(|i| *i as u16)
.max()
.unwrap_or(0)
}

pub(crate) fn max_homopolymer_deletion_len(&self) -> u16 {
self.wildtype_homopolymer_error_model
.keys()
.filter(|i| **i <= 0)
.map(|i| i.abs() as u16)
.max()
.unwrap_or(0)
}
}

#[derive(Debug, Copy, Clone)]
struct EstimationParams {
num_alignments: Option<u64>,
Expand Down
69 changes: 65 additions & 4 deletions src/utils/homopolymers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,11 @@ impl HomopolymerIndelOperation {
let mut text_pos = 0;

let is_extendable_stretch = |rpos, base| {
let min_len = if text[rpos] == base { 0 } else { 1 };
(rpos < (text.len() - 1)
&& extend_homopolymer_stretch(base, &mut text[rpos + 1..].iter()) > 0)
&& extend_homopolymer_stretch(base, &mut text[rpos + 1..].iter()) > min_len)
|| (rpos > 0
&& extend_homopolymer_stretch(base, &mut text[..rpos].iter().rev()) > 0)
&& extend_homopolymer_stretch(base, &mut text[..rpos].iter().rev()) > min_len)
};

for (op, stretch) in &alignment.iter().group_by(|op| *op) {
Expand Down Expand Up @@ -197,15 +198,28 @@ impl HomopolymerErrorModel {
.collect_vec(),
)
};
let is_insertion = variant_homopolymer_indel_len > 0;

let prob_homopolymer_insertion = prob_homopolymer_error(&|item_len| item_len > 0);
let prob_homopolymer_deletion = prob_homopolymer_error(&|item_len| item_len < 0);
let mut prob_homopolymer_artifact_deletion = prob_homopolymer_deletion;
let mut prob_homopolymer_artifact_insertion = prob_homopolymer_insertion;
let prob_total = prob_homopolymer_insertion.ln_add_exp(prob_homopolymer_deletion);
if prob_total != LogProb::ln_zero() {
prob_homopolymer_artifact_insertion -= prob_total;
prob_homopolymer_artifact_deletion -= prob_total;
if (is_insertion
&& variant_homopolymer_indel_len
<= alignment_properties.max_homopolymer_insertion_len() as i8)
|| (!is_insertion
&& variant_homopolymer_indel_len.abs()
<= alignment_properties.max_homopolymer_deletion_len() as i8)
{
prob_homopolymer_artifact_insertion -= prob_total;
prob_homopolymer_artifact_deletion -= prob_total;
} else {
// insertion or deletion is too long to be an artifact. There is no scenario with such a long homopolymer error.
prob_homopolymer_artifact_insertion = LogProb::ln_zero();
prob_homopolymer_artifact_deletion = LogProb::ln_zero();
}
} // else both of them are already zero, nothing to do.

Some(HomopolymerErrorModel {
Expand All @@ -223,3 +237,50 @@ impl HomopolymerErrorModel {
}
}
}

#[cfg(test)]
mod test_homopolymer {
use crate::utils::homopolymers::HomopolymerIndelOperation;
use bio_types::alignment::AlignmentOperation::*;
#[test]
fn test_homopolymer_indel_operation() {
// Insertion after identical base
let test_alignment = &[Match, Match, Ins, Match, Match];
let test_homopolymer_indel =
HomopolymerIndelOperation::from_alignment(b"ACGT", b"ACCGT", test_alignment);
if test_homopolymer_indel.is_some() {
panic!("Invalid homopolymer error detected");
}

// Insertion before identical base
let test_alignment = &[Match, Ins, Match, Match, Match];
let test_homopolymer_indel =
HomopolymerIndelOperation::from_alignment(b"ACGT", b"ACCGT", test_alignment);
if test_homopolymer_indel.is_some() {
panic!("Invalid homopolymer error detected");
}

// Insertion at the beginning of the homopolymer
let test_alignment = &[Match, Ins, Match, Match, Match];
let test_homopolymer_indel =
HomopolymerIndelOperation::from_alignment(b"GTTA", b"GTTTA", test_alignment);
if test_homopolymer_indel.is_none() {
panic!("Missed homopolymer error");
}

// Insertion in the middle of the homopolymer
let test_alignment = &[Match, Match, Ins, Match, Match];
let test_homopolymer_indel =
HomopolymerIndelOperation::from_alignment(b"GTTA", b"GTTTA", test_alignment);
if test_homopolymer_indel.is_none() {
panic!("Missed homopolymer error ");
}
// Insertion at the end of the homopolymer
let test_alignment = &[Match, Match, Match, Ins, Match];
let test_homopolymer_indel =
HomopolymerIndelOperation::from_alignment(b"GTTA", b"GTTTA", test_alignment);
if test_homopolymer_indel.is_none() {
panic!("Missed homopolymer error");
}
}
}
1 change: 1 addition & 0 deletions src/variants/evidence/observations/read_observation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ where
obs.homopolymer_indel_len(Some(alt_indel_len));
assert!(ref_indel_len != 0); // caught above
if ref_indel_len > 0 {
// insertion
obs.prob_observable_at_homopolymer_artifact(Some(
homopolymer_error_model.prob_homopolymer_artifact_insertion(),
))
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/testcases/test19/testcase.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

expected:
posteriors:
- PROB_SOMATIC_TUMOR >= 229.0
- PROB_SOMATIC_TUMOR >= 22.0

# necessary bam files
samples:
Expand Down

0 comments on commit e1b66cc

Please sign in to comment.