Commit

fix: improved specificity of read position bias model by estimating the rate of the predominant read position from the ref supporting observations (#425)

### Description


### QC

* [x] The PR contains a test case for the changes or the changes are
already covered by an existing test case.
* [x] The documentation at
https://github.com/varlociraptor/varlociraptor.github.io is updated in a
separate PR to reflect the changes or this is not necessary (e.g. if the
change neither modifies the calling grammar nor the behavior or
functionality of Varlociraptor).
johanneskoester committed May 11, 2024
1 parent b1017f5 commit eea6727
Showing 8 changed files with 111 additions and 17 deletions.
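
As the commit title says, `ReadPositionBias::None` now carries an optional `major_rate` that is learned from strong ref-supporting observations and used as the probability of seeing a read at the predominant (major) read position. The sketch below illustrates that likelihood assignment with simplified, hypothetical types (`ReadPos`, `prob_no_bias`) and linear-space probabilities instead of the crate's `LogProb`; it is not the crate's actual API.

```rust
#[derive(Clone, Copy, PartialEq)]
enum ReadPos {
    Major,
    Other,
}

/// Probability of an observation under the "no bias" model given an optionally
/// learned rate of the major read position (mirrors the `ReadPositionBias::None`
/// arms of `prob_alt` in the diff below, in linear space).
fn prob_no_bias(major_rate: Option<f64>, pos: ReadPos, prob_hit_base: f64) -> f64 {
    match (major_rate, pos) {
        (Some(rate), ReadPos::Major) => rate,       // learned rate for the major position
        (Some(rate), ReadPos::Other) => 1.0 - rate, // remainder for all other positions
        (None, ReadPos::Major) => prob_hit_base,    // fall back to the previous behavior
        (None, ReadPos::Other) => 1.0 - prob_hit_base,
    }
}

fn main() {
    // If 75% of the strong ref-supporting reads hit the major position, an alt read
    // at that position has probability 0.75 under "no bias", whereas the bias model
    // (ReadPositionBias::Some) expects it with probability 1.
    assert_eq!(prob_no_bias(Some(0.75), ReadPos::Major, 0.5), 0.75);
    assert_eq!(prob_no_bias(Some(0.75), ReadPos::Other, 0.5), 0.25);
}
```

With a learned rate, alt reads clustering at the major position only support the bias model to the extent that the ref-supporting reads do not cluster there as well.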
2 changes: 1 addition & 1 deletion src/calling/variants/mod.rs
@@ -210,7 +210,7 @@ impl Call {
read_position_bias.insert(
i,
match sample_info.artifacts.read_position_bias() {
ReadPositionBias::None => b'.',
ReadPositionBias::None { .. } => b'.',
ReadPositionBias::Some => b'^',
},
);
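
The pattern change from `ReadPositionBias::None` to `ReadPositionBias::None { .. }` follows from the enum change in `read_position_bias.rs` below: once the unit variant gains a `major_rate` field it becomes a struct-like variant, and match arms must explicitly ignore its fields. A minimal, hypothetical illustration (not the crate's types):

```rust
// Hypothetical, simplified enum; not the crate's real type.
enum Bias {
    None { rate: Option<f64> }, // previously a unit variant: `None`
    Some,
}

fn symbol(bias: &Bias) -> u8 {
    match bias {
        Bias::None { .. } => b'.', // `{ .. }` ignores the new field
        Bias::Some => b'^',
    }
}

fn main() {
    assert_eq!(symbol(&Bias::None { rate: None }), b'.');
    assert_eq!(symbol(&Bias::Some), b'^');
}
```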
11 changes: 11 additions & 0 deletions src/utils/collect_variants.rs
@@ -40,6 +40,17 @@ pub fn collect_variants(
skip_imprecise: bool,
skips: Option<&mut SimpleCounter<SkipReason>>,
) -> Result<Vec<VariantInfo>> {
// TODO ignore imprecise variants for now?
// let nonzero_bounds = |tag| {
// record.info(tag).integer().ok().map_or(false, |ci| {
// ci.map_or(false, |ci| ci.iter().any(|bound| *bound != 0))
// })
// };

// let imprecise = record.info(b"IMPRECISE").flag().ok().unwrap_or(false)
// || (!record.info(b"PRECISE").flag().ok().unwrap_or(false)
// && (nonzero_bounds(b"CIPOS") || nonzero_bounds(b"CIEND")));

let imprecise = record.info(b"IMPRECISE").flag().ok().unwrap_or(false);

let skip_incr = |reason| {
5 changes: 3 additions & 2 deletions src/variants/model/bias/mod.rs
@@ -154,7 +154,7 @@ impl Artifacts {
let read_position_biases = if consider_read_position_bias {
ReadPositionBias::iter().collect_vec()
} else {
vec![ReadPositionBias::None]
vec![ReadPositionBias::default()]
};
let read_orientation_biases = if consider_read_orientation_bias {
ReadOrientationBias::iter().collect_vec()
@@ -221,7 +221,7 @@ impl Artifacts {
ArtifactsBuilder::default()
.strand_bias(StrandBias::default())
.read_orientation_bias(ReadOrientationBias::None)
.read_position_bias(ReadPositionBias::None)
.read_position_bias(ReadPositionBias::default())
.softclip_bias(SoftclipBias::None)
.homopolymer_error(HomopolymerError::default())
.alt_locus_bias(AltLocusBias::None)
@@ -295,5 +295,6 @@ impl Artifacts {
pub(crate) fn learn_parameters(&mut self, pileups: &[Pileup]) {
self.homopolymer_error.learn_parameters(pileups);
self.strand_bias.learn_parameters(pileups);
self.read_position_bias.learn_parameters(pileups);
}
}
96 changes: 91 additions & 5 deletions src/variants/model/bias/read_position_bias.rs
@@ -1,36 +1,122 @@
use bio::stats::probs::LogProb;
use bio::stats::Prob;
use itertools::Itertools;
use ordered_float::NotNan;

use crate::variants::evidence::observations::pileup::Pileup;
use crate::variants::evidence::observations::read_observation::{
ProcessedReadObservation, ReadPosition,
};
use crate::variants::model::bias::Bias;

#[derive(Copy, Clone, PartialOrd, PartialEq, Eq, Debug, Ord, EnumIter, Hash)]
pub(crate) enum ReadPositionBias {
None,
None { major_rate: Option<NotNan<f64>> },
Some,
}

impl Default for ReadPositionBias {
fn default() -> Self {
ReadPositionBias::None
ReadPositionBias::None { major_rate: None }
}
}

impl Bias for ReadPositionBias {
fn prob_alt(&self, observation: &ProcessedReadObservation) -> LogProb {
match (self, observation.read_position) {
(ReadPositionBias::None, _) => observation.prob_hit_base, // normal
(
ReadPositionBias::None {
major_rate: Some(rate),
},
ReadPosition::Major,
) => LogProb::from(Prob(**rate)),
(
ReadPositionBias::None {
major_rate: Some(rate),
},
ReadPosition::Some,
) => LogProb::from(Prob(1.0 - **rate)),
(ReadPositionBias::None { major_rate: None }, ReadPosition::Major) => {
observation.prob_hit_base
}
(ReadPositionBias::None { major_rate: None }, ReadPosition::Some) => {
observation.prob_hit_base.ln_one_minus_exp()
}
(ReadPositionBias::Some, ReadPosition::Major) => LogProb::ln_one(), // bias
(ReadPositionBias::Some, ReadPosition::Some) => LogProb::ln_zero(), // no bias
}
}

fn prob_any(&self, observation: &ProcessedReadObservation) -> LogProb {
observation.prob_hit_base
LogProb::ln_one()
}

fn is_artifact(&self) -> bool {
*self != ReadPositionBias::None
!matches!(self, ReadPositionBias::None { .. })
}

fn is_informative(&self, pileups: &[Pileup]) -> bool {
// METHOD: if all reads overlap the variant at the major pos,
// we cannot estimate a read position bias and None is the only informative one.
!self.is_artifact() || Self::estimate_major_rate(pileups).is_some()
}

fn learn_parameters(&mut self, pileups: &[Pileup]) {
if let ReadPositionBias::None { ref mut major_rate } = self {
*major_rate = Self::estimate_major_rate(pileups);
}
}
}

impl ReadPositionBias {
fn estimate_major_rate(pileups: &[Pileup]) -> Option<NotNan<f64>> {
let strong_all = LogProb::ln_sum_exp(
&pileups
.iter()
.flat_map(|pileup| {
pileup.read_observations().iter().filter_map(|obs| {
if obs.is_strong_ref_support() {
Some(obs.prob_mapping())
} else {
None
}
})
})
.collect_vec(),
)
.exp();
let strong_major = LogProb::ln_sum_exp(
&pileups
.iter()
.flat_map(|pileup| {
pileup.read_observations().iter().filter_map(|obs| {
if obs.is_strong_ref_support() && obs.read_position == ReadPosition::Major {
Some(obs.prob_mapping())
} else {
None
}
})
})
.collect_vec(),
)
.exp();
let any_major = pileups.iter().any(|pileup| {
pileup
.read_observations()
.iter()
.any(|obs| obs.read_position == ReadPosition::Major)
});

if strong_all > 2.0 {
let major_fraction = strong_major / strong_all;
if any_major && major_fraction < 1.0 {
// METHOD: if there is any read with the major read position in either
// the ref or the alt supporting strong evidences and not all the reads
// supporting ref do that at the major position, we report a fraction
// and consider the read position bias.
return Some(NotNan::new(major_fraction).unwrap());
}
}
None
}
}
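
The METHOD comments above describe the estimation: sum the mapping probabilities of strong ref-supporting observations overall and at the major read position, and report their ratio only if the expected number of strong ref observations exceeds 2, at least one observation has the major position, and not all ref-supporting reads sit at the major position. Below is a self-contained sketch of that logic under simplified, hypothetical types (`Obs` standing in for `ProcessedReadObservation`, linear-space probabilities instead of `LogProb`); it is not the crate's actual API.

```rust
#[derive(Clone, Copy, PartialEq)]
enum ReadPos {
    Major,
    Other,
}

struct Obs {
    strong_ref_support: bool,
    prob_mapping: f64, // mapping probability in linear space
    pos: ReadPos,
}

/// Estimate the rate of the major read position among strong ref-supporting reads
/// (mirrors `ReadPositionBias::estimate_major_rate`, but over a flat slice of
/// simplified observations).
fn estimate_major_rate(observations: &[Obs]) -> Option<f64> {
    let strong_all: f64 = observations
        .iter()
        .filter(|o| o.strong_ref_support)
        .map(|o| o.prob_mapping)
        .sum();
    let strong_major: f64 = observations
        .iter()
        .filter(|o| o.strong_ref_support && o.pos == ReadPos::Major)
        .map(|o| o.prob_mapping)
        .sum();
    let any_major = observations.iter().any(|o| o.pos == ReadPos::Major);

    if strong_all > 2.0 {
        let major_fraction = strong_major / strong_all;
        // Only report a rate if some read hits the major position and the
        // ref-supporting reads are not exclusively at the major position.
        if any_major && major_fraction < 1.0 {
            return Some(major_fraction);
        }
    }
    None
}

fn main() {
    // Four strong ref-supporting reads, three of them at the major position:
    // expected rate 3.0 / 4.0 = 0.75.
    let obs = vec![
        Obs { strong_ref_support: true, prob_mapping: 1.0, pos: ReadPos::Major },
        Obs { strong_ref_support: true, prob_mapping: 1.0, pos: ReadPos::Major },
        Obs { strong_ref_support: true, prob_mapping: 1.0, pos: ReadPos::Major },
        Obs { strong_ref_support: true, prob_mapping: 1.0, pos: ReadPos::Other },
    ];
    assert_eq!(estimate_major_rate(&obs), Some(0.75));
}
```

In this example the learned rate of 0.75 would then be stored as `ReadPositionBias::None { major_rate: Some(0.75) }` by `learn_parameters` and used in `prob_alt` as shown above.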
2 changes: 1 addition & 1 deletion tests/resources/testcases/test20/testcase.yaml
@@ -5,7 +5,7 @@

expected:
posteriors:
- PROB_SOMATIC_TUMOR >= 40.0
- PROB_SOMATIC_TUMOR >= 30.0

# necessary bam files
samples:
8 changes: 2 additions & 6 deletions tests/resources/testcases/test70/testcase.yaml
@@ -1,8 +1,4 @@
# here because of its complex scenario, also this is seemingly a true call in a crazily crowded region

expected:
allelefreqs:
- patient48 > 0.0
# here because of its complex scenario

# necessary bam files
samples:
@@ -22,7 +18,7 @@ candidate: 'candidates.vcf'
scenario: 'scenario.yaml'



omit_read_position_bias: true

# reference sequence
reference:
2 changes: 1 addition & 1 deletion tests/resources/testcases/test_cmp/testcase.yaml
@@ -3,7 +3,7 @@
expected:
allelefreqs:
- sample_a >= 0.95
- sample_b >= 0.45 && sample_b <= 0.55
- sample_b >= 0.4 && sample_b <= 0.55
posteriors:
- PROB_HOMHET == 0.0

2 changes: 1 addition & 1 deletion tests/resources/testcases/test_l2fc/testcase.yaml
@@ -3,7 +3,7 @@
expected:
allelefreqs:
- sample_a >= 0.95
- sample_b >= 0.45 && sample_b <= 0.55
- sample_b >= 0.4 && sample_b <= 0.55
posteriors:
- PROB_A_GREATER_B < PROB_SIMILAR


