Skip to content

Commit

Permalink
fix: add flag for stating that candidate variants are atomic (in such…
Browse files Browse the repository at this point in the history
… cases, no realignment against SNVs and MNVs should happen because nearby indels can induce false positives if they are not considered in the realignment) (#400)
  • Loading branch information
huzuner committed Oct 7, 2023
1 parent 42adf59 commit 2e22c50
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 4 deletions.
3 changes: 3 additions & 0 deletions src/calling/variants/preprocessing/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ pub(crate) struct ObservationProcessor<R: realignment::Realigner + Clone + 'stat
raw_observation_output: Option<PathBuf>,
report_fragment_ids: bool,
adjust_prob_mapping: bool,
atomic_candidate_variants: bool,
}

impl<R: realignment::Realigner + Clone + std::marker::Send + std::marker::Sync>
Expand Down Expand Up @@ -497,6 +498,7 @@ impl<R: realignment::Realigner + Clone + std::marker::Send + std::marker::Sync>
ref_base()?,
alt,
self.realigner.clone(),
!self.atomic_candidate_variants,
))
};

Expand All @@ -520,6 +522,7 @@ impl<R: realignment::Realigner + Clone + std::marker::Send + std::marker::Sync>
.to_owned(),
alt.to_owned(),
self.realigner.clone(),
!self.atomic_candidate_variants,
))
};

Expand Down
14 changes: 14 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,16 @@ pub enum PreprocessKind {
)]
#[serde(default)]
report_fragment_ids: bool,
#[structopt(
long,
help = "Assume that candidate variants are given in atomic form (unlike e.g. \
provided by freebayes which combines adjacent variants that appear to be in \
phase into longer haplotypes). If variants are atomic, we do not want to perform \
realignment against SNVs and MNVs because those can lead to false positives if \
they are immediately followed or preceeded by indels."
)]
#[serde(default)]
atomic_candidate_variants: bool,
#[structopt(
long,
help = "Do not adjust mapping quality (MAPQ). By default Varlociraptor will adjust mapping qualities \
Expand Down Expand Up @@ -802,6 +812,7 @@ pub fn run(opt: Varlociraptor) -> Result<()> {
candidates,
bam,
report_fragment_ids,
atomic_candidate_variants,
omit_mapq_adjustment,
alignment_properties,
output,
Expand Down Expand Up @@ -877,6 +888,7 @@ pub fn run(opt: Varlociraptor) -> Result<()> {
hop_params,
realignment_window,
))
.atomic_candidate_variants(atomic_candidate_variants)
.build();
processor.process()?;
}
Expand Down Expand Up @@ -905,6 +917,7 @@ pub fn run(opt: Varlociraptor) -> Result<()> {
realignment_window,
reference_buffer,
))
.atomic_candidate_variants(atomic_candidate_variants)
.build();
processor.process()?;
}
Expand Down Expand Up @@ -933,6 +946,7 @@ pub fn run(opt: Varlociraptor) -> Result<()> {
gap_params,
realignment_window,
))
.atomic_candidate_variants(atomic_candidate_variants)
.build();
processor.process()?;
}
Expand Down
1 change: 1 addition & 0 deletions src/testcase/runner/common/version0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ impl Testcase for TestcaseVersion0 {
log_mode: "default".to_owned(),
pairhmm_mode: "exact".to_owned(),
output_raw_observations: None,
atomic_candidate_variants: false,
},
};

Expand Down
10 changes: 8 additions & 2 deletions src/variants/types/mnv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ pub(crate) struct Mnv<R: Realigner> {
ref_bases: Vec<u8>,
alt_bases: Rc<Vec<u8>>,
realigner: RefCell<R>,
realign_indel_reads: bool,
}

impl<R: Realigner> Mnv<R> {
Expand All @@ -50,6 +51,7 @@ impl<R: Realigner> Mnv<R> {
ref_bases: Vec<u8>,
alt_bases: Vec<u8>,
realigner: R,
realign_indel_reads: bool,
) -> Self {
Mnv {
locus: SingleLocus::new(genome::Interval::new(
Expand All @@ -59,6 +61,7 @@ impl<R: Realigner> Mnv<R> {
ref_bases: ref_bases.to_ascii_uppercase(),
alt_bases: Rc::new(alt_bases.to_ascii_uppercase()),
realigner: RefCell::new(realigner),
realign_indel_reads,
}
}

Expand Down Expand Up @@ -122,12 +125,15 @@ impl<R: Realigner> Variant for Mnv<R> {
alignment_properties: &AlignmentProperties,
alt_variants: &[Box<dyn Realignable>],
) -> Result<Option<AlleleSupport>> {
if utils::contains_indel_op(&**read) || !alt_variants.is_empty() {
if self.realign_indel_reads
&& (utils::contains_indel_op(&**read) || !alt_variants.is_empty())
{
// METHOD: reads containing indel operations should always be realigned,
// as their support or non-support of the MNV might be an artifact
// of the aligner. Also, if we have alt alignments here, we need to
// realign as well since we need the multi-allelic case handling in the
// realigner.
// realigner. TODO the latter is probably not needed anymore with third allele
// handling. Check this.
Ok(Some(self.realigner.borrow_mut().allele_support(
&**read,
[&self.locus].iter(),
Expand Down
12 changes: 10 additions & 2 deletions src/variants/types/snv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,17 @@ pub(crate) struct Snv<R: Realigner> {
ref_base: u8,
alt_base: u8,
realigner: RefCell<R>,
realign_indel_reads: bool,
}

impl<R: Realigner> Snv<R> {
pub(crate) fn new(locus: genome::Locus, ref_base: u8, alt_base: u8, realigner: R) -> Self {
pub(crate) fn new(
locus: genome::Locus,
ref_base: u8,
alt_base: u8,
realigner: R,
realign_indel_reads: bool,
) -> Self {
Snv {
locus: SingleLocus::new(genome::Interval::new(
locus.contig().to_owned(),
Expand All @@ -50,6 +57,7 @@ impl<R: Realigner> Snv<R> {
ref_base: ref_base.to_ascii_uppercase(),
alt_base: alt_base.to_ascii_uppercase(),
realigner: RefCell::new(realigner),
realign_indel_reads,
}
}
}
Expand Down Expand Up @@ -108,7 +116,7 @@ impl<R: Realigner> Variant for Snv<R> {
alignment_properties: &AlignmentProperties,
alt_variants: &[Box<dyn Realignable>],
) -> Result<Option<AlleleSupport>> {
if utils::contains_indel_op(&**read) {
if self.realign_indel_reads && utils::contains_indel_op(&**read) {
// METHOD: reads containing indel operations should always be realigned,
// as their support or non-support of the SNV might be an artifact
// of the aligner.
Expand Down
1 change: 1 addition & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ testcase!(test_imprecise_fusion_absent, exact);
testcase!(test_uzuner_clonal_1, exact);
testcase!(test_uzuner_clonal_2, exact);
testcase!(test_uzuner_clonal_3, exact);
testcase!(test_uzuner_fp_snv_on_ins, exact);

fn basedir(test: &str) -> String {
format!("tests/resources/{}", test)
Expand Down
228 changes: 228 additions & 0 deletions tests/resources/testcases/test_uzuner_fp_snv_on_ins/candidates.vcf

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tests/resources/testcases/test_uzuner_fp_snv_on_ins/ref.fa
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>6
AAATGAAACCGGGTAAACGCGCCTGGGGCTCTCGCCGGTCGAGGGTCTGGGCGGGTCCCGCGGCCTCAGGGAGGCGGATCTCGGACCCGGAGACTCGGGGCGACCCGGGCCGTACGTGGGGGATGGGGAGTCGTGACCTGCGCCCCGGGCCGGGGTCACTCACCGGCCTCGCTCTGGTTGTAGTAGCCGCGCAGGTTCCGCAGGCTCTCTCGGTCAGTCTGTGCCTGGGCCTTGTAGATCTGTGTGTTCCGGTCCCAATACTCCGGCCCCTCCTGCTCTATCCACGGCGCCCGCGGCTCCTCTCTCGGACTCGCGGCGTCGCTGTCGAACCTCACGAACTGGGTGTCGTCCACGTAGCCCACTGAGATGAAGCGGGGCTCCCCGCGGCCGGGCCGGGACACGGAGGTGTAGAAATACCTCATGGAGTGGGAGCCTGGGGGTGAGGAGGGGCTGAGACCCGCCCGACCCTCCTCCCGGCGCGGCTCCTCAGGTCCTGCGCCCCCGCCTGCGGTCCCCTCGCTCCTCCCGGCAGAGGCCATTTCCCTCCCGACCCGCACTCACCGGCCCAGGTCTCGGT
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
samples:
sample:
universe: "[0.0,1.0]"

events:
present: "sample:]0.0,1.0]"
28 changes: 28 additions & 0 deletions tests/resources/testcases/test_uzuner_fp_snv_on_ins/testcase.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
expected:
allelefreqs:
# write down a list of expressions of the form
- sample == 0.0

# necessary bam files
samples:
sample:
path: 'sample.bam'
properties: '{"insert_size":{"mean":150.73021840315073,"sd":76.48661586376208},"max_del_cigar_len":14,"max_ins_cigar_len":18,"frac_max_softclip":0.9900990099009901,"max_read_len":101,"max_mapq":60,"gap_params":{"prob_insertion_artifact":-7.469450748118796,"prob_deletion_artifact":-8.191662061622653,"prob_insertion_extend_artifact":-0.9516818568143385,"prob_deletion_extend_artifact":-0.4580555877859265},"hop_params":{"prob_seq_homopolymer":[null,null,null,null],"prob_ref_homopolymer":[null,null,null,null],"prob_seq_extend_homopolymer":[null,null,null,null],"prob_ref_extend_homopolymer":[null,null,null,null]},"wildtype_homopolymer_error_model":{"2":0.0001651729006327695,"1":0.0013056524526209399,"-1":3.9326881103040365e-6,"4":0.000043259569213344394,"22":3.9326881103040365e-6,"3":0.00001966344055152018,"-2":0.000023596128661824215,"0":0.9987690686214749},"initial":false}'
options: '{"Preprocess":{"kind":{"Variants":{"reference":"?","candidates":"?","bam":"?","report_fragment_ids":true,"omit_mapq_adjustment":true,"atomic_candidate_variants":true,"reference_buffer_size":10,"min_bam_refetch_distance":1,"alignment_properties":null,"output":"?","propagate_info_fields":[],"protocol_strandedness":"Opposite","realignment_window":64,"max_depth":200,"omit_insert_size":false,"pairhmm_mode":"exact","log_mode":"default","output_raw_observations":null}}}}'


# candidate variant
candidate: 'candidates.vcf'

scenario: 'scenario.yaml'




# reference sequence
reference:
path: 'ref.fa'

mode: Generic

version: '4'

0 comments on commit 2e22c50

Please sign in to comment.