From 9df2e1790b6a8d4dec5c61ecb5782b3dc2c471ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=20A=2E=20Sch=C3=A4fer?= Date: Fri, 21 Jun 2024 00:50:07 -0500 Subject: [PATCH] Readgroup issue (#28) * catch errors when rnaseq is not provided (exitron/altsplicing calling) and add reference genome index as input for indel calling (htcaller) * added reference genome index on germline indel calling (which is required when only indel calling has been activated & remove -C from BWA mem call (on DNAseq data) which causes issues on Illumina identifier --- CHANGELOG.md | 8 ++++++++ workflow/rules/align.smk | 7 +++---- workflow/rules/common.smk | 16 ++++++++++------ workflow/rules/germline.smk | 3 ++- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f9196c..5ff3411 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Prioritization of neoantigens is now done separately for each variant type (speeds up the process) - NMD information (e.g., escape rule,...) is now also calculated for all variants +## [0.2.6] - 2024-06-20 + +### Fix + +- Added routines to catch errors when rnaseq data is not provided but exitron/alternative splicing calling is activated +- Added reference genome index as input to germline indel calling (necessary when only indel calling is activated) +- removed -C from BWA mem call (on DNAseq data) to avoid error on Illumina identifiers + ## [0.2.5] - 2024-06-19 ### Fix diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 9ec2699..30d1dc9 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -231,10 +231,9 @@ if config['data']['dnaseq_filetype'] in ['.fq','.fastq']: threads: config['threads'] shell: """ - bwa mem -t{threads} -C resources/refs/bwa/genome {input.reads} \ - | samtools addreplacerg -r ID:{wildcards.group} -r SM:{wildcards.sample} \ - -r LB:{wildcards.sample} -r PL:ILLUMINA -r PU:{wildcards.group} - - \ - | samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1 + bwa mem -t{threads} resources/refs/bwa/genome \ + -R '@RG\\tID:{wildcards.group}\\tSM:{wildcards.sample}\\tLB:{wildcards.sample}\\tPL:ILLUMINA' \ + {input.reads} | samtools sort -@ 6 -n -m1g - -o {output} > {log} 2>&1 """ rule dnaseq_postproc: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 75ec9a5..b6d4402 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -734,18 +734,22 @@ def get_prioritization_long_indels(wildcards): def get_prioritization_exitrons(wildcards): exitrons = [] if config["exitronsplicing"]["activate"]: - exitrons += expand("results/{sample}/annotation/exitrons.vcf", - sample=config["data"]["name"]) - + if len(config["data"]["rnaseq"]) != 0: + exitrons += expand("results/{sample}/annotation/exitrons.vcf", + sample=config["data"]["name"]) + else: + print('rnaseq data has not been specified in the config file, but exitron calling is activated - skipping...') return exitrons def get_prioritization_altsplicing(wildcards): altsplicing = [] if config["altsplicing"]["activate"]: - altsplicing += expand("results/{sample}/annotation/altsplicing.vcf", - sample=config["data"]["name"]) - + if len(config["data"]["rnaseq"]) != 0: + altsplicing += expand("results/{sample}/annotation/altsplicing.vcf", + sample=config["data"]["name"]) + else: + print('rnaseq data has not been specified in the config file, but alternative splicing calling is activated - skipping...') return altsplicing def get_prioritization_custom(wildcards): diff --git a/workflow/rules/germline.smk b/workflow/rules/germline.smk index 2081405..0cd912f 100644 --- a/workflow/rules/germline.smk +++ b/workflow/rules/germline.smk @@ -73,7 +73,8 @@ rule detect_variants_htc_first_round: input: bam="results/{sample}/{seqtype}/align/{group}_final_BWA_split/{chr}.bam", idx="results/{sample}/{seqtype}/align/{group}_final_BWA_split/{chr}.bam.bai", - ref="resources/refs/genome.fasta" + ref="resources/refs/genome.fasta", + ref_idx="resources/refs/genome.fasta.fai" output: vcf="results/{sample}/{seqtype}/indel/htcaller/{group}_variants.1rd/{chr}.vcf" message: