From e3247146bfd8182997ca2a8a604b96d9573e3160 Mon Sep 17 00:00:00 2001 From: Tarun Mamidi Date: Thu, 1 Feb 2024 17:19:40 -0600 Subject: [PATCH] PR requests part-2 --- .test_data/README | 2 +- .test_data/{file_list => file_list.txt} | 0 CHANGELOG.md | 8 ++++++++ README.md | 21 +++++++++++++-------- model.job | 2 +- pipeline.nf | 2 +- src/analysis/filter.sh | 2 +- 7 files changed, 25 insertions(+), 12 deletions(-) rename .test_data/{file_list => file_list.txt} (100%) diff --git a/.test_data/README b/.test_data/README index 74d8dba..c0fa03a 100644 --- a/.test_data/README +++ b/.test_data/README @@ -4,4 +4,4 @@ This directory has 3 files - `testing_variants_hg38.vcf.gz` - We custom made a test VCF file with few variants from every chromosome (1-22,X,Y) -`file_list` - contains list of above 2 test vcf files with relative path. This file is used to test nextflow pipeline +`file_list.txt` - contains a list of the above 2 test VCF files with relative paths. This file is used to test the nextflow pipeline diff --git a/.test_data/file_list b/.test_data/file_list.txt similarity index 100% rename from .test_data/file_list rename to .test_data/file_list.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index df89b36..1b1911b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,3 +10,11 @@ YYYY-MM-DD John Doe ``` --- + +```txt +2024-02-01 Tarun Mamidi + +* Uses OpenCRAVAT for annotations +* Uses Neural Networks from keras instead of traditional scikit-learn models +* Nextflow pipeline to annotate, parse and produce DITTO predictions +``` diff --git a/README.md b/README.md index 41b87f2..aab4140 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,16 @@ Markdown](https://github.com/uab-cgds-worthey/DITTO/actions/workflows/linting.ym DITTO is an explainable neural network that can be helpful for accurate and rapid interpretation of small genetic variants for pathogenicity using patient’s genotype (VCF) information. -## Usage +## Using DITTO + +DITTO scores for variants can be obtained in the following 3 ways. 
Webapp and API are for single variant analysis and the +local setup is for batch/bulk variant predictions. ### Webapp DITTO is available for public use at this [website](https://cgds-ditto.streamlit.app/). + ### API DITTO is not hosted as a public API but one can serve up locally to query DITTO scores. Please follow the instructions @@ -72,13 +76,13 @@ Please follow the steps mentioned in [install_openCravat.md](docs/install_openCr #### Run DITTO pipeline -Create an environment via conda or pip. Below is an example to install `nextflow`. +Create an environment via conda. Below is an example to install `nextflow`. - [Anaconda virtual environment](https://docs.anaconda.com/free/anaconda/install/index.html) ```sh # create environment. Needed only the first time. Please use the above link if you're not using Mac. -conda create --name envi ditto-env +conda create --name ditto-env conda activate ditto-env @@ -86,11 +90,12 @@ conda activate ditto-env conda install bioconda::nextflow ``` -Please make a samplesheet with VCF files (incl. path). Please make sure to edit the directory paths as needed. +Please make a samplesheet with VCF files (incl. path). Please make sure to edit the directory paths as needed and run +the pipeline as shown below. ```sh nextflow run pipeline.nf \ - --outdir /data/ \ + --outdir ./data/ \ -work-dir ./wor_dir \ --build hg38 -with-report \ --oc_modules /data/opencravat/modules \ @@ -114,6 +119,6 @@ For queries, please open a GitHub issue. 
For urgent queries, send an email with clear description to |Name | Email | -------|--------| -Tarun Mamidi | -Liz Worthey | +|------|--------| +|Tarun Mamidi | | +|Liz Worthey | | diff --git a/model.job b/model.job index 6860325..b25aefa 100644 --- a/model.job +++ b/model.job @@ -28,6 +28,6 @@ module load Anaconda3 --outdir /data/results \ -work-dir .work_dir/ \ --build hg38 -c cheaha.config -with-report \ - --sample_sheet .test_data/file_list -resume + --sample_sheet .test_data/file_list.txt -resume #https://training.nextflow.io/basic_training/cache_and_resume/#how-to-organize-in-silico-experiments diff --git a/pipeline.nf b/pipeline.nf index 7585314..76c6023 100644 --- a/pipeline.nf +++ b/pipeline.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl=2 // Define the command-line options to specify the path to VCF files -params.sample_sheet = '.test_data/file_list' +params.sample_sheet = '.test_data/file_list.txt' params.build = "hg38" params.oc_modules = "/data/project/worthey_lab/projects/experimental_pipelines/tarun/opencravat/modules" // Define the Scratch directory diff --git a/src/analysis/filter.sh b/src/analysis/filter.sh index 262acbc..077e797 100644 --- a/src/analysis/filter.sh +++ b/src/analysis/filter.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -euo pipefail # Filter the DITTO scores and other annotations after running the pipeline. Example tested on CAGI project # Specify the input folder containing the CSV files