diff --git a/HISTORY.rst b/HISTORY.rst index 1f5c4747f..5987332d8 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -11,6 +11,9 @@ End-User Summary - Fixed occasionally breaking tests ``ProjectExportTest`` by sorting member list. This bug didn't affect the correct output but wasn't consistent in the order of samples. +- Fixed above-mentioned bug again by consolidating two distinct ``Meta`` classes in ``Case`` model. +- Fixed bug in SV tests that became visible through the above fix and created an additional variant that wasn't intended. +- Adapted core installation instructions in the manual for latest data release and introduced use of VarFish API for import. Full Change List ================ @@ -18,6 +21,9 @@ Full Change List - Fixed occasionally breaking tests ``ProjectExportTest`` by sorting member list. This bug didn't affect the correct output but wasn't consistent in the order of samples. Reason for this is unknown but might be that the order of cases a project is not always returned as in order they were created. +- Fixed above-mentioned bug again by consolidating two distinct ``Meta`` classes in ``Case`` model. +- Fixed bug in SV tests that became visible through the above fix and created an additional variant that wasn't intended. +- Adapted core installation instructions in the manual for latest data release and introduced use of VarFish API for import. 
------- v0.22.1 diff --git a/cohorts/models.py b/cohorts/models.py index de7d46cf7..81dc74c2a 100644 --- a/cohorts/models.py +++ b/cohorts/models.py @@ -107,4 +107,4 @@ def get_family_with_filtered_pedigree_with_samples(self, user): def get_members(self, user): """Return concatenated list of members in ``pedigree``.""" - return [x["patient"] for x in self.get_filtered_pedigree_with_samples(user)] + return sorted([x["patient"] for x in self.get_filtered_pedigree_with_samples(user)]) diff --git a/docs_manual/setup_core.rst b/docs_manual/setup_core.rst index a1224bef9..37695678f 100644 --- a/docs_manual/setup_core.rst +++ b/docs_manual/setup_core.rst @@ -180,6 +180,48 @@ Let VarFish create the required tables in the ``varfish`` database: varfish-manage migrate +.. _setup_core_api: + +----------------- +Setup VarFish API +----------------- + +Setting up the API for use with VarFish can be done on any computer that should be able to use the VarFish API +(i.e. the computer you want to upload the data from). Most likely, this is not the VarFish Server itself but the +computer you have processed your data on. + +Firstly, make sure that you have set up an API token as described in the section :ref:`ui_api_tokens`. +This enables you to easily import cases into VarFish. +Next, create a ``~/.varfishrc.toml`` file in your home directory on the computer that should be able to communicate +with the VarFish Server, and paste and adapt the following lines +(substitute ``VARFISH_IP``, ``VARFISH_PORT`` and ``VARFISH_API_TOKEN`` with your values): + +.. code-block:: + + [global] + + varfish_server_url = "http://VARFISH_IP:VARFISH_PORT/" + varfish_api_token = "VARFISH_API_TOKEN" + +Next, install the VarFish CLI on the computer that should be able to communicate with the VarFish Server. +For this, follow the instructions on PyPi for `VarFish CLI `_. + +.. 
_setup_core_varfish_annotator: + +----------------------- +Setup VarFish Annotator +----------------------- + +To prepare your files for upload to the VarFish Server, you have to install the VarFish Annotator on the +computer you want to process the data on (which is likely not the same computer you run the VarFish Server on). +The package is available in bioconda, and when you have set up a conda environment, you can easily install the VarFish Annotator: + +.. code-block:: + + conda install varfish-annotator-cli + +Probably you want to have the VarFish Annotator and the VarFish CLI installed on the same computer. + .. _setup_core_varfish_database: ----------------- @@ -191,19 +233,25 @@ Download the data packages from the public VarFish website and unpack it in a pl .. code-block:: bash cd /plenty/space/ - wget https://file-public.bihealth.org/transient/varfish/varfish-server-background-db-20190820.tar.gz - wget https://file-public.bihealth.org/transient/varfish/varfish-annotator-transcripts-2019020.tar.gz - wget https://file-public.bihealth.org/transient/varfish/varfish-annotator-db-20190820.h2.db.gz - tar xzvf varfish-server-background-db-20190820.tar.gz - tar xvvf varfish-annotator-transcripts-20190820.tar.gz - gunzip varfish-annotator-db-20190820.h2.db.gz + + wget https://file-public.bihealth.org/transient/varfish/varfish-server-background-db-20201006.tar.gz{,.sha256} + wget https://file-public.bihealth.org/transient/varfish/varfish-annotator-20201006.tar.gz{,.sha256} + wget https://file-public.bihealth.org/transient/varfish/jannovar-db-20201006.tar.gz{,.sha256} + + sha256sum -c varfish-server-background-db-20201006.tar.gz.sha256 + sha256sum -c varfish-annotator-20201006.tar.gz.sha256 + sha256sum -c jannovar-db-20201006.tar.gz.sha256 + + tar xzvf varfish-server-background-db-20201006.tar.gz + tar xzvf varfish-annotator-20201006.tar.gz + tar xzvf jannovar-db-20201006.tar.gz Background Databases ^^^^^^^^^^^^^^^^^^^^ .. 
code-block:: bash - varfish-manage import_tables --tables-path /plenty/space/varfish-server-background-db-20190820 + varfish-manage import_tables --tables-path /plenty/space/varfish-server-background-db-20201006 .. note:: @@ -212,18 +260,21 @@ Background Databases Cases ^^^^^ +On the computer you have the VarFish Annotator installed, the following commands will prepare your VCFs for upload to +the VarFish Server. + Annotate small variants of a case: .. code-block:: bash varfish-annotator annotate \ --case-id $CASE_NAME \ - --db-path /plenty/space/varfish-annotator-db-20190820.h2.db \ - --ensembl-ser-path /plenty/space/varfish-annotator-transcripts-20190820/hg19_ensembl.ser \ + --db-path /plenty/space/varfish-annotator-20201006/varfish-annotator-db-20201006.h2.db \ + --ensembl-ser-path /plenty/space/varfish-annotator-20201006/hg19_ensembl.ser \ --input-vcf $INPUT_VCF \ --output-db-info ${CASE_NAME}.db-info.gz \ --output-gts ${CASE_NAME}.gts.tsv.gz \ - --refseq-ser-path /plenty/space/varfish-annotator-transcripts-20190820/hg19_refseq_curated.ser \ + --refseq-ser-path /plenty/space/varfish-annotator-20201006/hg19_refseq_curated.ser \ --release GRCh37 Annotate structural variants of a case: @@ -232,147 +283,31 @@ Annotate structural variants of a case: varfish-annotator annotate-svs \ --case-id $CASE_NAME \ - --db-path /plenty/space/varfish-annotator-db-20190820.h2.db \ - --ensembl-ser-path /plenty/space/varfish-annotator-transcripts-20190820/hg19_ensembl.ser \ + --db-path /plenty/space/varfish-annotator-20201006/varfish-annotator-db-20201006.h2.db \ + --ensembl-ser-path /plenty/space/varfish-annotator-20201006/hg19_ensembl.ser \ --input-vcf $INPUT_VCF \ --output-db-info ${CASE_NAME}.db-info.gz \ --output-feature-effects ${CASE_NAME}.effects.gts.tsv.gz \ --output-gts ${CASE_NAME}.svs.gts.tsv.gz \ - --refseq-ser-path /plenty/space/varfish-annotator-transcripts-20190820/hg19_refseq_curated.ser \ + --refseq-ser-path 
/plenty/space/varfish-annotator-20201006/hg19_refseq_curated.ser \ --release GRCh37 -Import a small variant case (replace UUID with your projects UUID): +After annotating and preparing the VCF files, you can use the VarFish CLI to import the data into the +VarFish Server via the API. Please also make md5 sum files available for each file: .. code-block:: bash - varfish-manage import_case \ - --case-name $CASE_NAME \ - --index-name $INDEX_NAME \ - --path-ped $PATH_PED \ - --path-genotypes ${CASE_NAME}.gts.tsv.gz \ - --path-db-info ${CASE_NAME}.db-info.gz \ - --project-uuid eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee + for i in ${CASE_NAME}.*; do md5sum $i > $i.md5; done -Import a structural variant case (replace UUID with your projects UUID): +Import a small or structural variant case (replace the ``eee...eee`` UUID with your projects UUID): .. code-block:: bash - varfish-manage import_case \ - --case-name $CASE_NAME \ - --index-name $INDEX_NAME \ - --path-ped $PATH_PED \ - --path-genotypes ${CASE_NAME}.svs.gts.tsv.gz \ - --path-feature-effects {$CASE_NAME}.effects.gts.tsv.gz \ - --path-db-info ${CASE_NAME}.db-info.gz \ - --project-uuid eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee + varfish-cli case create-import-info eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee ${CASE_NAME}.* --------------------------- Create your own data freeze --------------------------- In case you need different versions in the data import than provided, the VarFish DB Downloader allows you to do so. -First, clone the repository: - -.. code-block:: bash - - git clone git@cubi-gitlab.bihealth.org:CUBI_Engineering/VarFish/varfish-db-downloader - cd varfish-db-downloader - -Create a conda environment that provides all necessary programs tob run the data import. - -.. code-block:: bash - - conda env create -n varfish-db-downloader -f environment.yaml - conda activate varfish-db-downloader - pip install -r requirements.txt - -Running the actual is done with one command. 
You might want to adapt versions to your need in the source code, -especially in the ``Snakefile`` and ``snakefiles/*``. - -.. code-block:: bash - - snakemake - -.. note:: - - This might take some time, depending on your internet connection. - Make also sure that you provide at least 1.5 TB of space. - Also, note that this heavily relies on external data providers and therefore might contain broken links. - In case you encounter them, please open an issue in the `Github project issue tracker `_. - -.. code-block:: bash - - conda create -n varfish-annotator varfish-annotator-cli jannovar-cli - conda activate varfish-annotator - -Adapt ``ANNOTATOR_DATA_RELEASE`` and ``ANNOTATOR_VERSION`` to the current values in the following -code snippet. - -.. code-block:: bash - - ANNOTATOR_DATA_RELEASE=20190820 - ANNOTATOR_VERSION=0.9 - DOWNLOAD=varfish-db-downloader-finalizing-sv-dbs/varfish-annotator-db-$ANNOTATOR_DATA_RELEASE/ - -.. code-block:: bash - - tar chzvf \ - varfish-annotator-db-$ANNOTATOR_DATA_RELEASE.tar.gz \ - varfish-annotator-db-$ANNOTATOR_DATA_RELEASE/ - sha256sum \ - varfish-annotator-db-$ANNOTATOR_DATA_RELEASE.tar.gz \ - > varfish-annotator-db-$ANNOTATOR_DATA_RELEASE.tar.gz.sha256 - -.. code-block:: bash - - jannovar download \ - -d hg19/refseq_curated \ - --download-dir varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE - jannovar download \ - -d hg19/ensembl \ - --download-dir varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE - tar czvf \ - varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE.tar.gz \ - varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE/*.ser - sha256sum \ - varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE.tar.gz \ - > varfish-annotator-transcripts-$ANNOTATOR_DATA_RELEASE.tar.gz.sha256 - -.. 
code-block:: bash - - varfish-annotator init-db \ - --db-release-info "varfish-annotator:v$ANNOTATOR_VERSION" \ - --db-release-info "varfish-annotator-db:r$ANNOTATOR_DATA_RELEASE" \ - \ - --ref-path $DOWNLOAD/GRCh37/reference/hs37d5/hs37d5.fa \ - \ - --db-release-info "clinvar:2019-06-22" \ - --clinvar-path $DOWNLOAD/GRCh37/clinvar/latest/clinvar_tsv_main/output/clinvar_allele_trait_pairs.single.b37.tsv.gz \ - --clinvar-path $DOWNLOAD/GRCh37/clinvar/latest/clinvar_tsv_main/output/clinvar_allele_trait_pairs.multi.b37.tsv.gz \ - \ - --db-path ./varfish-annotator-db-$ANNOTATOR_DATA_RELEASE \ - \ - --db-release-info "exac:r1.0" \ - --exac-path $DOWNLOAD/GRCh37/ExAC/r1/download/ExAC.r1.sites.vep.vcf.gz \ - \ - --db-release-info "gnomad_exomes:r2.1" \ - $(for path in $DOWNLOAD/GRCh37/gnomAD_exomes/r2.1/download/gnomad.exomes.r2.1.sites.chr*.normalized.vcf.bgz; do \ - echo --gnomad-exomes-path $path; \ - done) \ - \ - --db-release-info "gnomad_genomes:r2.1" \ - $(for path in $DOWNLOAD/GRCh37/gnomAD_genomes/r2.1/download/gnomad.genomes.r2.1.sites.chr*.normalized.vcf.bgz; do \ - echo --gnomad-genomes-path $path; \ - done) \ - \ - --db-release-info "thousand_genomes:v3.20101123" \ - --thousand-genomes-path $DOWNLOAD/GRCh37/thousand_genomes/phase3/ALL.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.vcf.gz \ - \ - --db-release-info "hgmd_public:ensembl_r75" \ - --hgmd-public $DOWNLOAD/GRCh37/hgmd_public/ensembl_r75/HgmdPublicLocus.tsv - gzip -c \ - varfish-annotator-db-${ANNOTATOR_DATA_RELEASE}.db.h2 \ - > varfish-annotator-db-${ANNOTATOR_DATA_RELEASE}.db.h2.gz - sha256sum \ - varfish-annotator-db-${ANNOTATOR_DATA_RELEASE}.h2.db.gz \ - > varfish-annotator-db-${ANNOTATOR_DATA_RELEASE}.h2.db.gz.sha256 +Please follow the instructions in the `Varfish DB Downloader project `_. 
diff --git a/docs_manual/ui_api_tokens.rst b/docs_manual/ui_api_tokens.rst index d2216f7d0..479543617 100644 --- a/docs_manual/ui_api_tokens.rst +++ b/docs_manual/ui_api_tokens.rst @@ -5,7 +5,8 @@ API Token Management ==================== This page allows for managing API tokens. -This feature is only interesting if you want to use software (or develop one yourself) that interfaces with SODAR programatically. +This feature is interesting if you want to use software (or develop one yourself) that interfaces with SODAR programmatically, +or if you want to use the API import feature of VarFish to easily import your cases. .. figure:: figures/misc_ui/api_tokens.png @@ -23,4 +24,4 @@ Please also note that if you create and use an API token then, currently, whoeve Allowing to limit scope is on the list of future features, but currently this has not been implemented. On creation, you can chose a number of hours that the token should be valid. -Using an expiry time of :guilabel:`0` will make the token never expire. \ No newline at end of file +Using an expiry time of :guilabel:`0` will make the token never expire. 
diff --git a/svs/tests/test_queries.py b/svs/tests/test_queries.py index 7b6417344..afeb36ac6 100644 --- a/svs/tests/test_queries.py +++ b/svs/tests/test_queries.py @@ -658,10 +658,10 @@ def setUp(self): case__structure="trio", case__inheritance="denovo" ) self.case = self.variant_set.case - self.svs = [StructuralVariantFactory(variant_set=self.variant_set)] + self.sv = StructuralVariantFactory(variant_set=self.variant_set) StructuralVariantGeneAnnotationFactory( - ensembl_gene_id=self.hgnc.ensembl_gene_id, sv_uuid=self.svs[0].sv_uuid + ensembl_gene_id=self.hgnc.ensembl_gene_id, sv=self.sv ) # # TODO FIXME XXX # def testPassGeneAllowList(self): @@ -678,7 +679,7 @@ def testPassGeneBlockList(self): SingleCaseFilterQuery, {"gene_blocklist": [self.hgnc.symbol + "XXX"]}, 1 ) result = list(result) - self.assertEqual(self.svs[0].sv_uuid, result[0]["sv_uuid"]) + self.assertEqual(self.sv.sv_uuid, result[0]["sv_uuid"]) def testFailGeneBlockList(self): self.run_query(SingleCaseFilterQuery, {"gene_blocklist": [self.hgnc.symbol]}, 0) @@ -938,17 +939,15 @@ def setUp(self): case__structure="trio", case__inheritance="denovo" ) self.case = self.variant_set.case - self.svs = [StructuralVariantFactory(variant_set=self.variant_set)] + self.sv = StructuralVariantFactory(variant_set=self.variant_set) StructuralVariantGeneAnnotationFactory( - sv_uuid=self.svs[0].sv_uuid, - refseq_transcript_coding=True, - ensembl_transcript_coding=True, + sv=self.sv, refseq_transcript_coding=True, ensembl_transcript_coding=True, ) def testIncludeTranscriptCoding(self): result = self.run_query(SingleCaseFilterQuery, {"transcripts_coding": True}, 1) result = list(result) - self.assertEqual(self.svs[0].sv_uuid, result[0]["sv_uuid"]) + self.assertEqual(self.sv.sv_uuid, result[0]["sv_uuid"]) def testExcludeTranscriptCoding(self): self.run_query(SingleCaseFilterQuery, {"transcripts_coding": False}, 0) diff --git a/variants/models.py b/variants/models.py index 4597703f3..8a28d517f 100644 --- 
a/variants/models.py +++ b/variants/models.py @@ -186,7 +186,7 @@ def get_case_pks(self): def get_members(self): """Return concatenated list of members in ``pedigree``.""" - return [x["patient"] for x in self.get_filtered_pedigree_with_samples()] + return sorted([x["patient"] for x in self.get_filtered_pedigree_with_samples()]) def get_active_smallvariant_cases(self): """Return activate cases.""" @@ -616,7 +616,7 @@ def get_background_jobs(self): def get_members(self): """Return list of members in ``pedigree``.""" - return [x["patient"] for x in self.pedigree] + return sorted([x["patient"] for x in self.pedigree]) def get_filtered_pedigree_with_samples(self): """Return filtered pedigree lines with members with ``has_gt_entries``.""" @@ -630,7 +630,7 @@ def get_family_with_filtered_pedigree_with_samples(self): def get_members_with_samples(self): """Returns names of members that genotype information / samples in imported VCF file.""" # TODO: unit test me - return [x["patient"] for x in self.get_filtered_pedigree_with_samples()] + return sorted([x["patient"] for x in self.get_filtered_pedigree_with_samples()]) def get_trio_roles(self): """Returns a dict with keys mapping ``index``, ``mother``, ``father`` to pedigree member names if present.