diff --git a/HISTORY.rst b/HISTORY.rst index 826e1faba..dfed61b57 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -6,6 +6,8 @@ History / Changelog HEAD (unreleased) ----------------- +Breaking changes, see below. + End-User Summary ================ @@ -25,6 +27,10 @@ End-User Summary - Added section for developers in manual (#267). - Migrated icons to iconify (#208). - Bumped chrome-driver version (#208). +- VarFish now allows for the import of GRCh38-annotated variants. + For this, the GRCh38 background data must be imported. + Kiosk mode does not support GRCh38 yet. + **This is a breaking change; the new data release and CLI must be used!** Full Change List ================ @@ -50,6 +56,11 @@ Full Change List - Migrated icons to iconify (#208). - Bumped chrome-driver version (#208). - Skipping codacy if token is not defined (#275). +- Adjusting models and UI to support GRCh38-annotated cases. + It is currently not possible to migrate a GRCh37 case to GRCh38. +- The setting ``VARFISH_CADD_SUBMISSION_RELEASE`` has been renamed to ``VARFISH_CADD_SUBMISSION_VERSION`` (**breaking change**). +- ``import_info.tsv`` is now expected in the format of data release ``20210728``, as built by varfish-db-downloader ``1b03e97`` or later. +- Extended the columns of ``Hgnc`` to match the upstream update. ------- v0.23.9 diff --git a/cohorts/templates/cohorts/cohort_create.html b/cohorts/templates/cohorts/cohort_create.html index 4ca1bdfab..f8de5afa1 100644 --- a/cohorts/templates/cohorts/cohort_create.html +++ b/cohorts/templates/cohorts/cohort_create.html @@ -112,6 +112,7 @@
' ' + ' {{ label }}' + ' {{ case.get_members|length }}' + + ' {{ case.release }}' + ' ' + ' ' + '' diff --git a/cohorts/templates/cohorts/cohort_list.html b/cohorts/templates/cohorts/cohort_list.html index 26cb865c2..43da35018 100644 --- a/cohorts/templates/cohorts/cohort_list.html +++ b/cohorts/templates/cohorts/cohort_list.html @@ -2,6 +2,7 @@ {% load dict %} {% load humanize %} +{% load variants_tags %} {% load cohorts_tags %} {% load projectroles_common_tags %} @@ -90,6 +91,7 @@

{{ case.name }} {{ case.get_members|length }} + {{ case.release }} {% endfor %} {% if not item|check_accessible_cases:user %} @@ -105,7 +107,18 @@

{% endif %} - + + + {% same_release cases as cases_same_release %} + {% if cases_same_release %} + + {% else %} + + {% endif %} diff --git a/cohorts/templates/cohorts/cohort_update.html b/cohorts/templates/cohorts/cohort_update.html index c2504835b..3d3d3f6d5 100644 --- a/cohorts/templates/cohorts/cohort_update.html +++ b/cohorts/templates/cohorts/cohort_update.html @@ -112,6 +112,7 @@

' ' + ' {{ label }}' + ' {{ case.get_members|length }}' + + ' {{ case.release }}' + ' ' + ' ' + '' diff --git a/config/settings/base.py b/config/settings/base.py index ba3b3e85e..4fc8d16f0 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -192,6 +192,51 @@ logger.info("Enabling VarFishKioskUserMiddleware") MIDDLEWARE += ["varfish.utils.VarFishKioskUserMiddleware"] +# Logging +# ------------------------------------------------------------------------------ + +# Custom logging level +LOGGING_LEVEL = env.str("LOGGING_LEVEL", "DEBUG" if DEBUG else "ERROR") + +# List of apps to include in logging +LOGGING_APPS = env.list( + "LOGGING_APPS", + default=["projectroles", "siteinfo", "sodarcache", "taskflowbackend", "timeline",], +) + +# Path for file logging. If not set, will log only to console +LOGGING_FILE_PATH = env.str("LOGGING_FILE_PATH", None) + + +def set_logging(level=None): + if not level: + level = "DEBUG" if DEBUG else "ERROR" + app_logger_config = { + "level": level, + "handlers": ["console", "file"] if LOGGING_FILE_PATH else ["console"], + "propagate": True, + } + log_handlers = { + "console": {"level": level, "class": "logging.StreamHandler", "formatter": "simple",} + } + if LOGGING_FILE_PATH: + log_handlers["file"] = { + "level": level, + "class": "logging.FileHandler", + "filename": LOGGING_FILE_PATH, + "formatter": "simple", + } + return { + "version": 1, + "disable_existing_loggers": False, + "formatters": {"simple": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"}}, + "handlers": log_handlers, + "loggers": {a: app_logger_config for a in LOGGING_APPS}, + } + + +LOGGING = set_logging(LOGGING_LEVEL) + # FIXTURE CONFIGURATION # ------------------------------------------------------------------------------ # See: https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-FIXTURE_DIRS @@ -258,7 +303,7 @@ # GENERAL CONFIGURATION # ------------------------------------------------------------------------------ -# Local time zone for this installation. Choices can be found here: +# Local zone for this installation. Choices can be found here: # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name # although not all choices may be available on all operating systems. # In a Windows environment this must be set to your system time zone. @@ -483,7 +528,7 @@ # Enable submission of variants to CADD server. 
VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False) # CADD version to use for for submission -VARFISH_CADD_SUBMISSION_RELEASE = env.str("VARFISH_CADD_SUBMISSION_RELEASE", default="GRCh37-v1.6") +VARFISH_CADD_SUBMISSION_VERSION = env.str("VARFISH_CADD_SUBMISSION_VERSION", default="v1.6") # Varfish: MutationTaster URL VARFISH_MUTATIONTASTER_REST_API_URL = env.str( diff --git a/config/settings/test.py b/config/settings/test.py index 4b0e91c1f..f63003be7 100644 --- a/config/settings/test.py +++ b/config/settings/test.py @@ -78,6 +78,12 @@ ] ] +# Logging +# ------------------------------------------------------------------------------ + +LOGGING_LEVEL = env.str("LOGGING_LEVEL", "CRITICAL") +LOGGING = set_logging(LOGGING_LEVEL) + # Varfish: REST Services # ------------------------------------------------------------------------------ diff --git a/docs_manual/admin_config.rst b/docs_manual/admin_config.rst index e44a5f873..ab652b397 100644 --- a/docs_manual/admin_config.rst +++ b/docs_manual/admin_config.rst @@ -103,7 +103,7 @@ format, before starting your varfish instance (you can find more details `here < If you deploy varfish without docker, you can pass the file paths of your metadata.xml and key pair directly. Otherwise, make sure that you have included them into a single folder and added the corresponding folder to your ``docker-compose.yml`` (or add it as a ``docker-compose-overrrided.yml``), like in the following snippet. -.. code-block:: yml +.. code-block:: yaml varfish-web: ... diff --git a/geneinfo/migrations/0025_auto_20211019_0829.py b/geneinfo/migrations/0025_auto_20211019_0829.py new file mode 100644 index 000000000..7a17e4c9e --- /dev/null +++ b/geneinfo/migrations/0025_auto_20211019_0829.py @@ -0,0 +1,22 @@ +# Generated by Django 3.2.7 on 2021-10-19 08:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("geneinfo", "0024_alter_ncbigenerif_pubmed_ids"), + ] + + operations = [ + migrations.AddField( + model_name="hgnc", name="agr", field=models.CharField(max_length=32, null=True), + ), + migrations.AddField( + model_name="hgnc", name="lncipedia", field=models.CharField(max_length=32, null=True), + ), + migrations.AddField( + model_name="hgnc", name="mane_select", field=models.CharField(max_length=64, null=True), + ), + ] diff --git a/geneinfo/migrations/0026_hgnc_gtrnadb.py b/geneinfo/migrations/0026_hgnc_gtrnadb.py new file mode 100644 index 000000000..5b7eb2c12 --- /dev/null +++ b/geneinfo/migrations/0026_hgnc_gtrnadb.py @@ -0,0 +1,16 @@ +# Generated by Django 3.2.7 on 2021-10-20 07:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("geneinfo", "0025_auto_20211019_0829"), + ] + + operations = [ + migrations.AddField( + model_name="hgnc", name="gtrnadb", field=models.CharField(max_length=32, null=True), + ), + ] diff --git a/geneinfo/models.py b/geneinfo/models.py index 8e1134788..06b6600ba 100644 --- a/geneinfo/models.py +++ b/geneinfo/models.py @@ -107,6 +107,14 @@ class Hgnc(models.Model): intermediate_filament_db = models.CharField(max_length=32, null=True) #: RNACentral ID (rnacentral.org database) rna_central_ids = models.CharField(max_length=32, null=True) + #: gtrna DB ID + gtrnadb = models.CharField(max_length=32, null=True) + #: lcipedia.org IDs + lncipedia = models.CharField(max_length=32, null=True) + #: Alliance of genome resources. 
+ agr = models.CharField(max_length=32, null=True) + #: MANE collected ID. + mane_select = models.CharField(max_length=64, null=True) #: Allow bulk import objects = CopyManager() diff --git a/importer/management/commands/import_tables.py b/importer/management/commands/import_tables.py index 5e7c7f81d..6b10200af 100644 --- a/importer/management/commands/import_tables.py +++ b/importer/management/commands/import_tables.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager import os import sys import traceback @@ -6,7 +7,7 @@ from variants.helpers import get_engine from django.core.management.base import BaseCommand, CommandError -from django.db import transaction +from django.db import connection, transaction from clinvar.models import Clinvar, refresh_clinvar_clinvarpathogenicgenes from conservation.models import KnowngeneAA @@ -56,46 +57,62 @@ from svdbs.models import DgvGoldStandardSvs, DgvSvs, ExacCnv, ThousandGenomesSv, DbVarSv, GnomAdSv from variants.helpers import get_meta + +#: Tables in both GRCh37 and GRCh38. +_TABLES_BOTH = { + "clinvar": (Clinvar,), + "dbSNP": (Dbsnp,), + "dbVar": (DbVarSv,), + "DGV": (DgvGoldStandardSvs, DgvSvs), + "ensembl_genes": (GeneInterval,), + "ensembl_regulatory": (EnsemblRegulatoryFeature,), + "ensembltogenesymbol": (EnsemblToGeneSymbol,), + "ensembltorefseq": (EnsemblToRefseq,), + "extra_annos": (ExtraAnno, ExtraAnnoField), + "gnomAD_constraints": (GnomadConstraints,), + "gnomAD_exomes": (GnomadExomes,), + "gnomAD_genomes": (GnomadGenomes,), + "HelixMTdb": (HelixMtDb,), + "hgmd_public": (HgmdPublicLocus,), + "hgnc": (Hgnc, RefseqToHgnc), + "knowngeneaa": (KnowngeneAA,), + "MITOMAP": (Mitomap,), + "mtDB": (MtDb,), + "refseq_genes": (GeneInterval,), +} + +#: Tables only in GRCh37. +_TABLES_GRCH37 = { + "ExAC": (Exac, ExacCnv), + "gnomAD_SV": (GnomAdSv,), + "tads_hesc": (TadInterval, TadBoundaryInterval, TadSet), + "tads_imr90": (TadInterval, TadBoundaryInterval, TadSet), + "thousand_genomes": (ThousandGenomes, ThousandGenomesSv), + "vista": (VistaEnhancer,), +} + +#: Tables shared between GRCh37 and GRCh38. +_TABLES_GRCH38 = {} + +#: Tables without reference, shared between GRCh37 and GRCh38. 
+_TABLES_NOREF = { + "acmg": (Acmg,), + "ExAC_constraints": (ExacConstraints,), + "hpo": (Hpo, HpoName), + "kegg": (KeggInfo, EnsemblToKegg, RefseqToKegg), + "mgi": (MgiHomMouseHumanSequence,), + "mim2gene": (Mim2geneMedgen,), + "ncbi_gene": (NcbiGeneInfo, NcbiGeneRif), + "refseqtoensembl": (RefseqToEnsembl,), + "refseqtogenesymbol": (RefseqToGeneSymbol,), +} + #: One entry in the TABLES variable is structured as follows: #: 'genome_build': {'table_group': (Table,), ...} TABLES = { - "GRCh37": { - "acmg": (Acmg,), - "clinvar": (Clinvar,), - "dbSNP": (Dbsnp,), - "dbVar": (DbVarSv,), - "DGV": (DgvGoldStandardSvs, DgvSvs), - "ensembl_genes": (GeneInterval,), - "ensembl_regulatory": (EnsemblRegulatoryFeature,), - "ensembltorefseq": (EnsemblToRefseq,), - "ExAC_constraints": (ExacConstraints,), - "ExAC": (Exac, ExacCnv), - "extra-annos": (ExtraAnno, ExtraAnnoField), - "gnomAD_constraints": (GnomadConstraints,), - "gnomAD_exomes": (GnomadExomes,), - "gnomAD_genomes": (GnomadGenomes,), - "gnomAD_SV": (GnomAdSv,), - "hgmd_public": (HgmdPublicLocus,), - "hgnc": (Hgnc, RefseqToHgnc), - "hpo": (Hpo, HpoName), - "kegg": (KeggInfo, EnsemblToKegg, RefseqToKegg), - "knowngeneaa": (KnowngeneAA,), - "mgi": (MgiHomMouseHumanSequence,), - "mim2gene": (Mim2geneMedgen,), - "ncbi_gene": (NcbiGeneInfo, NcbiGeneRif), - "refseq_genes": (GeneInterval,), - "refseqtoensembl": (RefseqToEnsembl,), - "tads_hesc": (TadInterval, TadBoundaryInterval, TadSet), - "tads_imr90": (TadInterval, TadBoundaryInterval, TadSet), - "thousand_genomes": (ThousandGenomes, ThousandGenomesSv), - "vista": (VistaEnhancer,), - "refseqtogenesymbol": (RefseqToGeneSymbol,), - "ensembltogenesymbol": (EnsemblToGeneSymbol,), - "MITOMAP": (Mitomap,), - "mtDB": (MtDb,), - "HelixMTdb": (HelixMtDb,), - }, - "GRCh38": {"clinvar": (Clinvar,), "dbVar": (DbVarSv,), "DGV": (DgvSvs,)}, + "GRCh37": {**_TABLES_BOTH, **_TABLES_GRCH37}, + "GRCh38": {**_TABLES_BOTH, **_TABLES_GRCH38}, + "noref": _TABLES_NOREF, } SERVICE_NAME_CHOICES = ["CADD", "Exomiser"] SERVICE_GENOMEBUILD_CHOICES = ["GRCh37", "GRCh38"] @@ -121,6 +138,9 @@ class Command(BaseCommand): #: Help message displayed on the command line. help = "Bulk import all external databases into Varfish tables." + #: Meta information from aldjemy. + _meta = None + def add_arguments(self, parser): """Add the command's argument to the ``parser``.""" parser.add_argument("--tables-path", help="Path to the varfish-db-downloader folder") @@ -139,7 +159,13 @@ def add_arguments(self, parser): choices=SERVICE_GENOMEBUILD_CHOICES, ) parser.add_argument( - "--force", help="Force import, removes old data", action="store_true", default=False + "--force", help="Force import, overwrites old data", action="store_true", default=False + ) + parser.add_argument( + "--truncate", + help="Truncate tables before importing, removes old data", + action="store_true", + default=False, ) parser.add_argument( # Using 8 threads by default as this will make all (currently) large tables import in parallel. 
@@ -186,33 +212,38 @@ def handle(self, *args, **options): if not os.path.isfile(path_import_versions): raise CommandError("Require version import info file {}.".format(path_import_versions)) - self._switch_vacuum(enable=False) + self._meta = get_meta() - import_infos = list(tsv_reader(path_import_versions)) - if options["threads"] == 0: # sequential - for import_info in import_infos: - if import_info["table_group"] in TABLES[import_info["build"]]: - self._handle_import(import_info, options) - else: - self.stderr.write( - "Table group {} is no registered table group.".format( - import_info["table_group"] + with self._without_vaccuum(): + import_infos = list(tsv_reader(path_import_versions)) + if options["threads"] == 0: # sequential + for import_info in import_infos: + if import_info["table_group"] in TABLES[import_info["build"]]: + self._handle_import(import_info, options) + else: + self.stderr.write( + "Table group {} is no registered table group.".format( + import_info["table_group"] + ) ) - ) - else: - pool = ThreadPool(processes=options["threads"]) - for import_info in import_infos: - if import_info["table_group"] in TABLES[import_info["build"]]: - pool.apply_async(self._handle_import_try_catch, (import_info, options)) - else: - self.stderr.write( - "Table group {} is no registered table group.".format( - import_info["table_group"] + else: + pool = ThreadPool(processes=options["threads"]) + for import_info in import_infos: + if import_info["table_group"] in TABLES[import_info["build"]]: + pool.apply_async(self._handle_import_try_catch, (import_info, options)) + else: + self.stderr.write( + "Table group {} is no registered table group.".format( + import_info["table_group"] + ) ) - ) - pool.close() - pool.join() + pool.close() + pool.join() + @contextmanager + def _without_vaccuum(self): + self._switch_vacuum(enable=False) + yield self._switch_vacuum(enable=True) def _switch_vacuum(self, enable): @@ -256,17 +287,27 @@ def _handle_import(self, import_info, options): # Special import routine for kegg if table_group == "kegg": self._import_kegg( - version_path, TABLES[import_info["build"]][table_group], force=options["force"] + version_path, + TABLES[import_info["build"]][table_group], + force=options["force"], + truncate=options["truncate"], ) # Special import routine for gnomAD elif table_group in ("gnomAD_genomes", "gnomAD_exomes"): self._import_gnomad( - version_path, TABLES[import_info["build"]][table_group], force=options["force"] + version_path, + TABLES[import_info["build"]][table_group], + force=options["force"], + truncate=options["truncate"], ) # Special import routine for dbSNP elif table_group == "dbSNP": self._import_dbsnp( - version_path, TABLES[import_info["build"]][table_group], force=options["force"] + version_path, + TABLES[import_info["build"]][table_group], + force=options["force"], + truncate=options["truncate"], + release=import_info["build"], ) # Special import routine for gene intervals elif table_group in ("ensembl_genes", "refseq_genes"): @@ -275,6 +316,7 @@ def _handle_import(self, import_info, options): TABLES[import_info["build"]][table_group], table_group.rstrip("_genes"), force=options["force"], + truncate=options["truncate"], ) # Special import routine for tads elif table_group in ("tads_imr90", "tads_hesc"): @@ -283,6 +325,7 @@ def _handle_import(self, import_info, options): TABLES[import_info["build"]][table_group], table_group[5:], force=options["force"], + truncate=options["truncate"], ) # Import routine for no-bulk-imports elif table_group in 
("ensembl_regulatory", "vista"): @@ -291,6 +334,7 @@ def _handle_import(self, import_info, options): *self._get_table_info(version_path, table.__name__), table, force=options["force"], + truncate=options["truncate"], bulk=False, ) # Import routine for bulk imports (default) @@ -300,6 +344,7 @@ def _handle_import(self, import_info, options): *self._get_table_info(version_path, table.__name__), table, force=options["force"], + truncate=options["truncate"], ) # Refresh clinvar materialized view if one of the depending tables was updated. # Depending tables: Clinvar, Hgnc, RefseqToHgnc @@ -312,12 +357,24 @@ def _handle_import(self, import_info, options): refresh_geneinfo_geneidtoinheritance() refresh_geneinfo_geneidinhpo() - def _import_tad_set(self, path, tables, subset_key, force): + def _truncate(self, models): + # Truncate tables if asked to do so. + cursor = connection.cursor() + self.stdout.write("Truncating tables %s..." % models) + for model in models: + query = 'TRUNCATE TABLE "%s"' % model._meta.db_table + self.stdout.write(" executing %s" % query) + cursor.execute(query) + + def _import_tad_set(self, path, tables, subset_key, force, truncate): """TAD import""" release_info = self._get_table_info(path, tables[0].__name__)[1] if not self._create_import_info_record(release_info): return False + # Truncate tables if asked to do so. + if truncate: + self._truncate((TadSet, TadInterval, TadBoundaryInterval)) # Clear out old data if any TadSet.objects.filter(release=release_info["genomebuild"], name=subset_key).delete() @@ -355,12 +412,15 @@ def _import_tad_set(self, path, tables, subset_key, force): ) self.stdout.write(self.style.SUCCESS("Finished importing TADs")) - def _import_gene_interval(self, path, tables, subset_key, force): + def _import_gene_interval(self, path, tables, subset_key, force, truncate): """Common code for RefSeq and ENSEMBL gene import.""" release_info = self._get_table_info(path, tables[0].__name__)[1] release_info["table"] += ":%s" % subset_key if not self._create_import_info_record(release_info): return False + # Truncate tables if asked to do so. + if truncate: + self._truncate((GeneInterval,)) # Clear out any existing entries for this release/database. GeneInterval.objects.filter( database=subset_key, release=release_info["genomebuild"] @@ -403,7 +463,7 @@ def _read_release_info_file(self, path): return next(tsv_reader(path)) def _get_import_info_record(self, release_info): - """Check if entry exsits in import info table.""" + """Check if entry exists in import info table.""" return ImportInfo.objects.filter( genomebuild=release_info["genomebuild"], table=release_info["table"] ) @@ -422,7 +482,15 @@ def _create_import_info_record(self, release_info): ) def _import( - self, path, release_info, table, import_info=True, service=False, force=False, bulk=True + self, + path, + release_info, + table, + import_info=True, + service=False, + force=False, + bulk=True, + truncate=False, ): """Bulk data into table and add entry to ImportInfo table. @@ -430,6 +498,7 @@ def _import( :param release_info: Content of release info as dict :param table: Django model object of table to import :param null: Null value for bulk import + :param truncate: Whether or not to truncate tables. :return: Boolean if import happened (True) or not (False) """ @@ -442,6 +511,10 @@ def _import( if not service and not release_info["table"] == table.__name__: CommandError("Table name in release_info file does not match table name.") + # Truncate tables if asked to do so. 
+ if truncate: + self._truncate((table,)) + # Skip importing table if record already exists in import info table and re-import is not forced. if import_info and not force and self._get_import_info_record(release_info).exists(): self.stdout.write( @@ -458,7 +531,7 @@ def _import( # Clear out any existing entries for this release/database. if import_info: self.stdout.write("{table} -- Removing old {table} results.".format(**release_info)) - sa_table = get_meta().tables[table._meta.db_table] + sa_table = self._meta.tables[table._meta.db_table] if "release" in sa_table.c: get_engine().execute( sa_table.delete().where(sa_table.c.release == release_info["genomebuild"]) @@ -482,8 +555,9 @@ def _import( self.stderr.write( "Error during import to table %s:\n%s" % (table._meta.db_table, e) ) + traceback.print_exc(file=self.stderr) # Remove already imported data. - sa_table = get_meta().tables[table._meta.db_table] + sa_table = self._meta.tables[table._meta.db_table] if "release" in sa_table.c: get_engine().execute( sa_table.delete().where( @@ -512,7 +586,7 @@ def _import( ) return True - def _import_kegg(self, path, tables, force): + def _import_kegg(self, path, tables, force, truncate): """Wrapper function to import kegg databases. :param path: Path to kegg tables @@ -526,14 +600,22 @@ def _import_kegg(self, path, tables, force): mapping = {entry.kegg_id: str(entry.id) for entry in KeggInfo.objects.all()} # Import EnsembleToKegg self._replace_pk_in_kegg_and_import( - mapping, *self._get_table_info(path, tables[1].__name__), tables[1], force + mapping, + *self._get_table_info(path, tables[1].__name__), + tables[1], + force, + truncate=truncate, ) # Import RefseqToKegg self._replace_pk_in_kegg_and_import( - mapping, *self._get_table_info(path, tables[2].__name__), tables[2], force + mapping, + *self._get_table_info(path, tables[2].__name__), + tables[2], + force, + truncate=truncate, ) - def _replace_pk_in_kegg_and_import(self, mapping, path, release_info, table, force): + def _replace_pk_in_kegg_and_import(self, mapping, path, release_info, table, force, truncate): """Wrapper function to replace pk in mapping tables before import (and then import). :param mapping: Mapping of kegg ids to KeggInfo pks. 
@@ -559,15 +641,18 @@ def _replace_pk_in_kegg_and_import(self, mapping, path, release_info, table, for tmp.write("\t".join(fields)) tmp.write("\n") tmp.flush() - return self._import(tmp.name, release_info, table, force=force) + return self._import(tmp.name, release_info, table, force=force, truncate=truncate) - def _import_gnomad(self, path, tables, force): - self._import_chromosome_wise(path, tables, force, list(range(1, 23)) + ["X"]) + def _import_gnomad(self, path, tables, force, truncate): + self._import_chromosome_wise(path, tables, force, truncate, list(range(1, 23)) + ["X"]) - def _import_dbsnp(self, path, tables, force): - self._import_chromosome_wise(path, tables, force, list(range(1, 23)) + ["X", "Y", "MT"]) + def _import_dbsnp(self, path, tables, force, truncate, release): + chr_mt = ["MT"] if release == "GRCh37" else ["M"] + self._import_chromosome_wise( + path, tables, force, truncate, list(range(1, 23)) + ["X", "Y"] + chr_mt + ) - def _import_chromosome_wise(self, path, tables, force, chroms): + def _import_chromosome_wise(self, path, tables, force, truncate, chroms): """Wrapper function to import gnomad tables :param path: Path to gnomad tables @@ -575,8 +660,8 @@ def _import_chromosome_wise(self, path, tables, force, chroms): :return: Nothing """ # Import file is scattered into chromosome pieces, collect them. - for chrom in chroms: - # If the first chromosome can't be imported, don't try to import the other chromosomes. + for no, chrom in enumerate(chroms): + # If the any chromosome can't be imported, don't try to import the other chromosomes. if not self._import( # Add chromosome to file name *self._get_table_info(path, "{}.{}".format(tables[0].__name__, chrom)), @@ -584,5 +669,6 @@ def _import_chromosome_wise(self, path, tables, force, chroms): # Import into info table only once chrom == 1, force=force, + truncate=truncate and no == 0, ): break diff --git a/importer/migrations/0010_auto_20220112_0657.py b/importer/migrations/0010_auto_20220112_0657.py new file mode 100644 index 000000000..ee0a0afa7 --- /dev/null +++ b/importer/migrations/0010_auto_20220112_0657.py @@ -0,0 +1,28 @@ +# Generated by Django 3.2.7 on 2022-01-12 06:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("importer", "0009_alter_caseimportinfo_pedigree"), + ] + + operations = [ + migrations.AddField( + model_name="caseimportinfo", + name="release", + field=models.CharField(default="GRCh37", max_length=32, null=True), + ), + migrations.AlterField( + model_name="variantsetimportinfo", + name="genomebuild", + field=models.CharField( + choices=[("GRCh37", "GRCh37"), ("GRCh38", "GRCh38")], + default="GRCh37", + help_text="Genome build used in the variant set.", + max_length=32, + ), + ), + ] diff --git a/importer/models.py b/importer/models.py index 64d8a9f40..2d19b99b5 100644 --- a/importer/models.py +++ b/importer/models.py @@ -141,7 +141,7 @@ class VariantSetImportInfo(models.Model): genomebuild = models.CharField( max_length=32, - choices=(("GRCh37", "GRCh37"),), + choices=(("GRCh37", "GRCh37"), ("GRCh38", "GRCh38")), default="GRCh37", help_text="Genome build used in the variant set.", ) @@ -361,12 +361,23 @@ def run(self): with transaction.atomic(): self.case, case_created = Case.objects.get_or_create( name=self.import_info.name, + release=self.import_info.release, project=self.import_info.project, defaults={ "index": self.import_info.index, "pedigree": self.import_info.pedigree, }, ) + if not case_created: + if self.case.release != 
self.import_info.release: + self.import_job.add_log_entry( + "Tried to import data for genome build %s into case with genome build %s" + % (self.import_info.release, self.case.release), + LOG_LEVEL_ERROR, + ) + raise RuntimeError( + "Inconsistent genome builds for import and existing case" + ) for variant_set_info in self.import_info.variantsetimportinfo_set.filter( state=VariantSetImportState.UPLOADED.value ): @@ -471,6 +482,7 @@ def _import_table( default_values = default_values or {} before = timezone.now() self.import_job.add_log_entry("Creating temporary %s file..." % token) + case_genomebuild = self.case.release with tempfile.NamedTemporaryFile("w+t") as tempf: for i, import_variant_set_url in enumerate(getattr(variant_set_info, path_attr).all()): self.import_job.add_log_entry("Importing from %s" % import_variant_set_url.name) @@ -478,11 +490,12 @@ def _import_table( header = inputf.readline().strip() header_arr = header.split("\t") try: + release_idx = header_arr.index("release") case_idx = header_arr.index("case_id") set_idx = header_arr.index("set_id") except ValueError as e: raise RuntimeError( - "Column 'case_id' or 'set_id' not found in %s TSV" % token + "Column 'release', 'case_id' or 'set_id' not found in %s TSV" % token ) from e # Extend header for fields in self.default_values and build suffix to append to every line. default_suffix = [] @@ -498,6 +511,11 @@ def _import_table( if not line: break arr = line.split("\t") + if arr[release_idx] != case_genomebuild: + raise RuntimeError( + "Incompatible genome build in %s TSV: %s vs %s from case" + % (token, arr[release_idx], case_genomebuild) + ) arr[case_idx] = str(variant_set.case.pk) arr[set_idx] = str(variant_set.pk) tempf.write("\t".join(arr + default_suffix)) @@ -522,6 +540,11 @@ def _import_table( ) def _perform_import(self, variant_set, variant_set_info): + if variant_set_info.genomebuild != self.case.release: + raise RuntimeError( + "Incompatible genome builds in import info: %s and existing case: %s" + % (variant_set_info.genomebuild, self.case.release) + ) if variant_set_info.variant_type == CaseVariantType.SMALL.name: # Ensure that the info and {refseq,ensembl}_exon_dist fields are present with default values. 
This snippet # can go away once we are certain all TSV files have been created with varfish-annotator >=0.10 diff --git a/importer/serializers.py b/importer/serializers.py index dd208758f..8f0fb31ca 100644 --- a/importer/serializers.py +++ b/importer/serializers.py @@ -207,6 +207,7 @@ class Meta: "date_modified", "owner", "case", + "release", "project", "name", "index", diff --git a/regmaps/migrations/0002_auto_20211129_1443.py b/regmaps/migrations/0002_auto_20211008_1015.py similarity index 91% rename from regmaps/migrations/0002_auto_20211129_1443.py rename to regmaps/migrations/0002_auto_20211008_1015.py index 658cfad00..8ff8433e6 100644 --- a/regmaps/migrations/0002_auto_20211129_1443.py +++ b/regmaps/migrations/0002_auto_20211008_1015.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.9 on 2021-11-29 14:43 +# Generated by Django 3.2.7 on 2021-10-08 10:15 from django.db import migrations import varfish.utils diff --git a/svs/migrations/0016_structuralvariantset_release.py b/svs/migrations/0016_structuralvariantset_release.py new file mode 100644 index 000000000..7c0d31da7 --- /dev/null +++ b/svs/migrations/0016_structuralvariantset_release.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.7 on 2021-10-08 10:15 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("svs", "0015_set_logged_table"), + ] + + operations = [ + migrations.AddField( + model_name="structuralvariantset", + name="release", + field=models.CharField(default="GRCh37", max_length=32, null=True), + ), + ] diff --git a/svs/models.py b/svs/models.py index d4475c88c..b029a99a2 100644 --- a/svs/models.py +++ b/svs/models.py @@ -121,6 +121,8 @@ class StructuralVariantSet(models.Model): case = models.ForeignKey( Case, on_delete=models.CASCADE, null=False, help_text="The case that this set is for" ) + #: Genome build + release = models.CharField(max_length=32, null=True, default="GRCh37") #: The state of the variant set. state = models.CharField( max_length=16, diff --git a/variants/file_export.py b/variants/file_export.py index fc69abf70..5c6ce9c8f 100644 --- a/variants/file_export.py +++ b/variants/file_export.py @@ -264,6 +264,25 @@ def __init__(self, job, case_or_project_or_cohort): #: The column information. 
self.columns = list(self._yield_columns(self.members)) + def get_genomebuild(self): + """Return genome build for case or cohort.""" + if self.case: + return self.case.release + else: + if isinstance(self.project_or_cohort, Cohort): + cases = [ + case + for case in self.project_or_cohort.get_accessible_cases_for_user( + self.job.bg_job.user + ) + ] + else: # project + cases = [case for case in self.project_or_cohort.case_set.all()] + if not cases: + return "GRCh37" + else: + return cases[0].release + def get_alchemy_engine(self): if not self._alchemy_engine: self._alchemy_engine = get_engine() @@ -394,7 +413,9 @@ def _fetch_variant_scores(self, variants): try: patho_score = self.query_args.get("patho_score") scorer_factory = VariantScoresFactory() - scorer = scorer_factory.get_scorer(patho_score, variants, self.job.bg_job.user) + scorer = scorer_factory.get_scorer( + self._get_genomebuild(), patho_score, variants, self.job.bg_job.user + ) return { "-".join( [ diff --git a/variants/forms.py b/variants/forms.py index 33272ec9d..51bd5db73 100644 --- a/variants/forms.py +++ b/variants/forms.py @@ -11,6 +11,7 @@ from django.core.files.storage import FileSystemStorage from django.utils.text import get_valid_filename +from cohorts.models import Cohort from .models import SmallVariantComment, SmallVariantFlags, AcmgCriteriaRating, Case, CaseComments from .templatetags.variants_tags import only_source_name, get_term_description from geneinfo.models import Hgnc, HpoName, Hpo @@ -83,6 +84,8 @@ def __init__(self, *args, **kwargs): choices=((0, "unknown"), (1, "unaffected"), (2, "affected")), ) + self.genomebuild = self.instance.release + def save(self, commit=True): case = super().save(commit=False) @@ -1476,6 +1479,20 @@ def __init__(self, *args, **kwargs): self.fields["cohort"] = forms.CharField( widget=forms.HiddenInput(), initial=str(cohort.sodar_uuid) ) + self.genomebuild = self._get_genomebuild() + + def _get_genomebuild(self): + """Return genome build for case or cohort or project""" + if isinstance(self.project_or_cohort, Cohort): + cases = [ + case for case in self.project_or_cohort.get_accessible_cases_for_user(self.user) + ] + else: # project + cases = [case for case in self.project_or_cohort.case_set.all()] + if not cases: + return "GRCh37" + else: + return cases[0].release def get_pedigree(self): """Return ``list`` of ``dict`` with pedigree information.""" diff --git a/variants/migrations/0084_auto_20220112_0657.py b/variants/migrations/0084_auto_20220112_0657.py new file mode 100644 index 000000000..2a737a1bd --- /dev/null +++ b/variants/migrations/0084_auto_20220112_0657.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.7 on 2022-01-12 06:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("variants", "0083_auto_20211129_1443"), + ] + + operations = [ + migrations.AddField( + model_name="case", + name="release", + field=models.CharField(default="GRCh37", max_length=32, null=True), + ), + migrations.AddField( + model_name="smallvariantset", + name="release", + field=models.CharField(default="GRCh37", max_length=32, null=True), + ), + ] diff --git a/variants/models.py b/variants/models.py index a261a2139..7f772f137 100644 --- a/variants/models.py +++ b/variants/models.py @@ -472,6 +472,8 @@ def find(self, search_terms, _keywords=None): class CoreCase(models.Model): """Abstract base class for Case core fields.""" + #: Genome build + release = models.CharField(max_length=32, null=True, default="GRCh37") #: Name of the case. 
name = models.CharField(max_length=512) #: Identifier of the index in ``pedigree``. @@ -951,6 +953,8 @@ class SmallVariantSet(models.Model): case = models.ForeignKey( Case, on_delete=models.CASCADE, null=False, help_text="The case that this set is for" ) + #: Genome build + release = models.CharField(max_length=32, null=True, default="GRCh37") #: The state of the variant set. state = models.CharField( max_length=16, @@ -2428,13 +2432,13 @@ def prioritize_genes(entrez_ids, hpo_terms, prio_algorithm): class VariantScoresFactory: """Factory class for variant scorers.""" - def get_scorer(self, score_type, variants, user=None): + def get_scorer(self, genomebuild, score_type, variants, user=None): if score_type == "umd": - return VariantScoresUmd(variants, score_type, user) + return VariantScoresUmd(genomebuild, variants, score_type, user) elif score_type == "cadd": - return VariantScoresCadd(variants, score_type) + return VariantScoresCadd(genomebuild, variants, score_type) elif score_type == "mutationtaster": - return VariantScoresMutationTaster(variants, score_type) + return VariantScoresMutationTaster(genomebuild, variants, score_type) class VariantScoresBase: @@ -2443,7 +2447,8 @@ class VariantScoresBase: #: Set PathogenicityCache model (required in child classes) cache_model = None - def __init__(self, variants, score_type, user=None): + def __init__(self, genomebuild, variants, score_type, user=None): + self.genomebuild = genomebuild self.variants = list(set(variants)) self.user = user self.score_type = score_type @@ -2745,12 +2750,13 @@ def score(self): uncached = uncached[: settings.VARFISH_CADD_MAX_VARS] # TODO: properly test + cadd_release = "%s-%s" % (self.genomebuild, settings.VARFISH_CADD_REST_API_CADD_VERSION) try: res = requests.post( settings.VARFISH_CADD_REST_API_URL + "/annotate/", json={ - "genome_build": "GRCh37", - "cadd_release": settings.VARFISH_CADD_REST_API_CADD_VERSION, + "genome_build": self.genomebuild, + "cadd_release": cadd_release, "variant": ["-".join(map(str, var)) for var in uncached], }, ) diff --git a/variants/plugins.py b/variants/plugins.py index da44e9809..2da72c02e 100644 --- a/variants/plugins.py +++ b/variants/plugins.py @@ -202,13 +202,24 @@ def _get_state_bar_html(self, project): return "".join(arr) def _get_action_buttons(self, project): - tpl = """ - - - - """ - url = reverse("variants:project-cases-filter", kwargs={"project": project.sodar_uuid}) - return tpl % url + if len({case.release for case in Case.objects.filter(project=project)}) == 1: + tpl = """ + + + + """ + url = reverse("variants:project-cases-filter", kwargs={"project": project.sodar_uuid}) + return tpl % url + else: + html = """ + + + + """ + return html def get_statistics(self): return { diff --git a/variants/serializers.py b/variants/serializers.py index bc1ffafa8..5b571200f 100644 --- a/variants/serializers.py +++ b/variants/serializers.py @@ -100,7 +100,9 @@ class CaseSerializer(CoreCaseSerializerMixin, SODARProjectModelSerializer): project = serializers.ReadOnlyField(source="project.sodar_uuid") def create(self, validated_data): + """Make project and release writeable on creation.""" validated_data["project"] = self.context["project"] + validated_data["release"] = self.context["release"] return super().create(validated_data) class Meta: @@ -109,6 +111,7 @@ class Meta: "sodar_uuid", "date_created", "date_modified", + "release", "name", "index", "pedigree", @@ -126,4 +129,5 @@ class Meta: "num_small_vars", "num_svs", "project", + "release", ) diff --git a/variants/submit_filter.py 
b/variants/submit_filter.py index c0afb008a..de7fc7c8b 100644 --- a/variants/submit_filter.py +++ b/variants/submit_filter.py @@ -5,6 +5,7 @@ from projectroles.plugins import get_backend_api +from cohorts.models import Cohort from variants.helpers import get_engine from variants.forms import PATHO_SCORES_MAPPING from variants.models import prioritize_genes, VariantScoresFactory @@ -30,6 +31,10 @@ def _get_assembled_query(self): """Override me!""" pass + def _get_genomebuild(self): + """Override me!""" + pass + def get_alchemy_engine(self): """Construct and return the alchemy connection.""" if not self._alchemy_engine: @@ -111,7 +116,9 @@ def get_var(row): try: with transaction.atomic(): scorer_factory = VariantScoresFactory() - scorer = scorer_factory.get_scorer(patho_score, variants, self.job.bg_job.user) + scorer = scorer_factory.get_scorer( + self._get_genomebuild(), patho_score, variants, self.job.bg_job.user + ) for score in scorer.score(): getattr( self.variant_query, "%svariantscores_set" % self.variant_query.query_type() @@ -124,6 +131,9 @@ class CaseFilter(FilterBase): """Class for storing query results for a single case. """ + def _get_genomebuild(self): + return self.variant_query.case.release + def _get_assembled_query(self): """Render filter query for a single case""" return CasePrefetchQuery(self.variant_query.case, self.get_alchemy_engine()) @@ -133,6 +143,21 @@ class ProjectCasesFilter(FilterBase): """Class for storing query results for cases of a project. """ + def _get_genomebuild(self): + if self.job.cohort: + cases = [ + case + for case in self.project_or_cohort.get_accessible_cases_for_user( + self.job.bg_job.user + ) + ] + else: + cases = [case for case in self.variant_query.project.case_set.all()] + if cases: + return cases[0].release + else: + return "GRCh37" + def _get_assembled_query(self): """Render filter query for a project""" return ProjectPrefetchQuery( diff --git a/variants/templates/variants/_case_list_buttons.html b/variants/templates/variants/_case_list_buttons.html index ecec593c4..5e9af1db8 100644 --- a/variants/templates/variants/_case_list_buttons.html +++ b/variants/templates/variants/_case_list_buttons.html @@ -1,3 +1,5 @@ +{% load variants_tags %} +
{% if not kiosk_mode %} @@ -5,10 +7,18 @@ Back to Project {% endif %} - - - Joint Filtration - + {% same_release project.case_set.all as project_cases_same_release %} + {% if project_cases_same_release %} + + + Joint Filtration + + {% else %} + + + Joint Filtration + + {% endif %} {% if request.user.is_superuser or kiosk_mode %} {% if disable_pedigree_sex_check %} diff --git a/variants/templates/variants/_details_card.html b/variants/templates/variants/_details_card.html index 58aa32bd6..690b9cea9 100644 --- a/variants/templates/variants/_details_card.html +++ b/variants/templates/variants/_details_card.html @@ -14,6 +14,7 @@ {% if svs_enabled %} SVs {% endif %} + Genome @@ -24,18 +25,26 @@ {% include 'variants/case/item.html' with item=case details_card_mode=True %} {% endfor %} - + See list of all cases - - Joint Filtration - + {% same_release cases as project_cases_same_release %} + {% if project_cases_same_release %} + + Joint Filtration + + {% else %} + + + Joint Filtration + + {% endif %} {% else %} - No cases (yet) + No cases (yet) {% endif %} diff --git a/variants/templates/variants/_filter_form.html b/variants/templates/variants/_filter_form.html index 2f0707e03..818a7cf82 100644 --- a/variants/templates/variants/_filter_form.html +++ b/variants/templates/variants/_filter_form.html @@ -135,13 +135,20 @@ Download as File {% if allow_md_submission %} - + {% if form.case.release == "GRCh37" %} + + {% else %} + + {% endif %} {% endif %} {% if cadd_submission_enabled %} + {% if form.case.release == "GRCh37" %} + + {% else %} + + {% endif %} {% endif %}
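The template changes above and the check in ``variants/plugins.py`` gate "Joint Filtration" on all cases of a project or cohort sharing one genome build, via a ``same_release`` tag loaded from ``variants_tags``. The tag's implementation is not shown in this diff; below is a minimal sketch of what it is assumed to look like, mirroring the ``len({case.release for case in ...}) == 1`` check from ``variants/plugins.py``.

.. code-block:: python

    # Sketch only: assumed shape of the ``same_release`` tag in
    # variants/templatetags/variants_tags.py (the real implementation is not
    # part of this diff).
    from django import template

    register = template.Library()


    @register.simple_tag
    def same_release(cases):
        """Return True if all given cases share one genome build.

        Used as ``{% same_release cases as cases_same_release %}`` in the
        templates above to decide whether joint filtration is offered.
        """
        return len({case.release for case in cases}) == 1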
diff --git a/variants/templates/variants/_search_item.html b/variants/templates/variants/_search_item.html index 36d7c0d1d..f1234b95f 100644 --- a/variants/templates/variants/_search_item.html +++ b/variants/templates/variants/_search_item.html @@ -49,6 +49,9 @@ {% endif %} {% endif %} + + {{ item.release }} + {% if svs_enabled %}
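The scorer factory changes in ``variants/models.py``, ``variants/file_export.py``, and ``variants/submit_filter.py`` thread the genome build through to the pathogenicity scorers instead of hard-coding ``GRCh37``. A minimal usage sketch follows; only the call signature and the score-type names are taken from this diff, while the ``case``, ``variants``, and ``user`` values are assumed to come from the surrounding filter or export job.

.. code-block:: python

    # Sketch only: how callers use the changed factory signature.
    from variants.models import VariantScoresFactory

    def score_small_variants(case, variants, user):
        """Score ``variants`` of ``case`` with CADD, using the case's genome build."""
        scorer_factory = VariantScoresFactory()
        # The genome build is now the first argument; CaseFilter passes
        # case.release, ProjectCasesFilter the release of the first accessible
        # case (falling back to "GRCh37" if there are no cases).
        scorer = scorer_factory.get_scorer(case.release, "cadd", variants, user)
        return list(scorer.score())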
diff --git a/variants/templates/variants/_search_results.html b/variants/templates/variants/_search_results.html index f04835910..5870580fb 100644 --- a/variants/templates/variants/_search_results.html +++ b/variants/templates/variants/_search_results.html @@ -18,6 +18,7 @@ {% if svs_enabled %} SVs {% endif %} + Genome diff --git a/variants/templates/variants/case/detail_overview.html b/variants/templates/variants/case/detail_overview.html index f3448d324..59eb5c7e5 100644 --- a/variants/templates/variants/case/detail_overview.html +++ b/variants/templates/variants/case/detail_overview.html @@ -48,6 +48,16 @@

+
  • +
    + + Reference Genome + + + {{ object.release }} + +
    +
  • diff --git a/variants/templates/variants/case/item.html b/variants/templates/variants/case/item.html index e8e3c8d76..4ab512730 100644 --- a/variants/templates/variants/case/item.html +++ b/variants/templates/variants/case/item.html @@ -71,6 +71,9 @@ {% endif %} {% endif %} + + {{ item.release }} + {% if svs_enabled %}
    diff --git a/variants/templates/variants/case_detail.html b/variants/templates/variants/case_detail.html index 873457ded..1b1ce83bf 100644 --- a/variants/templates/variants/case_detail.html +++ b/variants/templates/variants/case_detail.html @@ -20,6 +20,9 @@

    Case {{ object.name }} + + {{ object.release }} +

    {% if request.user and request.user.is_authenticated %} {% get_app_setting 'userprofile' 'enable_project_uuid_copy' user=request.user as enable_uuid_copy %} diff --git a/variants/templates/variants/case_list/list.html b/variants/templates/variants/case_list/list.html index f79ff854b..46127ff05 100644 --- a/variants/templates/variants/case_list/list.html +++ b/variants/templates/variants/case_list/list.html @@ -47,6 +47,7 @@

    {% if svs_enabled %} SVs {% endif %} + Genome @@ -56,7 +57,7 @@

    {% endfor %} {% if not object_list %} - + No cases added yet. diff --git a/variants/templates/variants/filter.html b/variants/templates/variants/filter.html index d5c0d615a..c66bee75d 100644 --- a/variants/templates/variants/filter.html +++ b/variants/templates/variants/filter.html @@ -149,6 +149,9 @@

    {% if query_type == "case" %} Filter Variants for Case {{ object.name }} + + {{ object.release }} + {% else %} {% if cohort %} Joint Filtration for Cohort diff --git a/variants/templates/variants/filter_form/frequency.html b/variants/templates/variants/filter_form/frequency.html index 4eb00bc5c..3dbb5f16a 100644 --- a/variants/templates/variants/filter_form/frequency.html +++ b/variants/templates/variants/filter_form/frequency.html @@ -1,6 +1,7 @@ {% load crispy_forms_tags %} {% load dict %} {% load projectroles_common_tags %} + {% get_django_setting 'PROJECTROLES_KIOSK_MODE' as kiosk_mode %}
@@ -8,6 +9,7 @@ The checkboxes enable () or disable () filtration based on the population frequencies of the given database. You can provide the number of carriers with maximal heterozygous/homozygous (respectively: -plasmid) state or population frequencies. For the in-house DB, you can only filter based on carrier state as currently it is tracked how many carriers have sufficient coverage for each variant. + {% if form.genomebuild != "GRCh37" %}Thousand Genomes and ExAC frequencies are only available for GRCh37 cases.{% endif %}
    @@ -22,7 +24,7 @@ - + @@ -30,7 +32,7 @@ - + diff --git a/variants/templates/variants/filter_result/row.html b/variants/templates/variants/filter_result/row.html index fc1c04de4..cba4de1a7 100644 --- a/variants/templates/variants/filter_result/row.html +++ b/variants/templates/variants/filter_result/row.html @@ -63,7 +63,7 @@
    {{ form.thousand_genomes_enabled|as_crispy_field }} 1000 Genomes (samples: 1000) {{ form.thousand_genomes_homozygous|as_crispy_field }}{{ form.thousand_genomes_hemizygous|as_crispy_field }} {{ form.thousand_genomes_frequency|as_crispy_field }}
    {{ form.exac_enabled|as_crispy_field }} ExAC (samples: 60,706) {{ form.exac_homozygous|as_crispy_field }}
    - chr{{ entry.chromosome }}:{{ entry.start|intcomma }} + {% entry_chr entry %}:{{ entry.start|intcomma }} {% if entry.chromosome_no == 25 %} {% if entry.start|check_mt_position_homopolymer or entry.end|check_mt_position_homopolymer %} @@ -488,25 +488,59 @@ Toggle Dropdown
    diff --git a/variants/templates/variants/filter_result/table.html b/variants/templates/variants/filter_result/table.html index 275b9b8d1..cc5817617 100644 --- a/variants/templates/variants/filter_result/table.html +++ b/variants/templates/variants/filter_result/table.html @@ -174,8 +174,10 @@

    Frequency
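Closing note on the ``import_tables.py`` restructuring earlier in this diff: the per-build registry is now composed from shared, GRCh37-only, GRCh38-only, and reference-free dictionaries, and each ``import_info.tsv`` row is checked against the registry for its build. The following standalone sketch illustrates that lookup; the strings are toy stand-ins for the Django model classes used in the command.

.. code-block:: python

    # Standalone illustration of the recomposed TABLES registry (toy entries).
    _TABLES_BOTH = {"clinvar": ("Clinvar",), "dbSNP": ("Dbsnp",)}
    _TABLES_GRCH37 = {"ExAC": ("Exac", "ExacCnv")}
    _TABLES_GRCH38 = {}
    _TABLES_NOREF = {"acmg": ("Acmg",), "hpo": ("Hpo", "HpoName")}

    TABLES = {
        "GRCh37": {**_TABLES_BOTH, **_TABLES_GRCH37},
        "GRCh38": {**_TABLES_BOTH, **_TABLES_GRCH38},
        "noref": _TABLES_NOREF,
    }

    # Each import_info.tsv row names a build and a table group; groups that are
    # not registered for that build are reported and skipped, as in handle() above.
    rows = [
        {"build": "GRCh38", "table_group": "clinvar"},
        {"build": "GRCh38", "table_group": "ExAC"},
    ]
    for import_info in rows:
        if import_info["table_group"] in TABLES[import_info["build"]]:
            print("would import", import_info["table_group"])
        else:
            print("skipping unregistered table group", import_info["table_group"])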