Commit

Extending models with release field for GRCh37/GRCh38.
holtgrewe committed Jan 12, 2022
1 parent 591d9f8 commit ec17d55
Showing 43 changed files with 832 additions and 184 deletions.
13 changes: 13 additions & 0 deletions HISTORY.rst
@@ -6,6 +6,8 @@ History / Changelog
HEAD (unreleased)
-----------------

Breaking changes, see below.

End-User Summary
================

@@ -25,6 +27,10 @@ End-User Summary
- Added section for developers in manual (#267).
- Migrated icons to iconify (#208).
- Bumped chrome-driver version (#208).
- VarFish now allows the import of GRCh38-annotated variants.
  For this, GRCh38 background data must be imported.
  Kiosk mode does not support GRCh38 yet.
  **This is a breaking change; new data and CLI must be used!**

Full Change List
================
@@ -50,6 +56,13 @@ Full Change List
- Migrated icons to iconify (#208).
- Bumped chrome-driver version (#208).
- Skipping codacy if token is not defined (#275).
- Adjusting models and UI for supporting GRCh38 annotated cases.
  It is currently not possible to migrate a GRCh37 case to GRCh38.
- The setting ``VARFISH_CADD_SUBMISSION_RELEASE`` has been renamed to ``VARFISH_CADD_SUBMISSION_VERSION`` (**breaking change**).
- ``import_info.tsv`` is now expected in the format of data release ``20210728``, as built from varfish-db-downloader ``1b03e97`` or later.
- Extended the columns of ``Hgnc`` to match the upstream update.

-------
v0.23.9
1 change: 1 addition & 0 deletions cohorts/templates/cohorts/cohort_create.html
@@ -112,6 +112,7 @@ <h5 class="mb-0">
' <span class="badge-group">' +
' <span class="badge badge-secondary">{{ label }}</span>' +
' <span class="badge badge-info members-count">{{ case.get_members|length }}</span>' +
' <span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>' +
' </span>' +
' </label>' +
'</span>'
15 changes: 14 additions & 1 deletion cohorts/templates/cohorts/cohort_list.html
@@ -2,6 +2,7 @@

{% load dict %}
{% load humanize %}
{% load variants_tags %}
{% load cohorts_tags %}
{% load projectroles_common_tags %}

@@ -90,6 +91,7 @@ <h4 class="card-header">
<a href="{{ case.get_absolute_url }}" class="badge-group" data-toggle="tooltip" data-html="true" title="From project <strong>{{ case.project.title }}</strong>, having {{ case.get_members|length }} individual(s)">
<span class="badge badge-secondary">{{ case.name }}</span>
<span class="badge badge-info">{{ case.get_members|length }}</span>
<span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>
</a>
{% endfor %}
{% if not item|check_accessible_cases:user %}
@@ -105,7 +107,18 @@ <h4 class="card-header">
<span class="btn btn-sm btn-danger disabled"><i class="iconify" data-icon="mdi:trash-can"></i></span>
<span class="btn btn-sm btn-secondary disabled"><i class="iconify" data-icon="mdi:pencil"></i></span>
{% endif %}
<a href="{% url 'variants:project-cases-filter-cohort' project=project.sodar_uuid cohort=item.sodar_uuid %}" class="btn btn-sm btn-primary"><i class="iconify" data-icon="mdi:filter"></i></a>


{% same_release cases as cases_same_release %}
{% if cases_same_release %}
<a href="{% url 'variants:project-cases-filter-cohort' project=project.sodar_uuid cohort=item.sodar_uuid %}" class="btn btn-sm btn-primary"><i class="iconify" data-icon="mdi:filter"></i></a>
{% else %}
<span
class="btn btn-sm btn-primary disabled"
data-toggle="tooltip"
title="Cannot filter cases with different genomes"
><i class="iconify" data-icon="mdi:filter"></i></span>
{% endif %}
</span>
</td>
</tr>
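The ``same_release`` assignment tag used above is loaded from ``variants_tags``. A minimal sketch of such a tag (hypothetical implementation, shown only to illustrate the check the template relies on) could look like this:

from django import template

register = template.Library()


@register.simple_tag
def same_release(cases):
    """Return True if all cases in the cohort share one genome release (GRCh37/GRCh38)."""
    return len({case.release for case in cases}) <= 1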
1 change: 1 addition & 0 deletions cohorts/templates/cohorts/cohort_update.html
@@ -112,6 +112,7 @@ <h5 class="mb-0">
' <span class="badge-group">' +
' <span class="badge badge-secondary">{{ label }}</span>' +
' <span class="badge badge-info members-count">{{ case.get_members|length }}</span>' +
' <span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>' +
' </span>' +
' </label>' +
'</span>'
49 changes: 47 additions & 2 deletions config/settings/base.py
@@ -192,6 +192,51 @@
logger.info("Enabling VarFishKioskUserMiddleware")
MIDDLEWARE += ["varfish.utils.VarFishKioskUserMiddleware"]

# Logging
# ------------------------------------------------------------------------------

# Custom logging level
LOGGING_LEVEL = env.str("LOGGING_LEVEL", "DEBUG" if DEBUG else "ERROR")

# List of apps to include in logging
LOGGING_APPS = env.list(
"LOGGING_APPS",
default=["projectroles", "siteinfo", "sodarcache", "taskflowbackend", "timeline",],
)

# Path for file logging. If not set, will log only to console
LOGGING_FILE_PATH = env.str("LOGGING_FILE_PATH", None)


def set_logging(level=None):
if not level:
level = "DEBUG" if DEBUG else "ERROR"
app_logger_config = {
"level": level,
"handlers": ["console", "file"] if LOGGING_FILE_PATH else ["console"],
"propagate": True,
}
log_handlers = {
"console": {"level": level, "class": "logging.StreamHandler", "formatter": "simple",}
}
if LOGGING_FILE_PATH:
log_handlers["file"] = {
"level": level,
"class": "logging.FileHandler",
"filename": LOGGING_FILE_PATH,
"formatter": "simple",
}
return {
"version": 1,
"disable_existing_loggers": False,
"formatters": {"simple": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"}},
"handlers": log_handlers,
"loggers": {a: app_logger_config for a in LOGGING_APPS},
}


LOGGING = set_logging(LOGGING_LEVEL)
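# Example (hypothetical values): the logging block above is driven entirely from
# the environment, e.g. via an ``.env`` file read by django-environ:
#
#   LOGGING_LEVEL=INFO
#   LOGGING_APPS=projectroles,timeline,variants
#   LOGGING_FILE_PATH=/var/log/varfish/varfish.log
#
# ``env.list()`` splits the comma-separated LOGGING_APPS value; leaving
# LOGGING_FILE_PATH unset keeps the console-only handler configuration.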

# FIXTURE CONFIGURATION
# ------------------------------------------------------------------------------
# See: https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-FIXTURE_DIRS
@@ -258,7 +303,7 @@

# GENERAL CONFIGURATION
# ------------------------------------------------------------------------------
# Local time zone for this installation. Choices can be found here:
# Local zone for this installation. Choices can be found here:
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
# although not all choices may be available on all operating systems.
# In a Windows environment this must be set to your system time zone.
@@ -483,7 +528,7 @@
# Enable submission of variants to CADD server.
VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False)
# CADD version to use for submission
VARFISH_CADD_SUBMISSION_RELEASE = env.str("VARFISH_CADD_SUBMISSION_RELEASE", default="GRCh37-v1.6")
VARFISH_CADD_SUBMISSION_VERSION = env.str("VARFISH_CADD_SUBMISSION_VERSION", default="v1.6")

# Varfish: MutationTaster URL
VARFISH_MUTATIONTASTER_REST_API_URL = env.str(
6 changes: 6 additions & 0 deletions config/settings/test.py
@@ -78,6 +78,12 @@
]
]

# Logging
# ------------------------------------------------------------------------------

LOGGING_LEVEL = env.str("LOGGING_LEVEL", "CRITICAL")
LOGGING = set_logging(LOGGING_LEVEL)
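# With LOGGING_LEVEL defaulting to CRITICAL, the app loggers configured in
# base.py stay effectively silent during test runs; set LOGGING_LEVEL in the
# environment to re-enable log output when debugging tests.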

# Varfish: REST Services
# ------------------------------------------------------------------------------

2 changes: 1 addition & 1 deletion docs_manual/admin_config.rst
@@ -103,7 +103,7 @@ format, before starting your varfish instance (you can find more details `here <
If you deploy varfish without docker, you can pass the file paths of your metadata.xml and key pair directly. Otherwise, make sure that you have included them
in a single folder and added that folder to your ``docker-compose.yml`` (or add it via a ``docker-compose.override.yml``), as in the following snippet.

.. code-block:: yml
.. code-block:: yaml
varfish-web:
...
75 changes: 73 additions & 2 deletions docs_manual/admin_ingest.rst
@@ -68,6 +68,9 @@ First, obtain some test data for annotation and later import into VarFish Server
$ sha256sum --check varfish-test-data-v0.22.2-20210212.tar.gz.sha256
$ tar -xf varfish-test-data-v0.22.2-20210212.tar.gz
Annotating Small Variant VCFs
-----------------------------

Next, you can use the ``varfish-annotator`` command:

.. code-block:: bash
@@ -83,7 +86,7 @@ Next, you can use the ``varfish-annotator`` command:
--ref-path varfish-annotator-20201006/hs37d5.fa \
--input-vcf "INPUT.vcf.gz" \
--release "GRCh37" \
--output-db-info "FAM_name.db-info.tsv" \
--output-db-info "FAM_name.db-infos.tsv" \
--output-gts "FAM_name.gts.tsv" \
--case-id "FAM_name"
@@ -181,6 +184,72 @@ For example, if you have genotypes for two siblings but none for the parents:
FAM_index father 0 0 1 1
FAM_index mother 0 0 2 1
Annotating Structural Variant VCFs
----------------------------------

Structural variants can be annotated as follows.


.. code-block:: bash
:linenos:
$ varfish-annotator \
annotate-svs \
-XX:MaxHeapSize=10g \
-XX:+UseConcMarkSweepGC \
\
--default-sv-method=YOURCALLERvVERSION \
--release GRCh37 \
\
--db-path varfish-annotator-20201006/varfish-annotator-db-20191129.h2.db \
--ensembl-ser-path varfish-annotator-20201006/hg19_ensembl.ser \
--refseq-ser-path varfish-annotator-20201006/hg19_refseq_curated.ser \
\
--input-vcf FAM_sv_calls.vcf.gz \
--output-db-info FAM_sv_calls.db-info.tsv \
--output-gts FAM_sv_calls.gts.tsv \
--output-feature-effects FAM_sv_calls.feature-effects.tsv
.. note::
``varfish-annotator annotate-svs`` will write out the ``INFO/SVMETHOD`` column to the output file.
If this value is empty then the value from ``--default-sv-method`` will be used.
You **must** either provide ``INFO/SVMETHOD`` or ``--default-sv-method``.
Otherwise, you will get errors in the import step (visible in the case import background task view).
You can use the following shell snippet for adding ``INFO/SVMETHOD`` to your VCF file properly.
Replace ``YOURCALLERvVERSION`` with the value that you want to provide to Varfish.
.. code-block:: shell
cat >$TMPDIR/header.txt <<"EOF"
##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
EOF
bcftools annotate \
--header-lines $TMPDIR/header.txt \
INPUT.vcf.gz \
| awk -F $'\t' '
BEGIN { OFS = FS; }
/^#/ { print $0; }
/^[^#]/ { $8 = $8 ";SVMETHOD=YOURCALLERvVERSION"; print $0; }
' \
| bgzip -c \
> OUTPUT.vcf.gz
tabix -f OUTPUT.vcf.gz
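If you prefer Python over shell, a rough equivalent using pysam is sketched below (this assumes pysam is installed and is not part of the official tooling; ``YOURCALLERvVERSION`` stays a placeholder for your caller name and version).
.. code-block:: python
import pysam

# Open the input VCF and register the SVMETHOD INFO field in its header.
vcf_in = pysam.VariantFile("INPUT.vcf.gz")
vcf_in.header.info.add("SVMETHOD", 1, "String", "Type of approach used to detect SV")

# Write a bgzip-compressed output VCF with SVMETHOD set on every record.
vcf_out = pysam.VariantFile("OUTPUT.vcf.gz", "wz", header=vcf_in.header)
for record in vcf_in:
    if "SVMETHOD" not in record.info:
        record.info["SVMETHOD"] = "YOURCALLERvVERSION"
    vcf_out.write(record)
vcf_out.close()
vcf_in.close()

# Create the tabix index, replacing any existing one.
pysam.tabix_index("OUTPUT.vcf.gz", preset="vcf", force=True)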
Again, you have to compress the output TSV files with ``gzip`` and compute MD5 sums.
.. code-block:: bash
$ gzip -c FAM_sv_calls.db-info.tsv >FAM_sv_calls.db-info.tsv.gz
$ md5sum FAM_sv_calls.db-info.tsv.gz >FAM_sv_calls.db-info.tsv.gz.md5
$ gzip -c FAM_sv_calls.gts.tsv >FAM_sv_calls.gts.tsv.gz
$ md5sum FAM_sv_calls.gts.tsv.gz >FAM_sv_calls.gts.tsv.gz.md5
$ gzip -c FAM_sv_calls.feature-effects.tsv >FAM_sv_calls.feature-effects.tsv.gz
$ md5sum FAM_sv_calls.feature-effects.tsv.gz >FAM_sv_calls.feature-effects.tsv.gz.md5
--------------
Variant Import
--------------
@@ -214,11 +283,13 @@ When executing the import as shown above, you have to specify:
- a pedigree file with suffix ``.ped``,
- a genotype annotation file as generated by ``varfish-annotator`` ending in ``.gts.tsv.gz``,
- a database info file as generated by ``varfish-annotator`` ending in ``.db-infos.tsv.gz``.
- a database info file as generated by ``varfish-annotator`` ending in ``.db-info.tsv.gz``.
Optionally, you can also specify a TSV file with BAM quality control metrics ending in ``.bam-qc.tsv.gz``.
The format is not properly documented yet, but documentation and supporting tools are forthcoming.
If you want to import structural variants for your case, simply submit the output files from the SV annotation step together with the ``.feature-effects.tsv.gz`` and ``.gts.tsv.gz`` files from the small variant annotation step.
Running the import command through VarFish CLI will create a background import job as shown below.
Once the job is done, the created or updated case will appear in the case list.
22 changes: 22 additions & 0 deletions geneinfo/migrations/0025_auto_20211019_0829.py
@@ -0,0 +1,22 @@
# Generated by Django 3.2.7 on 2021-10-19 08:29

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("geneinfo", "0024_alter_ncbigenerif_pubmed_ids"),
]

operations = [
migrations.AddField(
model_name="hgnc", name="agr", field=models.CharField(max_length=32, null=True),
),
migrations.AddField(
model_name="hgnc", name="lncipedia", field=models.CharField(max_length=32, null=True),
),
migrations.AddField(
model_name="hgnc", name="mane_select", field=models.CharField(max_length=64, null=True),
),
]
16 changes: 16 additions & 0 deletions geneinfo/migrations/0026_hgnc_gtrnadb.py
@@ -0,0 +1,16 @@
# Generated by Django 3.2.7 on 2021-10-20 07:15

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("geneinfo", "0025_auto_20211019_0829"),
]

operations = [
migrations.AddField(
model_name="hgnc", name="gtrnadb", field=models.CharField(max_length=32, null=True),
),
]
8 changes: 8 additions & 0 deletions geneinfo/models.py
@@ -107,6 +107,14 @@ class Hgnc(models.Model):
intermediate_filament_db = models.CharField(max_length=32, null=True)
#: RNACentral ID (rnacentral.org database)
rna_central_ids = models.CharField(max_length=32, null=True)
#: GtRNAdb ID
gtrnadb = models.CharField(max_length=32, null=True)
#: lncipedia.org ID
lncipedia = models.CharField(max_length=32, null=True)
#: Alliance of Genome Resources (AGR) ID.
agr = models.CharField(max_length=32, null=True)
#: MANE Select transcript ID.
mane_select = models.CharField(max_length=64, null=True)

#: Allow bulk import
objects = CopyManager()
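# Usage illustration (example only, not from the model definition): the new
# nullable columns can be queried like any other CharField, e.g. to fetch all
# genes with a MANE Select transcript assigned:
#
#     Hgnc.objects.exclude(mane_select__isnull=True)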