Commit

Extending models with release field for GRCh37/GRCh38.
holtgrewe committed Jan 12, 2022
1 parent 591d9f8 commit ec17d55
Showing 43 changed files with 832 additions and 184 deletions.
13 changes: 13 additions & 0 deletions HISTORY.rst
@@ -6,6 +6,8 @@ History / Changelog
HEAD (unreleased)
-----------------

Breaking changes, see below.

End-User Summary
================

@@ -25,6 +27,10 @@ End-User Summary
- Added section for developers in manual (#267).
- Migrated icons to iconify (#208).
- Bumped chrome-driver version (#208).
- VarFish now allows the import of GRCh38-annotated variants.
  For this, GRCh38 background data must be imported.
  Kiosk mode does not support GRCh38 yet.
  **This is a breaking change; new data and CLI must be used!**

Full Change List
================
@@ -50,6 +56,13 @@ Full Change List
- Migrated icons to iconify (#208).
- Bumped chrome-driver version (#208).
- Skipping codacy if token is not defined (#275).
- Adjusting models and UI for supporting GRCh38 annotated cases.
  It is currently not possible to migrate a GRCh37 case to GRCh38.
- The setting ``VARFISH_CADD_SUBMISSION_RELEASE`` has been renamed to ``VARFISH_CADD_SUBMISSION_VERSION`` (**breaking change**).
- ``import_info.tsv`` is now expected in the format of data release ``20210728``, as built from varfish-db-downloader ``1b03e97`` or later.
- Extended the columns of ``Hgnc`` to match the upstream update.

-------
v0.23.9
1 change: 1 addition & 0 deletions cohorts/templates/cohorts/cohort_create.html
@@ -112,6 +112,7 @@ <h5 class="mb-0">
' <span class="badge-group">' +
' <span class="badge badge-secondary">{{ label }}</span>' +
' <span class="badge badge-info members-count">{{ case.get_members|length }}</span>' +
' <span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>' +
' </span>' +
' </label>' +
'</span>'
15 changes: 14 additions & 1 deletion cohorts/templates/cohorts/cohort_list.html
@@ -2,6 +2,7 @@

{% load dict %}
{% load humanize %}
{% load variants_tags %}
{% load cohorts_tags %}
{% load projectroles_common_tags %}

@@ -90,6 +91,7 @@ <h4 class="card-header">
<a href="{{ case.get_absolute_url }}" class="badge-group" data-toggle="tooltip" data-html="true" title="From project <strong>{{ case.project.title }}</strong>, having {{ case.get_members|length }} individual(s)">
<span class="badge badge-secondary">{{ case.name }}</span>
<span class="badge badge-info">{{ case.get_members|length }}</span>
<span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>
</a>
{% endfor %}
{% if not item|check_accessible_cases:user %}
@@ -105,7 +107,18 @@ <h4 class="card-header">
<span class="btn btn-sm btn-danger disabled"><i class="iconify" data-icon="mdi:trash-can"></i></span>
<span class="btn btn-sm btn-secondary disabled"><i class="iconify" data-icon="mdi:pencil"></i></span>
{% endif %}
<a href="{% url 'variants:project-cases-filter-cohort' project=project.sodar_uuid cohort=item.sodar_uuid %}" class="btn btn-sm btn-primary"><i class="iconify" data-icon="mdi:filter"></i></a>


{% same_release cases as cases_same_release %}
{% if cases_same_release %}
<a href="{% url 'variants:project-cases-filter-cohort' project=project.sodar_uuid cohort=item.sodar_uuid %}" class="btn btn-sm btn-primary"><i class="iconify" data-icon="mdi:filter"></i></a>
{% else %}
<span
class="btn btn-sm btn-primary disabled"
data-toggle="tooltip"
title="Cannot filter cases with different genomes"
><i class="iconify" data-icon="mdi:filter"></i></span>
{% endif %}
</span>
</td>
</tr>
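The ``same_release`` assignment tag used above is loaded from ``variants_tags``. A minimal sketch of such a tag (hypothetical implementation, shown only to illustrate the check the template relies on) could look like this:

from django import template

register = template.Library()


@register.simple_tag
def same_release(cases):
    """Return True if all cases in the cohort share one genome release (GRCh37/GRCh38)."""
    return len({case.release for case in cases}) <= 1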
1 change: 1 addition & 0 deletions cohorts/templates/cohorts/cohort_update.html
@@ -112,6 +112,7 @@ <h5 class="mb-0">
' <span class="badge-group">' +
' <span class="badge badge-secondary">{{ label }}</span>' +
' <span class="badge badge-info members-count">{{ case.get_members|length }}</span>' +
' <span class="badge badge-outlined release" style="border-width: 1px 1px 1px 0; border: 1px solid #323a45;">{{ case.release }}</span>' +
' </span>' +
' </label>' +
'</span>'
49 changes: 47 additions & 2 deletions config/settings/base.py
@@ -192,6 +192,51 @@
logger.info("Enabling VarFishKioskUserMiddleware")
MIDDLEWARE += ["varfish.utils.VarFishKioskUserMiddleware"]

# Logging
# ------------------------------------------------------------------------------

# Custom logging level
LOGGING_LEVEL = env.str("LOGGING_LEVEL", "DEBUG" if DEBUG else "ERROR")

# List of apps to include in logging
LOGGING_APPS = env.list(
"LOGGING_APPS",
default=["projectroles", "siteinfo", "sodarcache", "taskflowbackend", "timeline",],
)

# Path for file logging. If not set, will log only to console
LOGGING_FILE_PATH = env.str("LOGGING_FILE_PATH", None)


def set_logging(level=None):
if not level:
level = "DEBUG" if DEBUG else "ERROR"
app_logger_config = {
"level": level,
"handlers": ["console", "file"] if LOGGING_FILE_PATH else ["console"],
"propagate": True,
}
log_handlers = {
"console": {"level": level, "class": "logging.StreamHandler", "formatter": "simple",}
}
if LOGGING_FILE_PATH:
log_handlers["file"] = {
"level": level,
"class": "logging.FileHandler",
"filename": LOGGING_FILE_PATH,
"formatter": "simple",
}
return {
"version": 1,
"disable_existing_loggers": False,
"formatters": {"simple": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"}},
"handlers": log_handlers,
"loggers": {a: app_logger_config for a in LOGGING_APPS},
}


LOGGING = set_logging(LOGGING_LEVEL)
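# Example (hypothetical values): the logging block above is driven entirely from
# the environment, e.g. via an ``.env`` file read by django-environ:
#
#   LOGGING_LEVEL=INFO
#   LOGGING_APPS=projectroles,timeline,variants
#   LOGGING_FILE_PATH=/var/log/varfish/varfish.log
#
# ``env.list()`` splits the comma-separated LOGGING_APPS value; leaving
# LOGGING_FILE_PATH unset keeps the console-only handler configuration.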

# FIXTURE CONFIGURATION
# ------------------------------------------------------------------------------
# See: https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-FIXTURE_DIRS
@@ -258,7 +303,7 @@

# GENERAL CONFIGURATION
# ------------------------------------------------------------------------------
# Local time zone for this installation. Choices can be found here:
# Local zone for this installation. Choices can be found here:
# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
# although not all choices may be available on all operating systems.
# In a Windows environment this must be set to your system time zone.
@@ -483,7 +528,7 @@
# Enable submission of variants to CADD server.
VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False)
# CADD version to use for submission
VARFISH_CADD_SUBMISSION_RELEASE = env.str("VARFISH_CADD_SUBMISSION_RELEASE", default="GRCh37-v1.6")
VARFISH_CADD_SUBMISSION_VERSION = env.str("VARFISH_CADD_SUBMISSION_VERSION", default="v1.6")

# Varfish: MutationTaster URL
VARFISH_MUTATIONTASTER_REST_API_URL = env.str(
6 changes: 6 additions & 0 deletions config/settings/test.py
@@ -78,6 +78,12 @@
]
]

# Logging
# ------------------------------------------------------------------------------

LOGGING_LEVEL = env.str("LOGGING_LEVEL", "CRITICAL")
LOGGING = set_logging(LOGGING_LEVEL)
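# With LOGGING_LEVEL defaulting to CRITICAL, the app loggers configured in
# base.py stay effectively silent during test runs; set LOGGING_LEVEL in the
# environment to re-enable log output when debugging tests.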

# Varfish: REST Services
# ------------------------------------------------------------------------------

2 changes: 1 addition & 1 deletion docs_manual/admin_config.rst
@@ -103,7 +103,7 @@ format, before starting your varfish instance (you can find more details `here <
If you deploy varfish without docker, you can pass the file paths of your metadata.xml and key pair directly. Otherwise, make sure that you have included them
in a single folder and added that folder to your ``docker-compose.yml`` (or add it via a ``docker-compose.override.yml``), as in the following snippet.

.. code-block:: yml
.. code-block:: yaml
varfish-web:
...
75 changes: 73 additions & 2 deletions docs_manual/admin_ingest.rst
@@ -68,6 +68,9 @@ First, obtain some test data for annotation and later import into VarFish Server
$ sha256sum --check varfish-test-data-v0.22.2-20210212.tar.gz.sha256
$ tar -xf varfish-test-data-v0.22.2-20210212.tar.gz
Annotating Small Variant VCFs
-----------------------------

Next, you can use the ``varfish-annotator`` command:

.. code-block:: bash
@@ -83,7 +86,7 @@ Next, you can use the ``varfish-annotator`` command:
--ref-path varfish-annotator-20201006/hs37d5.fa \
--input-vcf "INPUT.vcf.gz" \
--release "GRCh37" \
--output-db-info "FAM_name.db-info.tsv" \
--output-db-info "FAM_name.db-infos.tsv" \
--output-gts "FAM_name.gts.tsv" \
--case-id "FAM_name"
@@ -181,6 +184,72 @@ For example, if you have genotypes for two siblings but none for the parents:
FAM_index father 0 0 1 1
FAM_index mother 0 0 2 1
Annotating Structural Variant VCFs
----------------------------------

Structural variants can be annotated as follows.


.. code-block:: bash
:linenos:
$ varfish-annotator \
annotate-svs \
-XX:MaxHeapSize=10g \
-XX:+UseConcMarkSweepGC \
\
--default-sv-method=YOURCALLERvVERSION \
--release GRCh37 \
\
--db-path varfish-annotator-20201006/varfish-annotator-db-20191129.h2.db \
--ensembl-ser-path varfish-annotator-20201006/hg19_ensembl.ser \
--refseq-ser-path varfish-annotator-20201006/hg19_refseq_curated.ser \
\
--input-vcf FAM_sv_calls.vcf.gz \
--output-db-info FAM_sv_calls.db-info.tsv \
--output-gts FAM_sv_calls.gts.tsv \
--output-feature-effects FAM_sv_calls.feature-effects.tsv
.. note::
``varfish-annotator annotate-svs`` will write out the ``INFO/SVMETHOD`` column to the output file.
If this value is empty then the value from ``--default-sv-method`` will be used.
You **must** either provide ``INFO/SVMETHOD`` or ``--default-sv-method``.
Otherwise, you will get errors in the import step (visible in the case import background task view).
You can use the following shell snippet for adding ``INFO/SVMETHOD`` to your VCF file properly.
Replace ``YOURCALLERvVERSION`` with the value that you want to provide to Varfish.
.. code-block:: shell
cat >$TMPDIR/header.txt <<"EOF"
##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
EOF
bcftools annotate \
--header-lines $TMPDIR/header.txt \
INPUT.vcf.gz \
| awk -F $'\t' '
BEGIN { OFS = FS; }
/^#/ { print $0; }
/^[^#]/ { $8 = $8 ";SVMETHOD=YOURCALLERvVERSION"; print $0; }
' \
| bgzip -c \
> OUTPUT.vcf.gz
tabix -f OUTPUT.vcf.gz
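If you prefer Python over shell, a rough equivalent using pysam is sketched below (this assumes pysam is installed and is not part of the official tooling; ``YOURCALLERvVERSION`` stays a placeholder for your caller name and version).
.. code-block:: python
import pysam

# Open the input VCF and register the SVMETHOD INFO field in its header.
vcf_in = pysam.VariantFile("INPUT.vcf.gz")
vcf_in.header.info.add("SVMETHOD", 1, "String", "Type of approach used to detect SV")

# Write a bgzip-compressed output VCF with SVMETHOD set on every record.
vcf_out = pysam.VariantFile("OUTPUT.vcf.gz", "wz", header=vcf_in.header)
for record in vcf_in:
    if "SVMETHOD" not in record.info:
        record.info["SVMETHOD"] = "YOURCALLERvVERSION"
    vcf_out.write(record)
vcf_out.close()
vcf_in.close()

# Create the tabix index, replacing any existing one.
pysam.tabix_index("OUTPUT.vcf.gz", preset="vcf", force=True)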
Again, you have to compress the output TSV files with ``gzip`` and compute MD5 sums.
.. code-block:: bash
$ gzip -c FAM_sv_calls.db-info.tsv >FAM_sv_calls.db-info.tsv.gz
$ md5sum FAM_sv_calls.db-info.tsv.gz >FAM_sv_calls.db-info.tsv.gz.md5
$ gzip -c FAM_sv_calls.gts.tsv >FAM_sv_calls.gts.tsv.gz
$ md5sum FAM_sv_calls.gts.tsv.gz >FAM_sv_calls.gts.tsv.gz.md5
$ gzip -c FAM_sv_calls.feature-effects.tsv >FAM_sv_calls.feature-effects.tsv.gz
$ md5sum FAM_sv_calls.feature-effects.tsv.gz >FAM_sv_calls.feature-effects.tsv.gz.md5
--------------
Variant Import
--------------
@@ -214,11 +283,13 @@ When executing the import as shown above, you have to specify:
- a pedigree file with suffix ``.ped``,
- a genotype annotation file as generated by ``varfish-annotator`` ending in ``.gts.tsv.gz``,
- a database info file as generated by ``varfish-annotator`` ending in ``.db-infos.tsv.gz``.
- a database info file as generated by ``varfish-annotator`` ending in ``.db-info.tsv.gz``.
Optionally, you can also specify a TSV file with BAM quality control metrics ending in ``.bam-qc.tsv.gz``.
The format is not properly documented yet, but documentation and supporting tools are forthcoming.
If you want to import structural variants for your case, simply submit the output files from the SV annotation step together with the ``.feature-effects.tsv.gz`` and ``.gts.tsv.gz`` files from the small variant annotation step.
Running the import command through VarFish CLI will create a background import job as shown below.
Once the job is done, the created or updated case will appear in the case list.
22 changes: 22 additions & 0 deletions geneinfo/migrations/0025_auto_20211019_0829.py
@@ -0,0 +1,22 @@
# Generated by Django 3.2.7 on 2021-10-19 08:29

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("geneinfo", "0024_alter_ncbigenerif_pubmed_ids"),
]

operations = [
migrations.AddField(
model_name="hgnc", name="agr", field=models.CharField(max_length=32, null=True),
),
migrations.AddField(
model_name="hgnc", name="lncipedia", field=models.CharField(max_length=32, null=True),
),
migrations.AddField(
model_name="hgnc", name="mane_select", field=models.CharField(max_length=64, null=True),
),
]
16 changes: 16 additions & 0 deletions geneinfo/migrations/0026_hgnc_gtrnadb.py
@@ -0,0 +1,16 @@
# Generated by Django 3.2.7 on 2021-10-20 07:15

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("geneinfo", "0025_auto_20211019_0829"),
]

operations = [
migrations.AddField(
model_name="hgnc", name="gtrnadb", field=models.CharField(max_length=32, null=True),
),
]
8 changes: 8 additions & 0 deletions geneinfo/models.py
@@ -107,6 +107,14 @@ class Hgnc(models.Model):
intermediate_filament_db = models.CharField(max_length=32, null=True)
#: RNACentral ID (rnacentral.org database)
rna_central_ids = models.CharField(max_length=32, null=True)
#: GtRNAdb ID
gtrnadb = models.CharField(max_length=32, null=True)
#: lncipedia.org ID
lncipedia = models.CharField(max_length=32, null=True)
#: Alliance of Genome Resources (AGR) ID.
agr = models.CharField(max_length=32, null=True)
#: MANE Select transcript ID.
mane_select = models.CharField(max_length=64, null=True)

#: Allow bulk import
objects = CopyManager()
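# Usage illustration (example only, not from the model definition): the new
# nullable columns can be queried like any other CharField, e.g. to fetch all
# genes with a MANE Select transcript assigned:
#
#     Hgnc.objects.exclude(mane_select__isnull=True)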