Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create single result row even if multiple clinvar entries (#565) #631

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions HISTORY.rst
Expand Up @@ -13,6 +13,7 @@ End-User Summary
- Extra annotations in export completed and tested (#495).
- Fixed bug where Exac and thousand genomes settings were not shown in frequency tab for GRCh37 (#597).
- Form template reports error if genomebuild variable is not set (#607).
- Create single result row even if multiple clinvar entries (#565).

Full Change List
================
Expand All @@ -21,6 +22,7 @@ Full Change List
- Fixing issue with sync-from-remote when no remote is defined (#570).
- Fixed bug where Exac and thousand genomes settings were not shown in frequency tab for GRCh37 (#597).
- Form template reports error if genomebuild variable is not set (#607).
- Create single result row even if multiple clinvar entries (#565).

-----------------
v1.2.1 (anthenea)
Expand Down
6 changes: 3 additions & 3 deletions variants/file_export.py
Expand Up @@ -110,7 +110,7 @@ def to_str(val):
("gnomad_oe_lof", "Gnomad constrains lof observed/expected", float),
("gnomad_oe_lof_upper", "Gnomad constrains lof observed/expected upper", float),
("gnomad_oe_lof_lower", "Gnomad constrains lof observed/expected lower", float),
("pathogenicity_summary", "ClinVar pathogenicity summary", str),
("pathogenicity_summary_arr", "ClinVar pathogenicity summary", str),
)
if settings.KIOSK_MODE:
HEADER_FIXED = tuple(filter(lambda x: not x[0].startswith("inhouse_"), HEADER_FIXED))
Expand All @@ -123,8 +123,8 @@ def to_str(val):

#: Names of the pathogenicity scoring header columns.
HEADERS_PATHO_SCORES = (
("pathogenicity_score", "Pathogenicity Score", float),
("pathogenicity_rank", "Pathogenicity Rank", int),
("pathogenicity_score_arr", "Pathogenicity Score", float),
("pathogenicity_rank_arr", "Pathogenicity Rank", int),
)

HEADERS_TRANSCRIPTS = (("transcripts", "Transcript ids", str),)
Expand Down
39 changes: 23 additions & 16 deletions variants/queries.py
Expand Up @@ -9,8 +9,9 @@
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from sqlalchemy import Table, true, column, union, literal_column, delete, tuple_
from sqlalchemy.dialects.postgresql import ARRAY
from sqlalchemy.sql import select, func, and_, not_, or_, cast
from sqlalchemy.types import ARRAY, VARCHAR, Integer, Float
from sqlalchemy.types import VARCHAR, Integer, Float, String
import sqlparse

from clinvar.models import Clinvar
Expand Down Expand Up @@ -360,16 +361,23 @@ def extend_selectable(self, query_parts):
class ExtendQueryPartsClinvarJoin(ExtendQueryPartsBase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._col_names = (
"variation_type",
"vcv",
"point_rating",
"pathogenicity",
"review_status",
"pathogenicity_summary",
"details",
)

self.subquery = (
select(
(
Clinvar.sa.variation_type,
Clinvar.sa.vcv,
Clinvar.sa.point_rating,
Clinvar.sa.pathogenicity,
Clinvar.sa.review_status,
Clinvar.sa.pathogenicity_summary,
Clinvar.sa.details,
tuple(
func.array_agg(
getattr(Clinvar.sa, name), type_=ARRAY(String(length=128))
).label(f"{name}_arr")
for name in self._col_names
)
)
.select_from(Clinvar.sa)
Expand All @@ -382,12 +390,8 @@ def extend_selectable(self, query_parts):

def extend_fields(self, _query_parts):
return [
self.subquery.c.variation_type,
self.subquery.c.vcv,
self.subquery.c.point_rating,
self.subquery.c.pathogenicity,
self.subquery.c.pathogenicity_summary,
self.subquery.c.details,
func.coalesce(getattr(self.subquery.c, f"{name}_arr"), []).label(f"{name}_arr")
for name in self._col_names
]


Expand Down Expand Up @@ -417,7 +421,10 @@ def _build_significance_term(self):
return True
for patho_key in self.patho_keys:
if self.kwargs.get("clinvar_include_%s" % patho_key):
terms.append(self.subquery.c.pathogenicity == patho_key.replace("_", " "))
# ClinVar columns are aggregated into arrays, so match via array containment.
terms.append(
self.subquery.c.pathogenicity_arr.contains([patho_key.replace("_", " ")])
)
return or_(*terms)


Expand Down
27 changes: 27 additions & 0 deletions variants/tests/test_queries.py
Expand Up @@ -4476,6 +4476,33 @@ def test_render_query_require_membership_include_benign(self):
)
self.assertEqual(res[0].start, self.small_vars[6].start)

def test_render_query_single_output_line_even_with_multiple_clinvar_annos(self):
    """Query a variant that has an extra ClinVar annotation and check the first row.

    A further ClinVar record is created at the coordinates and alleles of
    ``self.small_vars[1]``; the query is then restricted to a region that
    encloses that variant.
    """
    variant = self.small_vars[1]
    # Add a second ClinVar annotation sharing the variant's position and alleles.
    ClinvarFactory(
        release=variant.release,
        chromosome=variant.chromosome,
        start=variant.start,
        end=variant.end,
        bin=variant.bin,
        reference=variant.reference,
        alternative=variant.alternative,
        pathogenicity="pathogenic",
    )
    # Pad the region by one base on either side of the variant.
    region = (variant.chromosome, variant.start - 1, variant.end + 1)
    # NOTE(review): the trailing ``1`` is presumably the expected number of
    # result rows -- confirm against ``run_query``.
    rows = self.run_query(self.query_class, {"genomic_region": [region]}, 1)
    self.assertEqual(rows[0].start, variant.start)


class RenderQueryTestCaseThreeClinvarFilter(CaseThreeClinvarFilterTestMixin, SupportQueryTestBase):
"""Test clinvar membership using RenderFilterQuery."""
Expand Down
4 changes: 2 additions & 2 deletions variants/tests/test_views.py
Expand Up @@ -1098,7 +1098,7 @@ def test_clinvar(self):
),
{"filter_job_uuid": self.bgjob.sodar_uuid},
)
self.assertEqual(response.context["result_rows"][1].pathogenicity, "pathogenic")
self.assertEqual(response.context["result_rows"][1].pathogenicity_arr, ["pathogenic"])

def test_training_mode(self):
with self.login(self.superuser):
Expand Down Expand Up @@ -2264,7 +2264,7 @@ def test_clinvar(self):
{"filter_job_uuid": self.bgjob.sodar_uuid},
)
self.assertEqual(response.status_code, 200)
self.assertEqual(response.context["result_rows"][4].pathogenicity, "pathogenic")
self.assertEqual(response.context["result_rows"][4].pathogenicity_arr, ["pathogenic"])

@patch("django.conf.settings.VARFISH_ENABLE_CADD", True)
@patch("django.conf.settings.VARFISH_CADD_REST_API_URL", "https://cadd.com")
Expand Down