From 784a6bb7d7140d9ead170236120bc241c03dbb9c Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 14 Apr 2021 14:02:18 +0200 Subject: [PATCH] Fixing issue of too large database queries in Clinvar Export feature. --- HISTORY.rst | 2 ++ variants/queries.py | 25 ++++++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 5106bdb95..9ee426683 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,11 +9,13 @@ HEAD (unreleased) End-User Summary ================ +- Fixing issue of database query in Clinvar Export feature where too large queries were created. - Fixing search feature. Full Change List ================ +- Fixing issue of database query in Clinvar Export feature where too large queries were created and postgres ran out of stack memory. - Adding more Sentry integrations (redis, celery, sqlalchemy). - Fixing search feature. diff --git a/variants/queries.py b/variants/queries.py index dbde32ed6..abb4d83bb 100644 --- a/variants/queries.py +++ b/variants/queries.py @@ -1811,6 +1811,11 @@ def run(self, *, case=None, cases=None, project=None): return self._query(case_ids) def _query(self, case_ids: typing.List[int]): + def chunks(lst, n=50): + """Yield successive n-sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i : i + n] + result_flags = self._query_model(SmallVariantFlags, case_ids) result_comments = self._query_model(SmallVariantComment, case_ids) result_ratings = self._query_model(AcmgCriteriaRating, case_ids) @@ -1822,17 +1827,19 @@ def _query(self, case_ids: typing.List[int]): ) ) - keys = ["case_id", "release", "chromosome", "start", "reference", "alternative"] - stmt = ( - select([SmallVariant.sa.id]) - .select_from(SmallVariant.sa.table) - .where( - tuple_(*[getattr(SmallVariant.sa, key) for key in keys]).in_( - [[getattr(k, key) for key in keys] for k in variant_keys] + small_var_ids = [] + for variant_keys_chunk in chunks(variant_keys): + keys = ["case_id", "release", "chromosome", "start", "reference", "alternative"] + stmt = ( + select([SmallVariant.sa.id]) + .select_from(SmallVariant.sa.table) + .where( + tuple_(*[getattr(SmallVariant.sa, key) for key in keys]).in_( + [[getattr(k, key) for key in keys] for k in variant_keys_chunk] + ) ) ) - ) - small_var_ids = [rec.id for rec in self.engine.execute(stmt)] + small_var_ids += [rec.id for rec in self.engine.execute(stmt)] flags_ids = [x["id"] for x in result_flags] comments_ids = [x["id"] for x in result_comments]