From cc1f9c4d7b1ad7824aa9b2349ae89208dadff011 Mon Sep 17 00:00:00 2001 From: Rudolf Cardinal Date: Mon, 29 Nov 2021 12:11:14 +0000 Subject: [PATCH] TIMELY: minor refactoring --- .../ancillary/timely_project/ddcriteria.py | 125 ++++++++++++++++++ .../filter_cpft_rio_data_dictionary.py | 86 ++---------- 2 files changed, 133 insertions(+), 78 deletions(-) create mode 100644 crate_anon/ancillary/timely_project/ddcriteria.py diff --git a/crate_anon/ancillary/timely_project/ddcriteria.py b/crate_anon/ancillary/timely_project/ddcriteria.py new file mode 100644 index 000000000..caf5afa73 --- /dev/null +++ b/crate_anon/ancillary/timely_project/ddcriteria.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python + +""" +crate_anon/ancillary/timely_project/ddcriteria.py + +=============================================================================== + + Copyright (C) 2015-2021 Rudolf Cardinal (rudolf@pobox.com). + + This file is part of CRATE. + + CRATE is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + CRATE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with CRATE. If not, see <https://www.gnu.org/licenses/>. + +=============================================================================== + +Helper code for MRC TIMELY project (Moore, grant MR/T046430/1). Not of general +interest. + +Helpers for data dictionary filtering. 
+ +""" + +# ============================================================================= +# Imports +# ============================================================================= + +import re +from typing import List, Optional, Tuple + + +# ============================================================================= +# Constants +# ============================================================================= + +# Approvals are in stages. +N_STAGES = 6 + +# Arbitrary symbol that we'll use for "regex matches": +MATCHES = "≛" + + +# ============================================================================= +# Deciding about rows +# ============================================================================= + +class TableCriterion: + """ + Stores a regular expression so we can reuse it compiled for speed and view + it and its associated stage. + """ + def __init__(self, stage: Optional[int], table_regex_str: str) -> None: + assert stage is None or 1 <= stage <= N_STAGES + self.stage = stage + self.table_regex_str = table_regex_str + self.table_regex_compiled = re.compile(table_regex_str, + flags=re.IGNORECASE) + + def table_match(self, tablename: str) -> bool: + """ + Does ``tablename`` match our stored pattern? + """ + return bool(self.table_regex_compiled.match(tablename)) + + def description(self) -> str: + return f"table {MATCHES} {self.table_regex_str}" + + +class FieldCriterion(TableCriterion): + """ + As for :class:`TableCriterion`, but for both a table and a field (column) + name. + """ + def __init__(self, field_regex_str: str, **kwargs) -> None: + super().__init__(**kwargs) + self.field_regex_str = field_regex_str + self.field_regex_compiled = re.compile(field_regex_str, + flags=re.IGNORECASE) + + def table_field_match(self, tablename: str, fieldname: str) -> bool: + """ + Do both the table and field names match? 
+ """ + return bool( + self.table_regex_compiled.match(tablename) + and self.field_regex_compiled.match(fieldname) + ) + + def description(self) -> str: + return ( + f"table {MATCHES} {self.table_regex_str}, " + f"field {MATCHES} {self.field_regex_str}" + ) + + +def add_table_criteria(criteria: List[TableCriterion], + stage: Optional[int], + regex_strings: List[str]) -> None: + """ + Appends to ``criteria``. + """ + for rs in regex_strings: + criteria.append(TableCriterion(stage=stage, table_regex_str=rs)) + + +def add_field_criteria(criteria: List[TableCriterion], + stage: Optional[int], + regex_tuples: List[Tuple[str, str]]) -> None: + """ + Appends to ``criteria``. + """ + for tablename, fieldname in regex_tuples: + criteria.append(FieldCriterion(stage=stage, + table_regex_str=tablename, + field_regex_str=fieldname)) diff --git a/crate_anon/ancillary/timely_project/filter_cpft_rio_data_dictionary.py b/crate_anon/ancillary/timely_project/filter_cpft_rio_data_dictionary.py index 73b5e9d8f..dd16d2635 100755 --- a/crate_anon/ancillary/timely_project/filter_cpft_rio_data_dictionary.py +++ b/crate_anon/ancillary/timely_project/filter_cpft_rio_data_dictionary.py @@ -42,12 +42,18 @@ import argparse import copy import logging -import re -from typing import List, Optional, Tuple +from typing import List, Optional from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger from sqlalchemy.dialects.mssql.base import dialect as mssql_server_dialect +from crate_anon.ancillary.timely_project.ddcriteria import ( + add_field_criteria, + add_table_criteria, + FieldCriterion, + N_STAGES, + TableCriterion, +) from crate_anon.anonymise.config import Config from crate_anon.anonymise.dd import DataDictionary from crate_anon.anonymise.ddr import DataDictionaryRow @@ -59,82 +65,6 @@ # Deciding about rows # ============================================================================= -# Approvals are in stages. 
- -N_STAGES = 6 - - -class TableCriterion: - """ - Stores a regular expression so we can reuse it compiled for speed and view - it and its associated stage. - """ - def __init__(self, stage: Optional[int], table_regex_str: str) -> None: - assert stage is None or 1 <= stage <= N_STAGES - self.stage = stage - self.table_regex_str = table_regex_str - self.table_regex_compiled = re.compile(table_regex_str, - flags=re.IGNORECASE) - - def table_match(self, tablename: str) -> bool: - """ - Does ``tablename`` match our stored pattern? - """ - return bool(self.table_regex_compiled.match(tablename)) - - def description(self) -> str: - return f"table ≛ {self.table_regex_str}" - - -class FieldCriterion(TableCriterion): - """ - As for :class:`TableCriterion`, but for both a table and a field (column) - name. - """ - def __init__(self, field_regex_str: str, **kwargs) -> None: - super().__init__(**kwargs) - self.field_regex_str = field_regex_str - self.field_regex_compiled = re.compile(field_regex_str, - flags=re.IGNORECASE) - - def table_field_match(self, tablename: str, fieldname: str) -> bool: - """ - Do both the table and field names match? - """ - return bool( - self.table_regex_compiled.match(tablename) - and self.field_regex_compiled.match(fieldname) - ) - - def description(self) -> str: - return ( - f"table ≛ {self.table_regex_str}, " - f"field ≛ {self.field_regex_str}" - ) - - -def add_table_criteria(criteria: List[TableCriterion], - stage: Optional[int], - regex_strings: List[str]) -> None: - """ - Appends to ``criteria``. - """ - for rs in regex_strings: - criteria.append(TableCriterion(stage=stage, table_regex_str=rs)) - - -def add_field_criteria(criteria: List[TableCriterion], - stage: Optional[int], - regex_tuples: List[Tuple[str, str]]) -> None: - """ - Appends to ``criteria``. 
- """ - for tablename, fieldname in regex_tuples: - criteria.append(FieldCriterion(stage=stage, - table_regex_str=tablename, - field_regex_str=fieldname)) - - # ----------------------------------------------------------------------------- # Generic exclusions # -----------------------------------------------------------------------------