Skip to content

Commit

Permalink
Resolve Reivew
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed May 9, 2022
1 parent 16bbe64 commit eb16450
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 18 deletions.
2 changes: 0 additions & 2 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -904,8 +904,6 @@ SERVICE_ACCOUNT_LIMIT: 6
GLOBAL_PARSE_VISAS_ON_LOGIN:
# Settings for usersync with visas
USERSYNC:
# allowed list of custom project ids that can be processed by usersync
allowed_custom_project_ids: ["CUSTOM-*"]
sync_from_visas: false
# fallback to dbgap sftp when there are no valid visas for a user i.e. if they're expired or if they're malformed
fallback_to_dbgap_sftp: false
Expand Down
26 changes: 16 additions & 10 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,6 @@ def __init__(
self.folder = folder
self.sync_from_visas = sync_from_visas
self.fallback_to_dbgap_sftp = fallback_to_dbgap_sftp
self.allow_non_dbgap_whitelist = dbGaP[0].get(
"allow_non_dbgap_whitelist", False
)

self.auth_source = defaultdict(set)
# auth_source used for logging. username : [source1, source2]
Expand All @@ -341,6 +338,10 @@ def __init__(
self.storage_manager = StorageManager(
storage_credentials, logger=self.logger
)
self.id_patterns = []
self.non_dbGaP_whitelist = config.get("USERSYNC", {}).get(
"non_dbgap_whitelist_enabled", False
)

@staticmethod
def _match_pattern(filepath, id_patterns, encrypted=True):
Expand All @@ -354,9 +355,9 @@ def _match_pattern(filepath, id_patterns, encrypted=True):
Returns:
bool: whether the pattern matches
"""
id_patterns.insert(0, "phs(\d{6})")
id_patterns.append("authentication_file_phs(\d{6}).(csv|txt)")
for pattern in id_patterns:
pattern = r"authentication_file_{}.(csv|txt)".format(pattern)
pattern = r"{}".format(pattern)
if encrypted:
pattern += ".enc"
pattern += "$"
Expand Down Expand Up @@ -479,9 +480,10 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
# parse dbGaP sftp server information
dbgap_key = dbgap_config.get("decrypt_key", None)
parse_consent_code = dbgap_config.get("parse_consent_code", True)
id_patterns = (
dbgap_config.get("allowed_id_patterns", [])
if self.allow_non_dbgap_whitelist
self.id_patterns += (
dbgap_config.get("allowed_whitelist_patterns", [])
if self.non_dbGaP_whitelist
and dbgap_config.get("allow_non_dbGaP_whitelist", False)
else []
)
enable_common_exchange_area_access = dbgap_config.get(
Expand All @@ -501,7 +503,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
self.logger.warning("Empty file {}".format(filepath))
continue
if not self._match_pattern(
filepath, id_patterns=id_patterns, encrypted=encrypted
filepath, id_patterns=self.id_patterns, encrypted=encrypted
):
self.logger.warning(
"Filename {} does not match dbgap access control filename pattern;"
Expand All @@ -523,7 +525,11 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
continue

phsid_privileges = {}
phsid = row.get("phsid", row.get("project_id", "")).split(".")
if self.non_dbGaP_whitelist:
phsid = row.get("phsid", row.get("project_id", "")).split(".")
else:
phsid = row.get("phsid", "").split(".")

dbgap_project = phsid[0]
if len(phsid) > 1 and parse_consent_code:
consent_code = phsid[-1]
Expand Down
9 changes: 7 additions & 2 deletions tests/dbgap_sync/test_user_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ def test_sync(
syncer.dbGaP[0], "parse_consent_code", parse_consent_code_config
)
monkeypatch.setattr(syncer, "parse_consent_code", parse_consent_code_config)
monkeypatch.setattr(syncer, "allow_non_dbgap_whitelist", allow_non_dbgap_whitelist)
monkeypatch.setitem(
syncer.dbGaP[2], "allow_non_dbGaP_whitelist", allow_non_dbgap_whitelist
)
monkeypatch.setattr(syncer, "non_dbGaP_whitelist", allow_non_dbgap_whitelist)
# merge sftp server containing both dbgap and non-dbgap whitelists
monkeypatch.setattr(syncer, "is_sync_from_dbgap_server", allow_non_dbgap_whitelist)

syncer.sync()

Expand Down Expand Up @@ -683,7 +688,7 @@ def mock_merge(dbgap_servers, sess):

# this function will be called once for each sftp server
# the test config file has 2 dbgap sftp servers
assert syncer._process_dbgap_files.call_count == 2
assert syncer._process_dbgap_files.call_count == 3


@pytest.mark.parametrize("syncer", ["cleversafe", "google"], indirect=True)
Expand Down
27 changes: 23 additions & 4 deletions tests/test-fence-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,6 @@ dbGaP:
proxy_user: ''
protocol: 'sftp'
decrypt_key: ''
allow_non_dbGaP_whitelist: false
allowed_id_patterns: ['PROJECT-12345']
# parse out the consent from the dbgap accession number such that something
# like "phs000123.v1.p1.c2" becomes "phs000123.c2".
#
Expand Down Expand Up @@ -370,8 +368,6 @@ dbGaP:
'phs000178': ['/orgA/', '/orgB/', '/']
# study when parsing consent codes
'phs000178.c2': ['/orgA/', '/orgB/', '/']
# non dbgap study:
'PROJECT-12345': ['']
- info:
host: ''
username: ''
Expand Down Expand Up @@ -408,6 +404,20 @@ dbGaP:
'phs000178': ['/orgA/', '/orgB/', '/']
# study when parsing consent codes
'phs000178.c2': ['/orgA/', '/orgB/', '/']
- info:
host: ''
username: ''
password: ''
port: 22
proxy: ''
proxy_user: ''
protocol: 'sftp'
decrypt_key: ''
allow_non_dbGaP_whitelist: true
allowed_whitelist_patterns: ['authentication_file_PROJECT-(\d*).(csv|txt)']
study_to_resource_namespaces:
# non dbgap study:
'PROJECT-12345': ['']
# Regex to match an assession number that has consent information in forms like:
# phs00301123.c999
# phs000123.v3.p1.c3
Expand Down Expand Up @@ -639,3 +649,12 @@ ASSUME_ROLE_CACHE_SECONDS: 1800
# List of JWT issuers from which Fence will accept GA4GH visas
GA4GH_VISA_ISSUER_ALLOWLIST:
- 'https://stsstg.nih.gov'

USERSYNC:
non_dbgap_whitelist_enabled: false
sync_from_visas: false
# fallback to dbgap sftp when there are no valid visas for a user i.e. if they're expired or if they're malformed
fallback_to_dbgap_sftp: false
visa_types:
ras: ["https://ras.nih.gov/visas/v1", "https://ras.nih.gov/visas/v1.1"]
RAS_USERINFO_ENDPOINT: '/openid/connect/v1.1/userinfo'

0 comments on commit eb16450

Please sign in to comment.