Skip to content

Commit

Permalink
Merge 464a526 into fd45bf4
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed Jan 30, 2023
2 parents fd45bf4 + 464a526 commit f37b9cd
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 137 deletions.
2 changes: 2 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,8 @@ dbGaP:
# 'studyX': ['/orgA/', '/orgB/']
# 'studyX.c2': ['/orgB/', '/orgC/']
# 'studyZ': ['/orgD/']
# Additional allowed patterns for project_ids. The default value in usersync is 'phs(\d{6}) for dbgap projects'
additional_allowed_project_id_patterns: []
# Regex to match an assession number that has consent information in forms like:
# phs00301123.c999
# phs000123.v3.p1.c3
Expand Down
44 changes: 35 additions & 9 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,11 +498,8 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
# parse dbGaP sftp server information
dbgap_key = dbgap_config.get("decrypt_key", None)

self.id_patterns += (
dbgap_config.get("allowed_whitelist_patterns", [])
if dbgap_config.get("allow_non_dbGaP_whitelist", False)
else []
)
self.id_patterns += dbgap_config.get("allowed_whitelist_patterns", [])

enable_common_exchange_area_access = dbgap_config.get(
"enable_common_exchange_area_access", False
)
Expand All @@ -514,6 +511,16 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
)

project_id_patterns = [r"phs(\d{6})"]
if "additional_allowed_project_id_patterns" in dbgap_config:
patterns = dbgap_config.get("additional_allowed_project_id_patterns")
patterns = [
r"{}".format(pattern.encode().decode("unicode_escape"))
for pattern in patterns
] # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
project_id_patterns += patterns

for filepath, privileges in file_dict.items():
self.logger.info("Reading file {}".format(filepath))
if os.stat(filepath).st_size == 0:
Expand Down Expand Up @@ -542,12 +549,31 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
continue

phsid_privileges = {}
if dbgap_config.get("allow_non_dbGaP_whitelist", False):
phsid = row.get("phsid", row.get("project_id", "")).split(".")
else:
phsid = row.get("phsid", "").split(".")
phsid = row.get("phsid", row.get("project_id", "")).split(".")

dbgap_project = phsid[0]
# There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
skip = False
for pattern in project_id_patterns:
self.logger.debug(
"Checking pattern:{} with project_id:{}".format(
pattern, dbgap_project
)
)
if re.match(pattern, dbgap_project):
skip = False
break
else:
skip = True
if skip:
self.logger.warning(
"Skip processing from file {}, user {} with project {}".format(
filepath,
username,
dbgap_project,
)
)
continue
if len(phsid) > 1 and self.parse_consent_code:
consent_code = phsid[-1]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
user name, login, project_id
USER D,TESTUSERD,PROJECT-12345
USER B,TESTUSERB,PROJECT-12345
USER C,USERC,PROJECT-12345
USER C,USERC,PROJECT-12345
USER F,USERF,(888)-888-8888
213 changes: 87 additions & 126 deletions tests/dbgap_sync/test_user_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,11 @@ def test_sync_incorrect_user_yaml_file(syncer, monkeypatch, db_session):
assert syncer.arborist_client.create_policy.not_called()


@pytest.mark.parametrize("allow_non_dbgap_whitelist", [False, True])
@pytest.mark.parametrize("syncer", ["google", "cleversafe"], indirect=True)
@pytest.mark.parametrize("parse_consent_code_config", [False, True])
def test_sync(
syncer,
db_session,
allow_non_dbgap_whitelist,
storage_client,
parse_consent_code_config,
monkeypatch,
Expand All @@ -90,9 +88,6 @@ def test_sync(
syncer.dbGaP[0], "parse_consent_code", parse_consent_code_config
)
monkeypatch.setattr(syncer, "parse_consent_code", parse_consent_code_config)
monkeypatch.setitem(
syncer.dbGaP[2], "allow_non_dbGaP_whitelist", allow_non_dbgap_whitelist
)

syncer.sync()

Expand All @@ -102,129 +97,79 @@ def test_sync(
assert len(users) == 9

if parse_consent_code_config:
if allow_non_dbgap_whitelist:
user = models.query_for_user(session=db_session, username="TESTUSERD")
assert equal_project_access(
user.project_access,
{
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
"phs000178.c999": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
"phs000178.c999": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="TESTUSERD")
assert equal_project_access(
user.project_access,
{
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="USERF")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000179.c1": ["read", "read-storage"],
"phs000178.c1": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
"phs000178.c999": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
# Assertion to ensure an invalid project id is not used
user = models.query_for_user(session=db_session, username="USERF")
assert "(888)-888-8888" not in user.project_access
assert equal_project_access(
user.project_access,
{
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
},
)
else:
if allow_non_dbgap_whitelist:
user = models.query_for_user(session=db_session, username="TESTUSERD")
assert equal_project_access(
user.project_access,
{
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"phs000178": ["read", "read-storage"],
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="TESTUSERD")
assert equal_project_access(
user.project_access,
{
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="USERF")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
},
)
user = models.query_for_user(session=db_session, username="USERC")
assert equal_project_access(
user.project_access,
{
"phs000178": ["read", "read-storage"],
"phs000178": ["read", "read-storage"],
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"TCGA-PCAWG": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERD")
assert user.display_name == "USER D"
Expand Down Expand Up @@ -291,6 +236,7 @@ def test_dbgap_consent_codes(
# should additionally include the study-specific exchange area access and
# access to the common exchange area
"test_common_exchange_area": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
Expand All @@ -304,6 +250,7 @@ def test_dbgap_consent_codes(
"phs000178.c1": ["read", "read-storage"],
"phs000178.c2": ["read", "read-storage"],
"phs000178.c999": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
Expand All @@ -313,6 +260,7 @@ def test_dbgap_consent_codes(
{
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

Expand All @@ -327,7 +275,10 @@ def test_dbgap_consent_codes(
)
else:
assert equal_project_access(
user.project_access, {"phs000178": ["read", "read-storage"]}
user.project_access,
{
"phs000178": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
Expand All @@ -337,6 +288,7 @@ def test_dbgap_consent_codes(
{
"phs000178.c1": ["read", "read-storage"],
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
Expand All @@ -345,17 +297,26 @@ def test_dbgap_consent_codes(
{
"phs000178": ["read", "read-storage"],
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERD")
if parse_consent_code_config:
assert equal_project_access(
user.project_access, {"phs000179.c1": ["read", "read-storage"]}
user.project_access,
{
"phs000179.c1": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)
else:
assert equal_project_access(
user.project_access, {"phs000179": ["read", "read-storage"]}
user.project_access,
{
"phs000179": ["read", "read-storage"],
"PROJECT-12345": ["read", "read-storage"],
},
)

resource_to_parent_paths = collections.defaultdict(list)
Expand Down
3 changes: 2 additions & 1 deletion tests/test-fence-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,9 @@ dbGaP:
proxy_user: ''
protocol: 'sftp'
decrypt_key: ''
allow_non_dbGaP_whitelist: false
allowed_whitelist_patterns: ['authentication_file_PROJECT-(\d*).(csv|txt)']
# Additional allowed patterns for project_ids. The default value in usersync is 'phs(\d{6}) for dbgap projects'
additional_allowed_project_id_patterns: ['PROJECT-(\d*)']
study_to_resource_namespaces:
# non dbgap study:
'PROJECT-12345': ['']
Expand Down

0 comments on commit f37b9cd

Please sign in to comment.