Skip to content

Commit

Permalink
Merge 13accc6 into cab1b90
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed Jan 19, 2023
2 parents cab1b90 + 13accc6 commit 873c9db
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 140 deletions.
2 changes: 2 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,8 @@ dbGaP:
# 'studyX': ['/orgA/', '/orgB/']
# 'studyX.c2': ['/orgB/', '/orgC/']
# 'studyZ': ['/orgD/']
# Additional allowed patterns for project_ids. The default pattern in usersync is 'phs(\d{6})', for dbGaP projects.
additional_allowed_project_id_patterns: []
# Regex to match an accession number that has consent information in forms like:
# phs00301123.c999
# phs000123.v3.p1.c3
Expand Down
8 changes: 6 additions & 2 deletions fence/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,14 @@ def post_process(self):

# allow setting DB connection string via env var
if os.environ.get("DB"):
logger.info("Found environment variable 'DB': overriding 'DB' field from config file")
logger.info(
"Found environment variable 'DB': overriding 'DB' field from config file"
)
self["DB"] = os.environ["DB"]
else:
logger.info("Environment variable 'DB' empty or not set: using 'DB' field from config file")
logger.info(
"Environment variable 'DB' empty or not set: using 'DB' field from config file"
)

if "ROOT_URL" not in self._configs and "BASE_URL" in self._configs:
url = urllib.parse.urlparse(self._configs["BASE_URL"])
Expand Down
2 changes: 1 addition & 1 deletion fence/resources/openid/idp_oauth2.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def get_access_token(self, user, token_endpoint, db_session=None):
expires = None

# get refresh_token and expiration from db
for row in sorted(user.upstream_refresh_tokens, key=lambda row:row.expires):
for row in sorted(user.upstream_refresh_tokens, key=lambda row: row.expires):
refresh_token = row.refresh_token
expires = row.expires

Expand Down
44 changes: 35 additions & 9 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,11 +498,8 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
# parse dbGaP sftp server information
dbgap_key = dbgap_config.get("decrypt_key", None)

self.id_patterns += (
dbgap_config.get("allowed_whitelist_patterns", [])
if dbgap_config.get("allow_non_dbGaP_whitelist", False)
else []
)
self.id_patterns += dbgap_config.get("allowed_whitelist_patterns", [])

enable_common_exchange_area_access = dbgap_config.get(
"enable_common_exchange_area_access", False
)
Expand All @@ -514,6 +511,16 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
)

project_id_patterns = [r"phs(\d{6})"]
if "additional_allowed_project_id_patterns" in dbgap_config:
patterns = dbgap_config.get("additional_allowed_project_id_patterns")
patterns = [
r"{}".format(pattern.encode().decode("unicode_escape"))
for pattern in patterns
] # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
project_id_patterns += patterns

for filepath, privileges in file_dict.items():
self.logger.info("Reading file {}".format(filepath))
if os.stat(filepath).st_size == 0:
Expand Down Expand Up @@ -542,12 +549,31 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
continue

phsid_privileges = {}
if dbgap_config.get("allow_non_dbGaP_whitelist", False):
phsid = row.get("phsid", row.get("project_id", "")).split(".")
else:
phsid = row.get("phsid", "").split(".")
phsid = row.get("phsid", row.get("project_id", "")).split(".")

dbgap_project = phsid[0]
# dbGaP whitelists sometimes contain invalid entries. Because we send a single bulk request to arborist, one invalid entry invalidates the whole request and prevents the other, correct entries from being added. Rows whose project ID matches none of the allowed patterns are therefore skipped.
skip = False
for pattern in project_id_patterns:
self.logger.debug(
"Checking pattern:{} with project_id:{}".format(
pattern, dbgap_project
)
)
if re.match(pattern, dbgap_project):
skip = False
break
else:
skip = True
if skip:
self.logger.warning(
"Skip processing from file {}, user {} with project {}".format(
filepath,
username,
dbgap_project,
)
)
continue
if len(phsid) > 1 and self.parse_consent_code:
consent_code = phsid[-1]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
user name, login, project_id
USER D,TESTUSERD,PROJECT-12345
USER B,TESTUSERB,PROJECT-12345
USER C,USERC,PROJECT-12345
USER C,USERC,PROJECT-12345
USER F,USERF,(888)-888-8888
Loading

0 comments on commit 873c9db

Please sign in to comment.