Skip to content

Commit

Permalink
(PXP-10410): Add Project ID format check in usersync (#1067)
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed Feb 23, 2023
1 parent e991cfc commit 8bdb967
Show file tree
Hide file tree
Showing 14 changed files with 100 additions and 20 deletions.
4 changes: 3 additions & 1 deletion fence/blueprints/login/ras.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,9 @@ def post_login(self, user=None, token_result=None, id_from_idp=None):
refresh_token = flask.g.tokens["refresh_token"]
assert "id_token" in flask.g.tokens, "No id_token in user tokens"
id_token = flask.g.tokens["id_token"]
decoded_id = jwt.decode(id_token, algorithms=["RS256"], options={"verify_signature": False})
decoded_id = jwt.decode(
id_token, algorithms=["RS256"], options={"verify_signature": False}
)

# Add 15 days to iat to calculate refresh token expiration time
# TODO do they really not provide exp?
Expand Down
11 changes: 11 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,13 @@ dbGaP:
proxy_user: ''
protocol: 'sftp'
decrypt_key: ''
# allow non_dbgap_whitelist allows you to read a non-dbgap sftp server.
# by default Fence only allows usersync to read a dbgap whitelist with
# the format authentication_file_phs000123.csv enabling this with addition to
# providing a list of additional_allowed_project_id_patterns allows usersyn to
# read any filename that matches the patterns in the list.
allow_non_dbGaP_whitelist: false
additional_allowed_project_id_patterns: []
# parse out the consent from the dbgap accession number such that something
# like "phs000123.v1.p1.c2" becomes "phs000123.c2".
#
Expand Down Expand Up @@ -551,6 +558,10 @@ dbGaP:
# 'studyX': ['/orgA/', '/orgB/']
# 'studyX.c2': ['/orgB/', '/orgC/']
# 'studyZ': ['/orgD/']
# Allowed patterns for project_ids. The default value in usersync is 'phs(\d{6}) for dbgap projects'
allowed_project_id_patterns: []
# Additional allowed patterns for project_ids. The default value in usersync is 'phs(\d{6}) for dbgap projects'
additional_allowed_project_id_patterns: []
# Regex to match an assession number that has consent information in forms like:
# phs00301123.c999
# phs000123.v3.p1.c3
Expand Down
8 changes: 4 additions & 4 deletions fence/jwt/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def validate_jwt(
if oidc_iss:
issuers.append(oidc_iss)
try:
token_iss = jwt.decode(encoded_token, algorithms=["RS256"], options={"verify_signature": False}).get(
"iss"
)
token_iss = jwt.decode(
encoded_token, algorithms=["RS256"], options={"verify_signature": False}
).get("iss")
except jwt.InvalidTokenError as e:
raise JWTError(e)
attempt_refresh = attempt_refresh and (token_iss != iss)
Expand Down Expand Up @@ -176,7 +176,7 @@ def validate_jwt(
##### end refresh token, API key patch block #####
msg = "Invalid token : {}".format(str(e))
unverified_claims = jwt.decode(
encoded_token, algorithms=["RS256"], options={"verify_signature": False}
encoded_token, algorithms=["RS256"], options={"verify_signature": False}
)
if not unverified_claims.get("scope") or "" in unverified_claims["scope"]:
msg += "; was OIDC client configured with scopes?"
Expand Down
4 changes: 3 additions & 1 deletion fence/sync/passport_sync/ras_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ def _parse_single_visa(self, user, encoded_visa, expires, parse_consent_code):

# do not verify again, assume this happens upstream
# note that this can fail, upstream should handle the case that parsing fails
decoded_visa = jwt.decode(encoded_visa, algorithms=["RS256"], options={"verify_signature": False})
decoded_visa = jwt.decode(
encoded_visa, algorithms=["RS256"], options={"verify_signature": False}
)

ras_dbgap_permissions = decoded_visa.get("ras_dbgap_permissions", [])
project = {}
Expand Down
33 changes: 33 additions & 0 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
if dbgap_config.get("allow_non_dbGaP_whitelist", False)
else []
)

enable_common_exchange_area_access = dbgap_config.get(
"enable_common_exchange_area_access", False
)
Expand All @@ -514,6 +515,16 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
)

project_id_patterns = [r"phs(\d{6})"]
if "additional_allowed_project_id_patterns" in dbgap_config:
patterns = dbgap_config.get("additional_allowed_project_id_patterns")
patterns = [
r"{}".format(pattern.encode().decode("unicode_escape"))
for pattern in patterns
] # when converting the YAML from fence-config, python reads it as Python string literal. So "\" turns into "\\" which messes with the regex match
project_id_patterns += patterns

for filepath, privileges in file_dict.items():
self.logger.info("Reading file {}".format(filepath))
if os.stat(filepath).st_size == 0:
Expand Down Expand Up @@ -548,6 +559,28 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
phsid = row.get("phsid", "").split(".")

dbgap_project = phsid[0]
# There are issues where dbgap has a wrong entry in their whitelist. Since we do a bulk arborist request, there are wrong entries in it that invalidates the whole request causing other correct entries not to be added
skip = False
for pattern in project_id_patterns:
self.logger.debug(
"Checking pattern:{} with project_id:{}".format(
pattern, dbgap_project
)
)
if re.match(pattern, dbgap_project):
skip = False
break
else:
skip = True
if skip:
self.logger.warning(
"Skip processing from file {}, user {} with project {}".format(
filepath,
username,
dbgap_project,
)
)
continue
if len(phsid) > 1 and self.parse_consent_code:
consent_code = phsid[-1]

Expand Down
4 changes: 1 addition & 3 deletions tests/admin/test_admin_users_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ def encoded_admin_jwt(kid, rsa_private_key):
claims["iss"] = config["BASE_URL"]
claims["exp"] += 600
claims["scope"].append("admin")
return jwt.encode(
claims, key=rsa_private_key, headers=headers, algorithm="RS256"
)
return jwt.encode(claims, key=rsa_private_key, headers=headers, algorithm="RS256")


# Dictionary for all these random magic numbers that the delete user
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
user name, login, project_id
USER D,TESTUSERD,PROJECT-12345
USER B,TESTUSERB,PROJECT-12345
USER C,USERC,PROJECT-12345
USER C,USERC,PROJECT-12345
USER F,USERF,(888)-888-8888
15 changes: 12 additions & 3 deletions tests/dbgap_sync/test_user_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,10 @@ def test_dbgap_consent_codes(
)
else:
assert equal_project_access(
user.project_access, {"phs000178": ["read", "read-storage"]}
user.project_access,
{
"phs000178": ["read", "read-storage"],
},
)

user = models.query_for_user(session=db_session, username="TESTUSERB")
Expand All @@ -351,11 +354,17 @@ def test_dbgap_consent_codes(
user = models.query_for_user(session=db_session, username="TESTUSERD")
if parse_consent_code_config:
assert equal_project_access(
user.project_access, {"phs000179.c1": ["read", "read-storage"]}
user.project_access,
{
"phs000179.c1": ["read", "read-storage"],
},
)
else:
assert equal_project_access(
user.project_access, {"phs000179": ["read", "read-storage"]}
user.project_access,
{
"phs000179": ["read", "read-storage"],
},
)

resource_to_parent_paths = collections.defaultdict(list)
Expand Down
4 changes: 3 additions & 1 deletion tests/jwt/test_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def test_passport_access_token(app, kid, rsa_private_key, test_user_a):
user=test_user_a,
client_id="client_a",
)
payload = jwt.decode(jwt_token.token, options={"verify_signature": False}, algorithms=["RS256"])
payload = jwt.decode(
jwt_token.token, options={"verify_signature": False}, algorithms=["RS256"]
)
# assert required fields exist
assert payload["iss"] is not None or ""
assert payload["sub"] is not None or ""
Expand Down
8 changes: 6 additions & 2 deletions tests/link/test_link_id_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def test_google_id_token_not_linked(oauth_test_client):
data = {"confirm": "yes"}
oauth_test_client.authorize(data=data)
tokens = oauth_test_client.token()
id_token = jwt.decode(tokens.id_token, options={"verify_signature": False}, algorithms=["RS256"])
id_token = jwt.decode(
tokens.id_token, options={"verify_signature": False}, algorithms=["RS256"]
)
assert id_token["context"]["user"].get("google") is None


Expand Down Expand Up @@ -48,7 +50,9 @@ def test_google_id_token_linked(db_session, encoded_creds_jwt, oauth_test_client
data = {"confirm": "yes"}
oauth_test_client.authorize(data=data)
tokens = oauth_test_client.token()
id_token = jwt.decode(tokens.id_token, options={"verify_signature": False}, algorithms=["RS256"])
id_token = jwt.decode(
tokens.id_token, options={"verify_signature": False}, algorithms=["RS256"]
)

assert "google" in id_token["context"]["user"]
assert (
Expand Down
12 changes: 10 additions & 2 deletions tests/rfc6749/test_oauth2.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ def test_oauth2_token_post(oauth_test_client):
assert "expires_in" in response
assert response.get("token_type") == "Bearer"

payload = jwt.decode(response["access_token"], options={"verify_signature": False}, algorithms=["RS256"])
payload = jwt.decode(
response["access_token"],
options={"verify_signature": False},
algorithms=["RS256"],
)
assert payload.get("iss") == "http://localhost/user"
assert payload.get("azp") == oauth_test_client.client_id
assert "context" in payload
Expand Down Expand Up @@ -124,7 +128,11 @@ def test_oauth2_with_client_credentials(
assert "expires_in" in response
assert response.get("token_type") == "Bearer"

payload = jwt.decode(response["access_token"], options={"verify_signature": False}, algorithms=["RS256"])
payload = jwt.decode(
response["access_token"],
options={"verify_signature": False},
algorithms=["RS256"],
)
assert payload.get("iss") == "http://localhost/user"
assert payload.get("azp") == oauth_test_client_with_client_credentials.client_id
assert payload.get("context") == {} # no user linked to this token
Expand Down
8 changes: 7 additions & 1 deletion tests/rfc6749/test_revoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ def test_blacklisted_token(client, oauth_client, encoded_jwt_refresh_token):
print(encoded_jwt_refresh_token)
import jwt

print(jwt.decode(encoded_jwt_refresh_token, options={"verify_signature": False}, algorithms=["RS256"]))
print(
jwt.decode(
encoded_jwt_refresh_token,
options={"verify_signature": False},
algorithms=["RS256"],
)
)
assert response.status_code == 200, response.data
assert is_token_blacklisted(encoded_jwt_refresh_token)

Expand Down
2 changes: 2 additions & 0 deletions tests/test-fence-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ dbGaP:
decrypt_key: ''
allow_non_dbGaP_whitelist: false
allowed_whitelist_patterns: ['authentication_file_PROJECT-(\d*).(csv|txt)']
# Additional allowed patterns for project_ids. The default value in usersync is 'phs(\d{6}) for dbgap projects'
additional_allowed_project_id_patterns: ['PROJECT-(\d*)']
study_to_resource_namespaces:
# non dbgap study:
'PROJECT-12345': ['']
Expand Down
4 changes: 3 additions & 1 deletion tests/test_audit_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,9 @@ def test_login_log_login_endpoint(
idp_name = idp
headers = {}
get_user_id_value = {}
jwt_string = jwt.encode({"iat": int(time.time())}, key=rsa_private_key, algorithm="RS256")
jwt_string = jwt.encode(
{"iat": int(time.time())}, key=rsa_private_key, algorithm="RS256"
)

if idp == "synapse":
get_user_id_value = {
Expand Down

0 comments on commit 8bdb967

Please sign in to comment.