diff --git a/.secrets.baseline b/.secrets.baseline index 3b595fd2f..04fb8f259 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -22,7 +22,7 @@ }, { "name": "HexHighEntropyString", - "limit": 3.0 + "limit": 3 }, { "name": "IbmCloudIamDetector" @@ -100,16 +100,33 @@ }, { "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_file", + "pattern": [ + "poetry.lock" + ] } ], "results": { + "deployment/scripts/postgresql/postgresql_init.sql": [ + { + "type": "Secret Keyword", + "filename": "deployment/scripts/postgresql/postgresql_init.sql", + "hashed_secret": "afc848c316af1a89d49826c5ae9d00ed769415f3", + "is_verified": false, + "line_number": 7, + "is_secret": false + } + ], "fence/blueprints/storage_creds/google.py": [ { "type": "Private Key", "filename": "fence/blueprints/storage_creds/google.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 139 + "line_number": 139, + "is_secret": false } ], "fence/blueprints/storage_creds/other.py": [ @@ -118,14 +135,16 @@ "filename": "fence/blueprints/storage_creds/other.py", "hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd", "is_verified": false, - "line_number": 66 + "line_number": 66, + "is_secret": false }, { "type": "Secret Keyword", "filename": "fence/blueprints/storage_creds/other.py", "hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd", "is_verified": false, - "line_number": 66 + "line_number": 66, + "is_secret": false } ], "fence/config-default.yaml": [ @@ -134,7 +153,8 @@ "filename": "fence/config-default.yaml", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 31 + "line_number": 31, + "is_secret": false } ], "fence/local_settings.example.py": [ @@ -143,14 +163,16 @@ "filename": "fence/local_settings.example.py", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 6 + "line_number": 6, + "is_secret": false }, { "type": "Secret Keyword", "filename": "fence/local_settings.example.py", "hashed_secret": "5d07e1b80e448a213b392049888111e1779a52db", "is_verified": false, - "line_number": 63 + "line_number": 63, + "is_secret": false } ], "fence/resources/google/utils.py": [ @@ -159,7 +181,7 @@ "filename": "fence/resources/google/utils.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 125 + "line_number": 129 } ], "fence/utils.py": [ @@ -168,44 +190,24 @@ "filename": "fence/utils.py", "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 105 - }, - { - "type": "Secret Keyword", - "filename": "fence/utils.py", - "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", - "is_verified": false, - "line_number": 249 - }, - { - "type": "Secret Keyword", - "filename": "fence/utils.py", - "hashed_secret": "8954f53c9dc3f57137230a016d65bfaee24f8bc5", - "is_verified": false, - "line_number": 250 + "line_number": 105, + "is_secret": false } ], "tests/conftest.py": [ - { - "type": "Secret Keyword", - "filename": "tests/conftest.py", - "hashed_secret": "9801ff058ba790388c9efc095cb3e89a819d5ed6", - "is_verified": false, - "line_number": 160 - }, { "type": "Private Key", "filename": "tests/conftest.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1358 + "line_number": 1482 }, { "type": "Base64 High Entropy String", "filename": "tests/conftest.py", "hashed_secret": 
"227dea087477346785aefd575f91dd13ab86c108", "is_verified": false, - "line_number": 1381 + "line_number": 1505 } ], "tests/credentials/google/test_credentials.py": [ @@ -214,21 +216,24 @@ "filename": "tests/credentials/google/test_credentials.py", "hashed_secret": "a06bdb09c0106ab559bd6acab2f1935e19f7e939", "is_verified": false, - "line_number": 381 + "line_number": 381, + "is_secret": false }, { "type": "Secret Keyword", "filename": "tests/credentials/google/test_credentials.py", "hashed_secret": "93aa43c580f5347782e17fba5091f944767b15f0", "is_verified": false, - "line_number": 474 + "line_number": 474, + "is_secret": false }, { "type": "Secret Keyword", "filename": "tests/credentials/google/test_credentials.py", "hashed_secret": "768b7fe00de4fd233c0c72375d12f87ce9670144", "is_verified": false, - "line_number": 476 + "line_number": 476, + "is_secret": false } ], "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem": [ @@ -237,7 +242,8 @@ "filename": "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1 + "line_number": 1, + "is_secret": false } ], "tests/login/test_fence_login.py": [ @@ -246,7 +252,8 @@ "filename": "tests/login/test_fence_login.py", "hashed_secret": "d300421e208bfd0d432294de15169fd9b8975def", "is_verified": false, - "line_number": 48 + "line_number": 48, + "is_secret": false } ], "tests/ras/test_ras.py": [ @@ -255,16 +262,7 @@ "filename": "tests/ras/test_ras.py", "hashed_secret": "d9db6fe5c14dc55edd34115cdf3958845ac30882", "is_verified": false, - "line_number": 95 - } - ], - "tests/scripting/test_fence-create.py": [ - { - "type": "Secret Keyword", - "filename": "tests/scripting/test_fence-create.py", - "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", - "is_verified": false, - "line_number": 1122 + "line_number": 120 } ], "tests/test-fence-config.yaml": [ diff --git a/bin/fence_create.py b/bin/fence_create.py index 1c32f1541..c906f1731 100755 --- a/bin/fence_create.py +++ b/bin/fence_create.py @@ -17,6 +17,8 @@ create_sample_data, delete_client_action, delete_users, + delete_expired_google_access, + cleanup_expired_ga4gh_information, google_init, list_client_action, link_external_bucket, @@ -33,7 +35,7 @@ force_update_google_link, migrate_database, google_list_authz_groups, - update_user_visas, + access_token_polling_job, ) from fence.settings import CONFIG_SEARCH_FOLDERS @@ -147,6 +149,8 @@ def parse_arguments(): subparsers.add_parser("expired-service-account-delete") subparsers.add_parser("bucket-access-group-verify") + subparsers.add_parser("delete-expired-google-access") + subparsers.add_parser("cleanup-expired-ga4gh-information") hmac_create = subparsers.add_parser("hmac-create") hmac_create.add_argument("yaml-input") @@ -405,9 +409,6 @@ def main(): STORAGE_CREDENTIALS = os.environ.get("STORAGE_CREDENTIALS") or config.get( "STORAGE_CREDENTIALS" ) - usersync = config.get("USERSYNC", {}) - sync_from_visas = usersync.get("sync_from_visas", False) - fallback_to_dbgap_sftp = usersync.get("fallback_to_dbgap_sftp", False) arborist = None if args.arborist: @@ -459,6 +460,10 @@ def main(): delete_expired_service_accounts(DB) elif args.action == "bucket-access-group-verify": verify_bucket_access_group(DB) + elif args.action == "delete-expired-google-access": + delete_expired_google_access(DB) + elif args.action == "cleanup-expired-ga4gh-information": + cleanup_expired_ga4gh_information(DB) elif args.action == "sync": sync_users( dbGaP, @@ -470,8 +475,6 @@ def 
main():
            sync_from_local_yaml_file=args.yaml,
            folder=args.folder,
            arborist=arborist,
-            sync_from_visas=sync_from_visas,
-            fallback_to_dbgap_sftp=fallback_to_dbgap_sftp,
        )
    elif args.action == "dbgap-download-access-files":
        download_dbgap_files(
@@ -572,7 +575,7 @@ def main():
    elif args.action == "migrate":
        migrate_database(DB)
    elif args.action == "update-visas":
-        update_user_visas(
+        access_token_polling_job(
            DB,
            chunk_size=args.chunk_size,
            concurrency=args.concurrency,
diff --git a/docs/ga4gh_passports.md b/docs/ga4gh_passports.md
new file mode 100644
index 000000000..0e6f0da21
--- /dev/null
+++ b/docs/ga4gh_passports.md
@@ -0,0 +1,73 @@
+# Passport Support in Gen3 Framework Services (G3FS)
+
+G3FS will support a data access flow accepting Global Alliance for Genomics and Health (GA4GH) Passport(s) as a means of authentication and authorization to access file objects.
+
+For National Institutes of Health (NIH) data, we will no longer rely on dbGaP User Access Telemetry files from the hourly usersync for authorization, but instead on NIH's Researcher Auth Service (RAS) Passports.
+
+The adoption of GA4GH specifications across NIH-funded platforms is a strategic initiative being pushed on numerous fronts.
+
+> Our overall goal is interoperability through accepted standards (like GA4GH).
+
+As a GA4GH Driver Project, we identified numerous gaps and concerns with GA4GH's specifications throughout the process of implementing passport support. Most have now been addressed, waived, accepted as known risks, or punted to a future version, and there are ongoing discussions about modifications for the future.
+
+Please refer to the official documentation about RAS Milestones for all historic and official decisions and designs related to RAS. This document serves as an **unofficial technical overview** for maintainers of Gen3 and **may not be updated as regularly or represented as clearly as other public-facing documents**.
+
+## Passport and Visa JSON Web Token (JWT) Handling
+
+Overview of the standards-based verification and validation flow for JWTs.
+
+References:
+
+* [GA4GH AAI](https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md)
+* [GA4GH Passport](https://github.com/ga4gh-duri/ga4gh-duri.github.io/blob/master/researcher_ids/ga4gh_passport_v1.md)
+* [OpenID Connect Core](https://openid.net/specs/openid-connect-core-1_0.html)
+* [Internet Engineering Task Force (IETF) RFC: JSON Web Token (JWT)](https://datatracker.ietf.org/doc/html/rfc7519)
+
+The following shows external DRS Client(s) communicating with Gen3 Framework Services (as a GA4GH DRS Server) and how G3FS interacts with Passport Brokers to validate and verify JWTs.
+
+![Passport and Visa JWT Handling](images/ga4gh/passport_jwt_handling.png)
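+As a concrete illustration of the validation rules above, here is a minimal sketch of checking a single embedded visa. This is illustrative only and not Fence's actual implementation (see `fence/jwt/validate.py` and `fence/resources/ga4gh/passports.py`): the `validate_embedded_visa` name and `jwks_url` parameter are hypothetical, and `VISA_ISSUER_ALLOWLIST` stands in for the `GA4GH_VISA_ISSUER_ALLOWLIST` config value.
+
+```python
+import jwt  # pyjwt>=2.0
+
+# stand-in for config["GA4GH_VISA_ISSUER_ALLOWLIST"]
+VISA_ISSUER_ALLOWLIST = ["https://sts.nih.gov", "https://stsstg.nih.gov"]
+
+
+def validate_embedded_visa(encoded_visa, jwks_url):
+    """Validate one visa per the GA4GH AAI "Embedded Access Token" rules."""
+    unverified_claims = jwt.decode(encoded_visa, options={"verify_signature": False})
+    if unverified_claims.get("iss") not in VISA_ISSUER_ALLOWLIST:
+        raise ValueError("visa issuer is not in the allowlist")
+
+    # resolve the signing key the issuer advertises (matched on the header `kid`)
+    signing_key = jwt.PyJWKClient(jwks_url).get_signing_key_from_jwt(encoded_visa)
+
+    # visas must contain iss, sub, iat, and exp, and must NOT contain `aud`;
+    # because no `audience` is passed here, pyjwt rejects any token carrying one
+    decoded_visa = jwt.decode(
+        encoded_visa,
+        signing_key.key,
+        algorithms=["RS256"],
+        options={"require": ["iss", "sub", "iat", "exp"]},
+    )
+
+    # visas must carry a scope claim that includes `openid`
+    if "openid" not in decoded_visa.get("scope", "").split():
+        raise ValueError("visa scope must include 'openid'")
+
+    return decoded_visa
+```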
+## G3FS: Configurable Roles for Data Access
+
+Gen3 Framework Services are capable of acting in many different roles: as data repositories (DRS Servers in GA4GH terminology), as authorization decision makers (GA4GH Claims Clearinghouses), and/or as token issuers (GA4GH Passport Brokers). G3FS is also capable of being a client to other Passport Brokers. G3FS must be a client to an upstream Identity Provider (IdP), as it never stores user passwords but relies on authentication from another trusted source.
+
+The following diagrams describe the role of the passport in these various configurations.
+
+![Gen3 as DRS Server](images/ga4gh/gen3_as_drs.png)
+
+![Gen3 as Client](images/ga4gh/gen3_as_client.png)
+
+![Gen3 as Both](images/ga4gh/gen3_as_client_and_drs_server.png)
+
+## Performance Improvements
+
+In some respects, the support for passports required an auth re-architecture to:
+
+1. accept third-party generated token(s) as a source of truth for authentication and authorization
+2. parse that authorization information at the time of the data access request (rather than syncing it beforehand)
+
+Passports can be provided to our data access APIs before we've ever seen that user, whereas previously we bulk-synced all authorization **before** data access (behind the scenes as a cronjob). Because of this new, dynamic authorization decision making upon data requests, we knew we'd need to take extra steps to ensure non-degraded performance.
+
+We added a number of things to mitigate the performance impact on researchers' workflows. Most notably, we introduced a cache for valid passports such that when we receive thousands of requests to access data and the _exact same_ passport is sent thousands of times over a few minutes, we are able to validate and parse it once and rely on that result for subsequent requests. The cache only lives as long as policy and standards allow (usually less than an hour). A simplified sketch of this cache appears at the end of this document.
+
+To illustrate the need for such a cache, see the before-and-after images below.
+
+![Before Caching](images/ga4gh/caching_before.png)
+
+![After Caching](images/ga4gh/caching_after.png)
+
+## User Identities
+
+Different GA4GH Visas may refer to the same subject differently. In order to maintain the known mappings between different representations of the same identity, we are creating an Issuer+Subject to User mapping table. The primary key on this table is the combination of the `iss` and `sub` from the JWTs.
+
+![User Identities](images/ga4gh/users.png)
+
+## Backend Updates and Expiration
+
+To ensure the removal of access at the right time, our cronjobs are updated based on the figure and notes below. We are requiring movement away from the deprecated, legacy, limited Fence authorization support in favor of the new policy engine (which supports expiration of policies out of the box).
+
+There is an argument here for an event-based architecture, but Gen3 does not currently support one. We are instead extending our cronjobs to ensure expirations occur at the right time.
+
+![Cronjobs and Expirations](images/ga4gh/expiration.png)
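+To make the passport cache described under Performance Improvements concrete, here is a simplified sketch of the in-memory half of that cache. It follows the `{passport_hash: ([user_id_0, user_id_1, ...], expires_at)}` format noted in `fence/resources/ga4gh/passports.py`; the function names are hypothetical, the real implementation also persists entries to the `ga4gh_passport_cache` database table, and SHA-256 is an assumption (the table's `passport_hash` column is 64 characters, consistent with a SHA-256 hex digest).
+
+```python
+import hashlib
+import time
+
+# in-memory cache: {passport_hash: ([user_id_0, user_id_1, ...], expires_at)}
+PASSPORT_CACHE = {}
+
+
+def get_cached_user_ids(encoded_passport):
+    """Return user ids for a previously validated passport, or None on a miss."""
+    passport_hash = hashlib.sha256(encoded_passport.encode("utf-8")).hexdigest()
+    user_ids, expires_at = PASSPORT_CACHE.get(passport_hash, (None, 0))
+    if user_ids and expires_at > time.time():
+        return user_ids
+    # unknown or expired: drop any stale entry and force a full re-validation
+    PASSPORT_CACHE.pop(passport_hash, None)
+    return None
+
+
+def cache_validated_passport(encoded_passport, user_ids, min_visa_expiration):
+    """Cache a validated passport, but never past its earliest visa expiration."""
+    passport_hash = hashlib.sha256(encoded_passport.encode("utf-8")).hexdigest()
+    PASSPORT_CACHE[passport_hash] = (user_ids, min_visa_expiration)
+```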
+> _All diagrams are originally from an **internal** CTDS Document. The link to that document is [here](https://lucid.app/lucidchart/5c52b868-5cd2-4c6e-b53b-de2981f7da98/edit?invitationId=inv_9a757cb1-fc81-4189-934d-98c3db06d2fc) for internal people who need to edit the above diagrams._
diff --git a/docs/images/ga4gh/caching_after.png b/docs/images/ga4gh/caching_after.png
new file mode 100644
index 000000000..e64dbc998
Binary files /dev/null and b/docs/images/ga4gh/caching_after.png differ
diff --git a/docs/images/ga4gh/caching_before.png b/docs/images/ga4gh/caching_before.png
new file mode 100644
index 000000000..d7371f06e
Binary files /dev/null and b/docs/images/ga4gh/caching_before.png differ
diff --git a/docs/images/ga4gh/expiration.png b/docs/images/ga4gh/expiration.png
new file mode 100644
index 000000000..87e269e58
Binary files /dev/null and b/docs/images/ga4gh/expiration.png differ
diff --git a/docs/images/ga4gh/gen3_as_client.png b/docs/images/ga4gh/gen3_as_client.png
new file mode 100644
index 000000000..009919ee5
Binary files /dev/null and b/docs/images/ga4gh/gen3_as_client.png differ
diff --git a/docs/images/ga4gh/gen3_as_client_and_drs_server.png b/docs/images/ga4gh/gen3_as_client_and_drs_server.png
new file mode 100644
index 000000000..b65cd3ae2
Binary files /dev/null and b/docs/images/ga4gh/gen3_as_client_and_drs_server.png differ
diff --git a/docs/images/ga4gh/gen3_as_drs.png b/docs/images/ga4gh/gen3_as_drs.png
new file mode 100644
index 000000000..920b42236
Binary files /dev/null and b/docs/images/ga4gh/gen3_as_drs.png differ
diff --git a/docs/images/ga4gh/passport_jwt_handling.png b/docs/images/ga4gh/passport_jwt_handling.png
new file mode 100644
index 000000000..ef922241c
Binary files /dev/null and b/docs/images/ga4gh/passport_jwt_handling.png differ
diff --git a/docs/images/ga4gh/passport_to_drs_flow.png b/docs/images/ga4gh/passport_to_drs_flow.png
new file mode 100644
index 000000000..31248c15b
Binary files /dev/null and b/docs/images/ga4gh/passport_to_drs_flow.png differ
diff --git a/docs/images/ga4gh/users.png b/docs/images/ga4gh/users.png
new file mode 100644
index 000000000..37902cecf
Binary files /dev/null and b/docs/images/ga4gh/users.png differ
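The new documentation above and the `fence/blueprints/ga4gh.py` changes below describe POSTing GA4GH passports to the DRS access endpoint. As a purely illustrative client-side sketch (not part of this patch): the host and GUID are placeholders, the JSON field name comes from `GA4GH_DRS_POSTED_PASSPORT_FIELD` (`"passports"`), and the URL assumes the standard DRS `/ga4gh/drs/v1/objects/{object_id}/access/{access_id}` path, which this diff does not show in full.

```python
import requests

BASE_URL = "https://gen3.example.org"  # placeholder Gen3 instance
object_id = "dg.1234/placeholder-guid"  # placeholder DRS GUID
access_id = "s3"  # the protocol / access method being requested
encoded_passport = "<header.payload.signature>"  # a raw GA4GH passport JWT

# send the passport in the POST body; do NOT also send an Authorization
# header -- fence rejects requests that supply both
response = requests.post(
    f"{BASE_URL}/ga4gh/drs/v1/objects/{object_id}/access/{access_id}",
    json={"passports": [encoded_passport]},
)
response.raise_for_status()
print(response.json())  # expected to contain the signed URL
```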
diff --git a/fence/__init__.py b/fence/__init__.py
index 4a00ea14c..2444b5368 100755
--- a/fence/__init__.py
+++ b/fence/__init__.py
@@ -14,13 +14,27 @@
 from werkzeug.middleware.dispatcher import DispatcherMiddleware
 from azure.storage.blob import BlobServiceClient
 from azure.core.exceptions import ResourceNotFoundError
+from urllib.parse import urlparse
+
+# Can't read config yet. Just set to debug for now, else no handlers.
+# Later, in app_config(), will actually set level based on config
+logger = get_logger(__name__, log_level="debug")
+
+# Load the configuration *before* importing modules that rely on it
+from fence.config import config
+from fence.settings import CONFIG_SEARCH_FOLDERS
+
+config.load(
+    config_path=os.environ.get("FENCE_CONFIG_PATH"),
+    search_folders=CONFIG_SEARCH_FOLDERS,
+)
 
 from fence.auth import logout, build_redirect_url
 from fence.blueprints.data.indexd import S3IndexedFileLocation
 from fence.blueprints.login.utils import allowed_login_redirects, domain
 from fence.errors import UserError
 from fence.jwt import keys
-from fence.models import migrate
+from fence.models import migrate, IdentityProvider
 from fence.oidc.client import query_client
 from fence.oidc.server import server
 from fence.resources.audit.client import AuditServiceClient
@@ -38,8 +52,6 @@
 from fence.resources.user.user_session import UserSessionInterface
 from fence.error_handler import get_error_response
 from fence.utils import random_str
-from fence.config import config
-from fence.settings import CONFIG_SEARCH_FOLDERS
 
 import fence.blueprints.admin
 import fence.blueprints.data
 import fence.blueprints.login
@@ -60,10 +72,6 @@
 PROMETHEUS_TMP_COUNTER_DIR = tempfile.TemporaryDirectory()
 
-# Can't read config yet. Just set to debug for now, else no handlers.
-# Later, in app_config(), will actually set level based on config
-logger = get_logger(__name__, log_level="debug")
-
 app = flask.Flask(__name__)
 CORS(app=app, headers=["content-type", "accept"], expose_headers="*")
diff --git a/fence/auth.py b/fence/auth.py
index 18bd8f634..516876561 100644
--- a/fence/auth.py
+++ b/fence/auth.py
@@ -114,6 +114,7 @@ def set_flask_session_values(user):
         if id_from_idp:
             user.id_from_idp = id_from_idp
+            # TODO: update iss_sub mapping table?
 
     # setup idp connection for new user (or existing user w/o it setup)
     idp = (
diff --git a/fence/blueprints/data/blueprint.py b/fence/blueprints/data/blueprint.py
index 8b10e54ea..d19460421 100755
--- a/fence/blueprints/data/blueprint.py
+++ b/fence/blueprints/data/blueprint.py
@@ -9,7 +9,8 @@
     IndexedFile,
     get_signed_url_for_file,
 )
-from fence.errors import Forbidden, InternalError, UserError
+from fence.config import config
+from fence.errors import Forbidden, InternalError, UserError
 from fence.resources.audit.utils import enable_audit_logging
 from fence.utils import get_valid_expiration
diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py
index 9237a2e95..aaa6fdd59 100755
--- a/fence/blueprints/data/indexd.py
+++ b/fence/blueprints/data/indexd.py
@@ -2,9 +2,9 @@
 import time
 import json
 from urllib.parse import urlparse, ParseResult, urlunparse
-
 from datetime import datetime, timedelta
 
+from sqlalchemy.sql.functions import user
 from cached_property import cached_property
 import cirrus
 from cirrus import GoogleCloudManager
@@ -12,6 +12,7 @@
 from cdispyutils.config import get_value
 from cdispyutils.hmac4 import generate_aws_presigned_url
 import flask
+from flask_sqlalchemy_session import current_session
 import requests
 from azure.storage.blob import (
     BlobServiceClient,
@@ -19,6 +20,7 @@
     AccountSasPermissions,
     generate_blob_sas,
 )
+from fence import auth
 
 from fence.auth import (
     get_jwt,
@@ -43,9 +45,10 @@
     get_google_app_creds,
     give_service_account_billing_access_if_necessary,
 )
+from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports
 from fence.utils import get_valid_expiration_from_request
 
 from . 
import multipart_upload -from ...models import AssumeRoleCacheAWS +from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id from ...models import AssumeRoleCacheGCP logger = get_logger(__name__) @@ -58,28 +61,71 @@ SUPPORTED_PROTOCOLS = ["s3", "http", "ftp", "https", "gs", "az"] SUPPORTED_ACTIONS = ["upload", "download"] -ANONYMOUS_USER_ID = "anonymous" +ANONYMOUS_USER_ID = "-1" ANONYMOUS_USERNAME = "anonymous" -def get_signed_url_for_file(action, file_id, file_name=None, requested_protocol=None): +def get_signed_url_for_file( + action, + file_id, + file_name=None, + requested_protocol=None, + ga4gh_passports=None, + db_session=None, +): requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) + db_session = db_session or current_session - # default to signing the url even if it's a public object - # this will work so long as we're provided a user token + # default to signing the url force_signed_url = True no_force_sign_param = flask.request.args.get("no_force_sign") if no_force_sign_param and no_force_sign_param.lower() == "true": force_signed_url = False + if ga4gh_passports and not config["GA4GH_PASSPORTS_TO_DRS_ENABLED"]: + raise NotSupported( + "Using GA4GH Passports as a means of authentication and authorization " + "is not supported by this instance of Gen3." + ) + + users_from_passports = {} + if ga4gh_passports: + # users_from_passports = {"username": Fence.User} + users_from_passports = sync_gen3_users_authz_from_ga4gh_passports( + ga4gh_passports, db_session=db_session + ) + # add the user details to `flask.g.audit_data` first, so they are # included in the audit log if `IndexedFile(file_id)` raises a 404 - user_info = _get_user_info(sub_type=int) - flask.g.audit_data = { - "username": user_info["username"], - "sub": user_info["user_id"], - } + if users_from_passports: + if len(users_from_passports) > 1: + logger.warning( + "audit service doesn't support multiple users for a " + "single request yet, so just log userinfo here" + ) + for username, user in users_from_passports.items(): + audit_data = { + "username": username, + "sub": user.id, + } + logger.info( + f"passport with multiple user ids is attempting data access. 
audit log: {audit_data}" + ) + else: + username, user = next(iter(users_from_passports.items())) + flask.g.audit_data = { + "username": username, + "sub": user.id, + } + else: + user_info = _get_user_info_for_id_or_from_request( + sub_type=int, db_session=db_session + ) + flask.g.audit_data = { + "username": user_info["username"], + "sub": user_info["user_id"], + } indexed_file = IndexedFile(file_id) default_expires_in = config.get("MAX_PRESIGNED_URL_TTL", 3600) @@ -89,16 +135,24 @@ def get_signed_url_for_file(action, file_id, file_name=None, requested_protocol= ) prepare_presigned_url_audit_log(requested_protocol, indexed_file) - - signed_url = indexed_file.get_signed_url( + signed_url, authorized_user_from_passport = indexed_file.get_signed_url( requested_protocol, action, expires_in, force_signed_url=force_signed_url, r_pays_project=r_pays_project, file_name=file_name, + users_from_passports=users_from_passports, ) + # a single user from the list was authorized so update the audit log to reflect that + # users info + if authorized_user_from_passport: + flask.g.audit_data = { + "username": authorized_user_from_passport.username, + "sub": authorized_user_from_passport.id, + } + # increment counter for gen3-metrics counter = flask.current_app.prometheus_counters.get("pre_signed_url_req") if counter: @@ -313,7 +367,7 @@ def generate_aws_presigned_url_for_part(key, uploadId, partNumber, expires_in): "fence not configured with data upload bucket; can't create signed URL" ) s3_url = "s3://{}/{}".format(bucket, key) - return S3IndexedFileLocation(s3_url).generate_presigne_url_for_part_upload( + return S3IndexedFileLocation(s3_url).generate_presigned_url_for_part_upload( uploadId, partNumber, expires_in ) @@ -394,18 +448,31 @@ def get_signed_url( force_signed_url=True, r_pays_project=None, file_name=None, + users_from_passports=None, ): + users_from_passports = users_from_passports or {} + authorized_user = None if self.index_document.get("authz"): action_to_permission = { "upload": "write-storage", "download": "read-storage", } - if not self.check_authz(action_to_permission[action]): - raise Unauthorized( - f"Either you weren't logged in or you don't have " + is_authorized, authorized_username = self.get_authorized_with_username( + action_to_permission[action], + # keys are usernames + usernames_from_passports=list(users_from_passports.keys()), + ) + if not is_authorized: + msg = ( + f"Either you weren't authenticated successfully or you don't have " f"{action_to_permission[action]} permission " - f"on authz resource: {self.index_document['authz']}" + f"on authorization resource: {self.index_document['authz']}." + ) + logger.debug( + f"denied. 
authorized_username: {authorized_username}\nmsg:\n{msg}" ) + raise Unauthorized(msg) + authorized_user = users_from_passports.get(authorized_username) else: if self.public_acl and action == "upload": raise Unauthorized( @@ -413,19 +480,35 @@ def get_signed_url( ) # don't check the authorization if the file is public # (downloading public files with no auth is fine) - if not self.public_acl and not self.check_authorization(action): + if not self.public_acl and not self.check_legacy_authorization(action): raise Unauthorized( f"You don't have access permission on this file: {self.file_id}" ) if action is not None and action not in SUPPORTED_ACTIONS: raise NotSupported("action {} is not supported".format(action)) - return self._get_signed_url( - protocol, action, expires_in, force_signed_url, r_pays_project, file_name + return ( + self._get_signed_url( + protocol, + action, + expires_in, + force_signed_url, + r_pays_project, + file_name, + authorized_user, + ), + authorized_user, ) def _get_signed_url( - self, protocol, action, expires_in, force_signed_url, r_pays_project, file_name + self, + protocol, + action, + expires_in, + force_signed_url, + r_pays_project, + file_name, + authorized_user=None, ): if action == "upload": # NOTE: self.index_document ensures the GUID exists in indexd and raises @@ -442,9 +525,9 @@ def _get_signed_url( return self.indexed_file_locations[0].get_signed_url( action, expires_in, - public_data=self.public, force_signed_url=force_signed_url, r_pays_project=r_pays_project, + authorized_user=authorized_user, ) except IndexError: raise NotFound("Can't find any file locations.") @@ -457,9 +540,9 @@ def _get_signed_url( return file_location.get_signed_url( action, expires_in, - public_data=self.public, force_signed_url=force_signed_url, r_pays_project=r_pays_project, + authorized_user=authorized_user, ) raise NotFound( @@ -476,50 +559,76 @@ def set_acls(self): else: raise Unauthorized("This file is not accessible") - def check_authz(self, action): + def get_authorized_with_username(self, action, usernames_from_passports=None): + """ + Return a tuple of (boolean, str) which represents whether they're authorized + and their username. username is only returned if `usernames_from_passports` + is provided and one of the usernames from the passports is authorized. + + Args: + action (str): Authorization action being performed + usernames_from_passports (list[str], optional): List of user usernames parsed + from validated passports + + Returns: + tuple of (boolean, str): which represents whether they're authorized + and their username. username is only returned if `usernames_from_passports` + is provided and one of the usernames from the passports is authorized. + """ if not self.index_document.get("authz"): raise ValueError("index record missing `authz`") logger.debug( - f"authz check can user {action} on {self.index_document['authz']} for fence?" + f"authz check can user {action} on {self.index_document['authz']} for fence? 
" + f"if passport provided, IDs parsed: {usernames_from_passports}" ) - try: - token = get_jwt() - except Unauthorized: - # get_jwt raises an Unauthorized error when user is anonymous (no - # availble token), so to allow anonymous users possible access to - # public data, we still make the request to Arborist - token = None - - return flask.current_app.arborist.auth_request( - jwt=token, - service="fence", - methods=action, - resources=self.index_document["authz"], - ) + # handle multiple GA4GH passports as a means of authn/z + if usernames_from_passports: + authorized = False + for username in usernames_from_passports: + authorized = flask.current_app.arborist.auth_request( + jwt=None, + user_id=username, + service="fence", + methods=action, + resources=self.index_document["authz"], + ) + # if any passport provides access, user is authorized + if authorized: + # for google proxy groups and future use: we need to know which + # user_id actually gave access + return authorized, username + return authorized, None + else: + try: + token = get_jwt() + except Unauthorized: + # get_jwt raises an Unauthorized error when user is anonymous (no + # available token), so to allow anonymous users possible access to + # public data, we still make the request to Arborist + token = None + + return ( + flask.current_app.arborist.auth_request( + jwt=token, + service="fence", + methods=action, + resources=self.index_document["authz"], + ), + None, + ) @cached_property def metadata(self): return self.index_document.get("metadata", {}) - @cached_property - def public(self): - if self.index_document.get("authz", []): - return self.public_authz - else: - return self.public_acl - @cached_property def public_acl(self): return "*" in self.set_acls - @cached_property - def public_authz(self): - return "/open" in self.index_document.get("authz", []) - @login_required({"data"}) - def check_authorization(self, action): + def check_legacy_authorization(self, action): # if we have a data file upload without corresponding metadata, the record can # have just the `uploader` field and no ACLs. in this just check that the # current user's username matches the uploader field @@ -636,8 +745,8 @@ def get_signed_url( self, action, expires_in, - public_data=False, force_signed_url=True, + users_from_passports=None, **kwargs, ): return self.url @@ -833,7 +942,12 @@ def get_bucket_region(self): return bucket_cred["region"] def get_signed_url( - self, action, expires_in, public_data=False, force_signed_url=True, **kwargs + self, + action, + expires_in, + force_signed_url=True, + authorized_user=None, + **kwargs, ): aws_creds = get_value( @@ -859,7 +973,7 @@ def get_signed_url( bucket_name, aws_creds, expires_in ) - # if it's public and we don't need to force the signed url, just return the raw + # if we don't need to force the signed url, just return the raw # s3 url aws_access_key_id = get_value( credential, @@ -869,7 +983,7 @@ def get_signed_url( # `aws_access_key_id == "*"` is a special case to support public buckets # where we do *not* want to try signing at all. 
the other case is that the # data is public and user requested to not sign the url - if aws_access_key_id == "*" or (public_data and not force_signed_url): + if aws_access_key_id == "*" or (not force_signed_url): return http_url region = self.get_bucket_region() @@ -878,7 +992,7 @@ def get_signed_url( self.parsed_url.netloc, credential ) - user_info = _get_user_info() + user_info = _get_user_info_for_id_or_from_request(user=authorized_user) url = generate_aws_presigned_url( http_url, @@ -913,7 +1027,7 @@ def init_multipart_upload(self, expires_in): self.parsed_url.netloc, self.parsed_url.path.strip("/"), credentials ) - def generate_presigne_url_for_part_upload(self, uploadId, partNumber, expires_in): + def generate_presigned_url_for_part_upload(self, uploadId, partNumber, expires_in): """ Generate presigned url for uploading object part given uploadId and part number @@ -998,17 +1112,17 @@ def get_signed_url( self, action, expires_in, - public_data=False, force_signed_url=True, r_pays_project=None, + authorized_user=None, ): resource_path = self.get_resource_path() - user_info = _get_user_info() + user_info = _get_user_info_for_id_or_from_request(user=authorized_user) - if public_data and not force_signed_url: + if not force_signed_url: url = "https://storage.cloud.google.com/" + resource_path - elif public_data and _is_anonymous_user(user_info): + elif _is_anonymous_user(user_info): url = self._generate_anonymous_google_storage_signed_url( ACTION_DICT["gs"][action], resource_path, int(expires_in) ) @@ -1070,8 +1184,9 @@ def _generate_google_storage_signed_url( username, r_pays_project=None, ): - - proxy_group_id = get_or_create_proxy_group_id() + proxy_group_id = get_or_create_proxy_group_id( + user_id=user_id, username=username + ) expiration_time = int(time.time()) + expires_in is_cached = False @@ -1282,7 +1397,12 @@ def _get_converted_url(self): return urlunparse(new_parsed_url) def get_signed_url( - self, action, expires_in, public_data=False, force_signed_url=True, **kwargs + self, + action, + expires_in, + force_signed_url=True, + authorized_user=None, + **kwargs, ): """ Get a signed url for a given action @@ -1301,11 +1421,6 @@ def get_signed_url( Get a signed url for an action like "upload" or "download". :param int expires_in: The SAS token will expire in a given number of seconds from datetime.utcnow() - :param bool public_data: - Indicate if the Azure Blob Storage Account has public access. - If it's public and we don't need to force the signed url, just return the raw - url. - The default for public_data is False. :param bool force_signed_url: Enforce signing the URL for the Azure Blob Storage Account using a SAS token. The default is True. @@ -1318,8 +1433,8 @@ def get_signed_url( container_name, blob_name = self._get_container_and_blob() - user_info = _get_user_info() - if user_info and user_info.get("user_id") == ANONYMOUS_USER_ID: + user_info = _get_user_info_for_id_or_from_request(user=authorized_user) + if _is_anonymous_user(user_info): logger.info(f"Attempting to get a signed url an anonymous user") # if it's public and we don't need to force the signed url, just return the raw @@ -1327,7 +1442,7 @@ def get_signed_url( # `azure_creds == "*"` is a special case to support public buckets # where we do *not* want to try signing at all. 
the other case is that the # data is public and user requested to not sign the url - if azure_creds == "*" or (public_data and not force_signed_url): + if azure_creds == "*" or (not force_signed_url): return self._get_converted_url() url = self._generate_azure_blob_storage_sas( @@ -1379,36 +1494,56 @@ def delete(self, container, blob): # pylint: disable=R0201 return ("Failed to delete data file.", status_code) -def _get_user_info(sub_type=str): +def _get_user_info_for_id_or_from_request( + sub_type=str, user=None, username=None, db_session=None +): """ - Attempt to parse the request for token to authenticate the user. fallback to + Attempt to parse the request to get information about user. fallback to populated information about an anonymous user. + By default, cast `sub` to str. Use `sub_type` to override this behavior. + + WARNING: This does NOT actually check authorization information and always falls + back on anonymous user information. DO NOT USE THIS AS A MEANS TO AUTHORIZE, + IT WILL ALWAYS GIVE YOU BACK ANONYMOUS USER INFO. Only use this + after you've authorized the access to the data via other means. """ + db_session = db_session or current_session + try: - set_current_token( - validate_request(scope={"user"}, audience=config.get("BASE_URL")) + if user: + final_username = user.username + final_user_id = sub_type(user.id) + elif username: + result = query_for_user(db_session, username) + final_username = result.username + final_user_id = sub_type(result.id) + else: + set_current_token( + validate_request(scope={"user"}, audience=config.get("BASE_URL")) + ) + final_user_id = current_token["sub"] + final_user_id = sub_type(final_user_id) + final_username = current_token["context"]["user"]["name"] + except Exception as exc: + logger.info( + "could not determine user info from request. setting anonymous user information." 
) - user_id = current_token["sub"] - if sub_type: - user_id = sub_type(user_id) - username = current_token["context"]["user"]["name"] - except JWTError: # this is fine b/c it might be public data, sign with anonymous username/id - user_id = None + final_user_id = None if sub_type == str: - user_id = ANONYMOUS_USER_ID - username = ANONYMOUS_USERNAME + final_user_id = sub_type(ANONYMOUS_USER_ID) + final_username = ANONYMOUS_USERNAME - return {"user_id": user_id, "username": username} + return {"user_id": final_user_id, "username": final_username} def _is_anonymous_user(user_info): """ Check if there's a current user authenticated or if request is anonymous """ - user_info = user_info or _get_user_info() - return user_info.get("user_id") == ANONYMOUS_USER_ID + user_info = user_info or _get_user_info_for_id_or_from_request() + return str(user_info.get("user_id")) == ANONYMOUS_USER_ID def filter_auth_ids(action, list_auth_ids): diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py index c02ba4f7a..4bb110bcc 100644 --- a/fence/blueprints/ga4gh.py +++ b/fence/blueprints/ga4gh.py @@ -1,5 +1,7 @@ import flask +from flask import request from fence.errors import UserError +from fence.config import config from fence.blueprints.data.indexd import ( get_signed_url_for_file, @@ -18,10 +20,26 @@ methods=["GET", "POST"], ) def get_ga4gh_signed_url(object_id, access_id): + if not access_id: raise UserError("Access ID/Protocol is required.") + ga4gh_passports = None + if flask.request.method == "POST": + ga4gh_passports = flask.request.get_json(force=True, silent=True).get( + config["GA4GH_DRS_POSTED_PASSPORT_FIELD"] + ) + + if ga4gh_passports and flask.request.headers.get("Authorization"): + raise UserError( + "You cannot supply both GA4GH passports and a token " + "in the Authorization header of a request." + ) + result = get_signed_url_for_file( - "download", object_id, requested_protocol=access_id + "download", + object_id, + requested_protocol=access_id, + ga4gh_passports=ga4gh_passports, ) return flask.jsonify(result) diff --git a/fence/blueprints/login/ras.py b/fence/blueprints/login/ras.py index 8120af507..467d97ec7 100644 --- a/fence/blueprints/login/ras.py +++ b/fence/blueprints/login/ras.py @@ -1,23 +1,27 @@ import flask import jwt import os + +# the whole fence_create module is imported to avoid issues with circular imports +import fence.scripting.fence_create from distutils.util import strtobool +from urllib.parse import urlparse, parse_qs + from authutils.errors import JWTError -from authutils.token.core import validate_jwt -from authutils.token.keys import get_public_key_for_token from cdislogging import get_logger from flask_sqlalchemy_session import current_session -from urllib.parse import urlparse, parse_qs - -from fence.models import GA4GHVisaV1, IdentityProvider from gen3authz.client.arborist.client import ArboristClient from fence.blueprints.login.base import DefaultOAuth2Login, DefaultOAuth2Callback from fence.config import config -from fence.scripting.fence_create import init_syncer +from fence.jwt.validate import validate_jwt +from fence.models import GA4GHVisaV1, IdentityProvider from fence.utils import get_valid_expiration +import fence.resources.ga4gh.passports + logger = get_logger(__name__) +PKEY_CACHE = {} class RASLogin(DefaultOAuth2Login): @@ -36,86 +40,47 @@ def __init__(self): ) def post_login(self, user=None, token_result=None, id_from_idp=None): - # TODO: I'm not convinced this code should be in post_login. 
- # Just putting it in here for now, but might refactor later. - # This saves us a call to RAS /userinfo, but will not make sense - # when there is more than one visa issuer. - - # Clear all of user's visas, to avoid having duplicate visas - # where only iss/exp/jti differ - # TODO: This is not IdP-specific and will need a rethink when - # we have multiple IdPs - user.ga4gh_visas_v1 = [] - - current_session.commit() + parsed_url = urlparse(flask.session.get("redirect")) + query_params = parse_qs(parsed_url.query) - encoded_visas = [] + userinfo = flask.g.userinfo - try: - encoded_visas = flask.current_app.ras_client.get_encoded_visas_v11_userinfo( - flask.g.userinfo + global_parse_visas_on_login = config["GLOBAL_PARSE_VISAS_ON_LOGIN"] + parse_visas = global_parse_visas_on_login or ( + global_parse_visas_on_login == None + and ( + strtobool(query_params.get("parse_visas")[0]) + if query_params.get("parse_visas") + else False ) - except Exception as e: - err_msg = "Could not retrieve visas" - logger.error("{}: {}".format(e, err_msg)) - raise - - for encoded_visa in encoded_visas: + ) + # do an on-the-fly usersync for this user to give them instant access after logging in through RAS + # if GLOBAL_PARSE_VISAS_ON_LOGIN is true then we want to run it regardless of whether or not the client sent parse_visas on request + if parse_visas: + # get passport then call sync on it try: - # Do not move out of loop unless we can assume every visa has same issuer and kid - public_key = get_public_key_for_token( - encoded_visa, attempt_refresh=True - ) - except Exception as e: - # (But don't log the visa contents!) - logger.error( - "Could not get public key to validate visa: {}. Discarding visa.".format( - e + passport = ( + flask.current_app.ras_client.get_encoded_passport_v11_userinfo( + userinfo ) ) - continue - - try: - # Validate the visa per GA4GH AAI "Embedded access token" format rules. - # pyjwt also validates signature and expiration. - decoded_visa = validate_jwt( - encoded_visa, - public_key, - # Embedded token must not contain aud claim - aud=None, - # Embedded token must contain scope claim, which must include openid - scope={"openid"}, - issuers=config.get("GA4GH_VISA_ISSUER_ALLOWLIST", []), - # Embedded token must contain iss, sub, iat, exp claims - # options={"require": ["iss", "sub", "iat", "exp"]}, - # ^ FIXME 2021-05-13: Above needs pyjwt>=v2.0.0, which requires cryptography>=3. - # Once we can unpin and upgrade cryptography and pyjwt, switch to above "options" arg. - # For now, pyjwt 1.7.1 is able to require iat and exp; - # authutils' validate_jwt (i.e. the function being called) checks issuers already (see above); - # and we will check separately for sub below. - options={ - "require_iat": True, - "require_exp": True, - }, - ) - - # Also require 'sub' claim (see note above about pyjwt and the options arg). - if "sub" not in decoded_visa: - raise JWTError("Visa is missing the 'sub' claim.") except Exception as e: - logger.error("Visa failed validation: {}. 
Discarding visa.".format(e)) - continue - - visa = GA4GHVisaV1( - user=user, - source=decoded_visa["ga4gh_visa_v1"]["source"], - type=decoded_visa["ga4gh_visa_v1"]["type"], - asserted=int(decoded_visa["ga4gh_visa_v1"]["asserted"]), - expires=int(decoded_visa["exp"]), - ga4gh_visa=encoded_visa, + err_msg = "Could not retrieve passport or visas" + logger.error("{}: {}".format(e, err_msg)) + raise + + # now sync authz updates + users_from_passports = fence.resources.ga4gh.passports.sync_gen3_users_authz_from_ga4gh_passports( + [passport], + pkey_cache=PKEY_CACHE, + db_session=current_session, ) - current_session.add(visa) - current_session.commit() + user_ids_from_passports = list(users_from_passports.keys()) + + # TODO? + # put_gen3_usernames_for_passport_into_cache( + # passport, usernames_from_current_passport + # ) # Store refresh token in db assert "refresh_token" in flask.g.tokens, "No refresh_token in user tokens" @@ -125,12 +90,11 @@ def post_login(self, user=None, token_result=None, id_from_idp=None): decoded_id = jwt.decode(id_token, verify=False) # Add 15 days to iat to calculate refresh token expiration time + # TODO do they really not provide exp? issued_time = int(decoded_id.get("iat")) expires = config["RAS_REFRESH_EXPIRATION"] # User definied RAS refresh token expiration time - parsed_url = urlparse(flask.session.get("redirect")) - query_params = parse_qs(parsed_url.query) if query_params.get("upstream_expires_in"): custom_refresh_expiration = query_params.get("upstream_expires_in")[0] expires = get_valid_expiration( @@ -143,48 +107,4 @@ def post_login(self, user=None, token_result=None, id_from_idp=None): user=user, refresh_token=refresh_token, expires=expires + issued_time ) - global_parse_visas_on_login = config["GLOBAL_PARSE_VISAS_ON_LOGIN"] - usersync = config.get("USERSYNC", {}) - sync_from_visas = usersync.get("sync_from_visas", False) - parse_visas = global_parse_visas_on_login or ( - global_parse_visas_on_login == None - and ( - strtobool(query_params.get("parse_visas")[0]) - if query_params.get("parse_visas") - else False - ) - ) - # if sync_from_visas and (global_parse_visas_on_login or global_parse_visas_on_login == None): - # Check if user has any project_access from a previous session or from usersync AND if fence is configured to use visas as authZ source - # if not do an on-the-fly usersync for this user to give them instant access after logging in through RAS - # If GLOBAL_PARSE_VISAS_ON_LOGIN is true then we want to run it regardless of whether or not the client sent parse_visas on request - if sync_from_visas and parse_visas and not user.project_access: - # Close previous db sessions. 
Leaving it open causes a race condition where we're viewing user.project_access while trying to update it in usersync - # not closing leads to partially updated records - current_session.close() - - DB = os.environ.get("FENCE_DB") or config.get("DB") - if DB is None: - try: - from fence.settings import DB - except ImportError: - pass - - arborist = ArboristClient( - arborist_base_url=config["ARBORIST"], - logger=get_logger("user_syncer.arborist_client"), - authz_provider="user-sync", - ) - dbGaP = os.environ.get("dbGaP") or config.get("dbGaP") - if not isinstance(dbGaP, list): - dbGaP = [dbGaP] - - sync = init_syncer( - dbGaP, - None, - DB, - arborist=arborist, - ) - sync.sync_single_user_visas(user, current_session) - super(RASCallback, self).post_login(id_from_idp=id_from_idp) diff --git a/fence/blueprints/storage_creds/google.py b/fence/blueprints/storage_creds/google.py index 3e21d1e5e..e3d623856 100644 --- a/fence/blueprints/storage_creds/google.py +++ b/fence/blueprints/storage_creds/google.py @@ -183,6 +183,7 @@ def delete(self): :statuscode 405 Method Not Allowed if ?all=true is not included """ user_id = current_token["sub"] + username = current_token.get("context", {}).get("user", {}).get("name") try: all_arg = strtobool(flask.request.args.get("all", "false").lower()) @@ -197,7 +198,7 @@ def delete(self): with GoogleCloudManager() as g_cloud: client_id = current_token.get("azp") or None - service_account = get_service_account(client_id, user_id) + service_account = get_service_account(client_id, user_id, username=username) if service_account: keys_for_account = g_cloud.get_service_account_keys_info( @@ -259,9 +260,10 @@ def delete(self, access_key): :statuscode 404 Access key doesn't exist """ user_id = current_token["sub"] + username = current_token.get("context", {}).get("user", {}).get("name") with GoogleCloudManager() as g_cloud: client_id = current_token.get("azp") or None - service_account = get_service_account(client_id, user_id) + service_account = get_service_account(client_id, user_id, username=username) if service_account: keys_for_account = g_cloud.get_service_account_keys_info( diff --git a/fence/config-default.yaml b/fence/config-default.yaml index 7c7fdf445..83dd279a8 100755 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -450,6 +450,11 @@ RENEW_ACCESS_TOKEN_BEFORE_EXPIRATION: false # The maximum lifetime of a Gen3 passport in seconds GEN3_PASSPORT_EXPIRES_IN: 43200 +# The JSON field the GA4GH Passport is in when a request is POST-ed to DRS +# We use the same field name for POSTs to /data/download for consistency +GA4GH_DRS_POSTED_PASSPORT_FIELD: "passports" + + ######################################################################################## # OPTIONAL CONFIGURATIONS # ######################################################################################## @@ -887,6 +892,16 @@ ASSUME_ROLE_CACHE_SECONDS: 1800 # will have access to download data. 
REGISTER_USERS_ON: false REGISTERED_USERS_GROUP: '' + +# Number of projects that can be registered to a Google Service Account +SERVICE_ACCOUNT_LIMIT: 6 + +# ////////////////////////////////////////////////////////////////////////////////////// +# GA4GH SUPPORT: DATA ACCESS AND AUTHORIZATION SYNCING +# ////////////////////////////////////////////////////////////////////////////////////// +# whether or not to accept GA4GH Passports as a means of AuthN/Z to the DRS data access endpoint +GA4GH_PASSPORTS_TO_DRS_ENABLED: false + # RAS refresh_tokens expire in 15 days RAS_REFRESH_EXPIRATION: 1296000 # List of JWT issuers from which Fence will accept GA4GH visas @@ -894,9 +909,15 @@ GA4GH_VISA_ISSUER_ALLOWLIST: - '{{BASE_URL}}' - 'https://sts.nih.gov' - 'https://stsstg.nih.gov' -# Number of projects that can be registered to a Google Service Accont -SERVICE_ACCOUNT_LIMIT: 6 - +GA4GH_VISA_V1_CLAIM_REQUIRED_FIELDS: + type: + - 'https://ras.nih.gov/visas/v1.1' + value: + - 'https://sts.nih.gov/passport/dbgap/v1.1' + - 'https://stsstg.nih.gov/passport/dbgap/v1.1' + source: + - 'https://ncbi.nlm.nih.gov/gap' +EXPIRED_AUTHZ_REMOVAL_JOB_FREQ_IN_SECONDS: 300 # Global sync visas during login # None(Default): Allow per client i.e. a fence client can pick whether or not to sync their visas during login with parse_visas param in /authorization endpoint # True: Parse for all clients i.e. a fence client will always sync their visas during login @@ -904,9 +925,6 @@ SERVICE_ACCOUNT_LIMIT: 6 GLOBAL_PARSE_VISAS_ON_LOGIN: # Settings for usersync with visas USERSYNC: - sync_from_visas: false - # fallback to dbgap sftp when there are no valid visas for a user i.e. if they're expired or if they're malformed - fallback_to_dbgap_sftp: false visa_types: - ras: ["https://ras.nih.gov/visas/v1", "https://ras.nih.gov/visas/v1.1"] + ras: ['https://ras.nih.gov/visas/v1', 'https://ras.nih.gov/visas/v1.1'] RAS_USERINFO_ENDPOINT: '/openid/connect/v1.1/userinfo' diff --git a/fence/job/visa_update_cronjob.py b/fence/job/visa_update_cronjob.py index 969a37426..cac8d9182 100644 --- a/fence/job/visa_update_cronjob.py +++ b/fence/job/visa_update_cronjob.py @@ -3,15 +3,9 @@ import time from cdislogging import get_logger -from userdatamodel.driver import SQLAlchemyDriver from fence.config import config -from fence.models import ( - GA4GHVisaV1, - User, - UpstreamRefreshToken, - query_for_user, -) +from fence.models import User from fence.resources.openid.ras_oauth2 import RASOauth2Client as RASClient @@ -69,8 +63,8 @@ async def update_tokens(self, db_session): Producer: Collects users from db and feeds it to the workers Worker: Takes in the users from the Producer and passes it to the Updater to update the tokens and passes those updated tokens for JWT validation - Updater: Updates refresh_tokens and visas by calling the update_user_visas from - the correct client + Updater: Updates refresh_tokens and visas by calling the update_user_authorization + from the correct client """ start_time = time.time() @@ -151,28 +145,48 @@ async def updater(self, name, updater_queue, db_session): """ while True: user = await updater_queue.get() - if user.ga4gh_visas_v1: - for visa in user.ga4gh_visas_v1: - client = self._pick_client(visa) + try: + client = self._pick_client(user) + if client: self.logger.info( - "Updater {} updating visa for user {}".format( + "Updater {} updating authorization for user {}".format( name, user.username ) ) - client.update_user_visas(user, self.pkey_cache, db_session) - else: - # clear expired refresh tokens - if 
user.upstream_refresh_tokens: - user.upstream_refresh_tokens = [] - db_session.commit() - - self.logger.info( - "User {} doesnt have visa. Skipping . . .".format(user.username) + # when getting access token, this persists new refresh token, + # it also persists validated visa(s) in the database + client.update_user_authorization( + user, + pkey_cache=self.pkey_cache, + db_session=db_session, + ) + else: + self.logger.debug( + f"Updater {name} NOT updating authorization for " + f"user {user.username} because no client was found for IdP: {user.identity_provider}" + ) + except Exception as exc: + self.logger.error( + f"Updater {name} could not update authorization " + f"for {user.username}. Error: {exc}. Continuing." ) + pass updater_queue.task_done() - def _pick_client(self, visa): + def _pick_client(self, user): + """ + Pick oidc client according to the identity provider + """ + client = None + if ( + user.identity_provider + and getattr(user.identity_provider, "name") == self.ras_client.idp + ): + client = self.ras_client + return client + + def _pick_client_from_visa(self, visa): """ Pick oidc client according to the visa provider """ diff --git a/fence/jwt/validate.py b/fence/jwt/validate.py index 84c8eea4f..5c6f336c3 100644 --- a/fence/jwt/validate.py +++ b/fence/jwt/validate.py @@ -1,7 +1,6 @@ import authutils.errors import authutils.token.keys import authutils.token.validate -import flask import jwt from fence.config import config @@ -41,9 +40,12 @@ def validate_jwt( encoded_token=None, aud=None, scope={"openid"}, + require_purpose=True, purpose=None, public_key=None, attempt_refresh=False, + issuers=None, + pkey_cache=None, **kwargs ): """ @@ -94,20 +96,22 @@ def validate_jwt( aud = config["BASE_URL"] iss = config["BASE_URL"] - issuers = [iss] - oidc_iss = ( - config.get("OPENID_CONNECT", {}).get("fence", {}).get("api_base_url", None) - ) - if oidc_iss: - issuers.append(oidc_iss) + if issuers is None: + issuers = [iss] + oidc_iss = ( + config.get("OPENID_CONNECT", {}).get("fence", {}).get("api_base_url", None) + ) + if oidc_iss: + issuers.append(oidc_iss) try: token_iss = jwt.decode(encoded_token, verify=False).get("iss") except jwt.InvalidTokenError as e: raise JWTError(e) attempt_refresh = attempt_refresh and (token_iss != iss) public_key = public_key or authutils.token.keys.get_public_key_for_token( - encoded_token, attempt_refresh=attempt_refresh + encoded_token, attempt_refresh=attempt_refresh, pkey_cache=pkey_cache ) + try: claims = authutils.token.validate.validate_jwt( encoded_token=encoded_token, @@ -173,12 +177,12 @@ def validate_jwt( raise JWTError(msg) if purpose: validate_purpose(claims, purpose) - if "pur" not in claims: + if require_purpose and "pur" not in claims: raise JWTError("token {} missing purpose (`pur`) claim".format(claims["jti"])) # For refresh tokens and API keys specifically, check that they are not # blacklisted. 
- if claims["pur"] == "refresh" or claims["pur"] == "api_key": + if require_purpose and (claims["pur"] == "refresh" or claims["pur"] == "api_key"): if is_blacklisted(claims["jti"]): raise JWTError("token is blacklisted") diff --git a/fence/models.py b/fence/models.py index bfb3fe33a..d911e7cb4 100644 --- a/fence/models.py +++ b/fence/models.py @@ -11,7 +11,6 @@ from authlib.flask.oauth2.sqla import OAuth2AuthorizationCodeMixin, OAuth2ClientMixin import bcrypt -import flask from sqlalchemy import ( Integer, BigInteger, @@ -23,6 +22,7 @@ MetaData, Table, text, + event, ) from sqlalchemy.dialects.postgresql import ARRAY, JSONB from sqlalchemy.orm import relationship, backref @@ -55,6 +55,7 @@ ) import warnings +from fence import logger from fence.config import config @@ -66,6 +67,68 @@ def query_for_user(session, username): ) +def query_for_user_by_id(session, user_id): + return session.query(User).filter(User.id == user_id).first() + + +def create_user(session, logger, username, email=None, idp_name=None): + """ + Create a new user in the database. + + Args: + session (sqlalchemy.orm.session.Session): database session + logger (logging.Logger): logger + username (str): username to save for the created user + email (str): email to save for the created user + idp_name (str): name of identity provider to link + + Return: + userdatamodel.user.User: the created user + """ + logger.info( + f"Creating a new user with username: {username}, " + f"email: {email}, and idp_name: {idp_name}" + ) + + user = User(username=username) + if email: + user.email = email + if idp_name: + idp = ( + session.query(IdentityProvider) + .filter(IdentityProvider.name == idp_name) + .first() + ) + if not idp: + idp = IdentityProvider(name=idp_name) + user.identity_provider = idp + + session.add(user) + session.commit() + return user + + +def get_project_to_authz_mapping(session): + """ + Get the mappings for Project.auth_id to authorization resource (Project.authz) + from the database if a mapping exists. e.g. will only return if Project.authz is + populated. 
+ + Args: + session (sqlalchemy.orm.session.Session): database session + + Returns: + dict{str:str}: Mapping from Project.auth_id to Project.authz + """ + output = {} + + query_results = session.query(Project.auth_id, Project.authz) + if query_results: + output = {item.auth_id: item.authz for item in query_results if item.authz} + + return output + + class ClientAuthType(Enum): """ List the possible types of OAuth client authentication, which are @@ -461,6 +524,8 @@ class GoogleProxyGroupToGoogleBucketAccessGroup(Base): ), ) + expires = Column(BigInteger) + class UserServiceAccount(Base): __tablename__ = "user_service_account" @@ -551,6 +616,14 @@ class AssumeRoleCacheGCP(Base): gcp_key_db_entry = Column(String()) +class GA4GHPassportCache(Base): + __tablename__ = "ga4gh_passport_cache" + + passport_hash = Column(String(64), primary_key=True) + expires_at = Column(BigInteger, nullable=False) + user_ids = Column(ARRAY(String(255)), nullable=False) + + class GA4GHVisaV1(Base): __tablename__ = "ga4gh_visa_v1" @@ -592,6 +665,87 @@ class UpstreamRefreshToken(Base): expires = Column(BigInteger, nullable=False) +class IssSubPairToUser(Base): + # issuer & sub pair mapping to Gen3 User sub + + __tablename__ = "iss_sub_pair_to_user" + + iss = Column(String(), primary_key=True) + sub = Column(String(), primary_key=True) + + fk_to_User = Column( + Integer, ForeignKey(User.id, ondelete="CASCADE"), nullable=False + ) # foreign key for User table + user = relationship( + "User", + backref=backref( + "iss_sub_pairs", + cascade="all, delete-orphan", + passive_deletes=True, + ), + ) + + # dump whatever idp provides in here + extra_info = Column(JSONB(), server_default=text("'{}'")) + + def _get_issuer_to_idp(): + possibly_matching_idps = [IdentityProvider.ras] + issuer_to_idp = {} + + oidc = config.get("OPENID_CONNECT", {}) + for idp in possibly_matching_idps: + discovery_url = oidc.get(idp, {}).get("discovery_url") + if discovery_url: + for allowed_issuer in config["GA4GH_VISA_ISSUER_ALLOWLIST"]: + if discovery_url.startswith(allowed_issuer): + issuer_to_idp[allowed_issuer] = idp + break + + return issuer_to_idp + + ISSUER_TO_IDP = _get_issuer_to_idp() + + # no longer need function since results stored in var + del _get_issuer_to_idp + + +@event.listens_for(IssSubPairToUser.__table__, "after_create") +def populate_iss_sub_pair_to_user_table(target, connection, **kw): + """ + Populate iss_sub_pair_to_user table using User table's id_from_idp + column. 
+ """ + for issuer, idp_name in IssSubPairToUser.ISSUER_TO_IDP.items(): + logger.info( + 'Attempting to populate iss_sub_pair_to_user table for users with "{}" idp and "{}" issuer'.format( + idp_name, issuer + ) + ) + transaction = connection.begin() + try: + connection.execute( + text( + """ + WITH identity_provider_id AS (SELECT id FROM identity_provider WHERE name=:idp_name) + INSERT INTO iss_sub_pair_to_user (iss, sub, "fk_to_User", extra_info) + SELECT :iss, id_from_idp, id, additional_info + FROM "User" + WHERE idp_id IN (SELECT * FROM identity_provider_id) AND id_from_idp IS NOT NULL; + """ + ), + idp_name=idp_name, + iss=issuer, + ) + except Exception as e: + transaction.rollback() + logger.warning( + "Could not populate iss_sub_pair_to_user table: {}".format(e) + ) + else: + transaction.commit() + logger.info("Population was successful") + + to_timestamp = ( "CREATE OR REPLACE FUNCTION pc_datetime_to_timestamp(datetoconvert timestamp) " "RETURNS BIGINT AS " @@ -850,6 +1004,15 @@ def migrate(driver): FOR EACH ROW EXECUTE PROCEDURE process_cert_audit();""" ) + # Google Access expiration + + add_column_if_not_exist( + table_name=GoogleProxyGroupToGoogleBucketAccessGroup.__tablename__, + column=Column("expires", BigInteger()), + driver=driver, + metadata=md, + ) + add_column_if_not_exist( table_name=Project.__tablename__, column=Column("authz", String), diff --git a/fence/resources/ga4gh/passports.py b/fence/resources/ga4gh/passports.py new file mode 100644 index 000000000..e0e3c6539 --- /dev/null +++ b/fence/resources/ga4gh/passports.py @@ -0,0 +1,525 @@ +import flask +import os +import collections +import hashlib +import time +import datetime +import jwt + +# the whole fence_create module is imported to avoid issue with circular imports +import fence.scripting.fence_create + +from authutils.errors import JWTError +from authutils.token.core import get_iss, get_kid +from cdislogging import get_logger +from flask_sqlalchemy_session import current_session + +from fence.jwt.validate import validate_jwt +from fence.config import config +from fence.models import ( + create_user, + query_for_user, + query_for_user_by_id, + GA4GHVisaV1, + GA4GHPassportCache, + IdentityProvider, + IssSubPairToUser, +) + +logger = get_logger(__name__) + +# cache will be in following format +# passport_hash: ([user_id_0, user_id_1, ...], expires_at) +PASSPORT_CACHE = {} + + +def sync_gen3_users_authz_from_ga4gh_passports( + passports, + pkey_cache=None, + db_session=None, +): + """ + Validate passports and embedded visas, using each valid visa's identity + established by combination to possibly create and definitely + determine a Fence user who is added to the list returned by this + function. In the process of determining Fence users from visas, visa + authorization information is also persisted in Fence and synced to + Arborist. + + Args: + passports (list): a list of raw encoded passport strings, each + including header, payload, and signature + + Return: + list: a list of users, each corresponding to a valid visa identity + embedded within the passports passed in + """ + db_session = db_session or current_session + + # {"username": user, "username2": user2} + users_from_all_passports = {} + for passport in passports: + try: + cached_usernames = get_gen3_usernames_for_passport_from_cache( + passport=passport, db_session=db_session + ) + if cached_usernames: + # there's a chance a given username exists in the cache but no longer in + # the database. 
if not all are in db, ignore the cache and actually parse + # and validate the passport + all_users_exist_in_db = True + usernames_to_update = {} + for username in cached_usernames: + user = query_for_user(session=db_session, username=username) + if not user: + all_users_exist_in_db = False + continue + usernames_to_update[user.username] = user + + if all_users_exist_in_db: + users_from_all_passports.update(usernames_to_update) + # existence in the cache and a user in db means that this passport + # was validated previously (expiration was also checked) + continue + + # below function also validates passport (or raises exception) + raw_visas = get_unvalidated_visas_from_valid_passport( + passport, pkey_cache=pkey_cache + ) + except Exception as exc: + logger.warning(f"Invalid passport provided, ignoring. Error: {exc}") + continue + + # an empty raw_visas list means that either the current passport is + # invalid or that it has no visas. in both cases, the current passport + # is ignored and we move on to the next passport + if not raw_visas: + continue + + identity_to_visas = collections.defaultdict(list) + min_visa_expiration = int(time.time()) + datetime.timedelta(hours=1).seconds + for raw_visa in raw_visas: + try: + validated_decoded_visa = validate_visa(raw_visa, pkey_cache=pkey_cache) + identity_to_visas[ + ( + validated_decoded_visa.get("iss"), + validated_decoded_visa.get("sub"), + ) + ].append((raw_visa, validated_decoded_visa)) + min_visa_expiration = min( + min_visa_expiration, validated_decoded_visa.get("exp") + ) + except Exception as exc: + logger.warning(f"Invalid visa provided, ignoring. Error: {exc}") + continue + + expired_authz_removal_job_freq_in_seconds = config[ + "EXPIRED_AUTHZ_REMOVAL_JOB_FREQ_IN_SECONDS" + ] + min_visa_expiration -= expired_authz_removal_job_freq_in_seconds + if min_visa_expiration <= int(time.time()): + logger.warning( + "The passport's earliest valid visa expiration time is set to " + f"occur within {expired_authz_removal_job_freq_in_seconds} " + "seconds from now, which is too soon an expiration to handle." + ) + continue + + users_from_current_passport = [] + for (issuer, subject_id), visas in identity_to_visas.items(): + gen3_user = get_or_create_gen3_user_from_iss_sub( + issuer, subject_id, db_session=db_session + ) + + ga4gh_visas = [ + GA4GHVisaV1( + user=gen3_user, + source=validated_decoded_visa["ga4gh_visa_v1"]["source"], + type=validated_decoded_visa["ga4gh_visa_v1"]["type"], + asserted=int(validated_decoded_visa["ga4gh_visa_v1"]["asserted"]), + expires=int(validated_decoded_visa["exp"]), + ga4gh_visa=raw_visa, + ) + for raw_visa, validated_decoded_visa in visas + ] + # NOTE: does not validate, assumes validation occurs above. 
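To make the expiration arithmetic above concrete, a worked sketch with assumed numbers (the job frequency value is hypothetical):

```python
import time

now = int(time.time())
visa_exps = [now + 1800, now + 7200]                 # hypothetical visa "exp" claims
min_visa_expiration = min([now + 3600] + visa_exps)  # one-hour ceiling -> now + 1800
min_visa_expiration -= 300  # assumed EXPIRED_AUTHZ_REMOVAL_JOB_FREQ_IN_SECONDS
assert min_visa_expiration > now  # otherwise the passport is skipped entirely
```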
+ # This adds the visas to the database session but doesn't commit until + # the end of this function + _sync_validated_visa_authorization( + gen3_user=gen3_user, + ga4gh_visas=ga4gh_visas, + expiration=min_visa_expiration, + db_session=db_session, + ) + users_from_current_passport.append(gen3_user) + + for user in users_from_current_passport: + users_from_all_passports[user.username] = user + + put_gen3_usernames_for_passport_into_cache( + passport=passport, + user_ids_from_passports=list(users_from_all_passports.keys()), + expires_at=min_visa_expiration, + db_session=db_session, + ) + + db_session.commit() + + logger.info( + f"Got Gen3 usernames from passport(s): {list(users_from_all_passports.keys())}" + ) + return users_from_all_passports + + +def get_unvalidated_visas_from_valid_passport(passport, pkey_cache=None): + """ + Return encoded visas after extracting and validating encoded passport + + Args: + passport (string): encoded ga4gh passport + pkey_cache (dict): app cache of public keys_dir + + Return: + list: list of encoded GA4GH visas + """ + decoded_passport = {} + passport_issuer, passport_kid = None, None + + if not pkey_cache: + pkey_cache = {} + + try: + passport_issuer = get_iss(passport) + passport_kid = get_kid(passport) + except Exception as e: + logger.error( + "Could not get issuer or kid from passport: {}. Discarding passport.".format( + e + ) + ) + # ignore malformed/invalid passports + return [] + + public_key = pkey_cache.get(passport_issuer, {}).get(passport_kid) + + try: + decoded_passport = validate_jwt( + encoded_token=passport, + public_key=public_key, + attempt_refresh=True, + require_purpose=False, + scope={"openid"}, + issuers=config.get("GA4GH_VISA_ISSUER_ALLOWLIST", []), + options={ + "require_iat": True, + "require_exp": True, + "verify_aud": False, + }, + ) + + if "sub" not in decoded_passport: + raise JWTError(f"Passport is missing the 'sub' claim") + except Exception as e: + logger.error("Passport failed validation: {}. Discarding passport.".format(e)) + # ignore malformed/invalid passports + return [] + + return decoded_passport.get("ga4gh_passport_v1", []) + + +def validate_visa(raw_visa, pkey_cache=None): + """ + Validate a raw visa in accordance with: + - GA4GH AAI spec (https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md) + - GA4GH DURI spec (https://github.com/ga4gh-duri/ga4gh-duri.github.io/blob/master/researcher_ids/ga4gh_passport_v1.md) + + Args: + raw_visa (str): a raw, encoded visa including header, payload, and signature + + Return: + dict: the decoded payload if validation was successful. an exception + is raised if validation was unsuccessful + """ + if jwt.get_unverified_header(raw_visa).get("jku"): + raise Exception( + "Visa Document Tokens are not currently supported by passing " + '"jku" in the header. Only Visa Access Tokens are supported.' 
+ ) + + logger.info("Attempting to validate visa") + + decoded_visa = validate_jwt( + raw_visa, + attempt_refresh=True, + scope={"openid", "ga4gh_passport_v1"}, + require_purpose=False, + issuers=config["GA4GH_VISA_ISSUER_ALLOWLIST"], + options={"require_iat": True, "require_exp": True, "verify_aud": False}, + pkey_cache=pkey_cache, + ) + logger.info(f'Visa jti: "{decoded_visa.get("jti", "")}"') + logger.info(f'Visa txn: "{decoded_visa.get("txn", "")}"') + + for claim in ["sub", "ga4gh_visa_v1"]: + if claim not in decoded_visa: + raise Exception(f'Visa does not contain REQUIRED "{claim}" claim') + + if "aud" in decoded_visa: + raise Exception('Visa MUST NOT contain "aud" claim') + + field_to_allowed_values = config["GA4GH_VISA_V1_CLAIM_REQUIRED_FIELDS"] + for field, allowed_values in field_to_allowed_values.items(): + if field not in decoded_visa["ga4gh_visa_v1"]: + raise Exception( + f'"ga4gh_visa_v1" claim does not contain REQUIRED "{field}" field' + ) + if decoded_visa["ga4gh_visa_v1"][field] not in allowed_values: + raise Exception( + f'{field}={decoded_visa["ga4gh_visa_v1"][field]} field in "ga4gh_visa_v1" is not equal to one of the allowed_values: {allowed_values}' + ) + + if "asserted" not in decoded_visa["ga4gh_visa_v1"]: + raise Exception( + '"ga4gh_visa_v1" claim does not contain REQUIRED "asserted" field' + ) + asserted = decoded_visa["ga4gh_visa_v1"]["asserted"] + if type(asserted) not in (int, float): + raise Exception( + '"ga4gh_visa_v1" claim object\'s "asserted" field\'s type is not ' + "JSON numeric" + ) + if decoded_visa["iat"] < asserted: + raise Exception( + "The Passport Visa Assertion Source made the claim after the visa " + 'was minted (i.e. "ga4gh_visa_v1" claim object\'s "asserted" ' + 'field is greater than the visa\'s "iat" claim)' + ) + + if "conditions" in decoded_visa["ga4gh_visa_v1"]: + logger.warning( + 'Condition checking is not yet supported, but a visa was received that contained the "conditions" field' + ) + if decoded_visa["ga4gh_visa_v1"]["conditions"]: + raise Exception('"conditions" field in "ga4gh_visa_v1" is not empty') + + logger.info("Visa was successfully validated") + return decoded_visa + + +def get_or_create_gen3_user_from_iss_sub(issuer, subject_id, db_session=None): + """ + Get a user from the Fence database corresponding to the visa identity + indicated by the combination. If a Fence user has + not yet been created for the given combination, + create and return such a user. 
+ + Args: + issuer (str): the issuer of a given visa + subject_id (str): the subject of a given visa + + Return: + userdatamodel.user.User: the Fence user corresponding to issuer and subject_id + """ + db_session = db_session or current_session + logger.debug( + f"get_or_create_gen3_user_from_iss_sub: issuer: {issuer} & subject_id: {subject_id}" + ) + iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((issuer, subject_id)) + if not iss_sub_pair_to_user: + username = subject_id + issuer[len("https://") :] + gen3_user = query_for_user(session=db_session, username=username) + idp_name = IssSubPairToUser.ISSUER_TO_IDP.get(issuer) + logger.debug(f"issuer_to_idp: {IssSubPairToUser.ISSUER_TO_IDP}") + if not gen3_user: + gen3_user = create_user(db_session, logger, username, idp_name=idp_name) + if not idp_name: + logger.info( + f"The user (id:{gen3_user.id}) was created without a linked identity " + f"provider since it could not be determined based on " + f"the issuer {issuer}" + ) + + # ensure user has an associated identity provider + if not gen3_user.identity_provider: + idp = ( + db_session.query(IdentityProvider) + .filter(IdentityProvider.name == idp_name) + .first() + ) + if not idp: + idp = IdentityProvider(name=idp_name) + gen3_user.identity_provider = idp + + logger.info( + f'Mapping subject id ("{subject_id}") and issuer ' + f'("{issuer}") combination to Fence user ' + f'"{gen3_user.username}" with IdP = "{idp_name}"' + ) + iss_sub_pair_to_user = IssSubPairToUser(iss=issuer, sub=subject_id) + iss_sub_pair_to_user.user = gen3_user + + db_session.add(iss_sub_pair_to_user) + db_session.commit() + + return iss_sub_pair_to_user.user + + +def _sync_validated_visa_authorization( + gen3_user, ga4gh_visas, expiration, db_session=None +): + """ + Wrapper around UserSyncer.sync_single_user_visas method, which parses + authorization information from the provided visas, persists it in Fence, + and syncs it to Arborist. + + IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISAS. ENSURE THIS IS DONE + BEFORE THIS. + + Args: + gen3_user (userdatamodel.user.User): the Fence user whose visas' + authz info is being synced + ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects + that are parsed + expiration (int): time at which synced Arborist policies and + inclusion in any GBAG are set to expire + + Return: + None + """ + db_session = db_session or current_session + default_args = fence.scripting.fence_create.get_default_init_syncer_inputs( + authz_provider="GA4GH" + ) + syncer = fence.scripting.fence_create.init_syncer(**default_args) + + synced_visas = syncer.sync_single_user_visas( + gen3_user, + ga4gh_visas, + db_session, + expires=expiration, + ) + + # after syncing authorization, persist the visas that were parsed successfully. + for visa in ga4gh_visas: + if visa not in synced_visas: + logger.debug(f"deleting visa with id={visa.id} from db session") + db_session.delete(visa) + else: + logger.debug(f"adding visa with id={visa.id} to db session") + db_session.add(visa) + + +def get_gen3_usernames_for_passport_from_cache(passport, db_session=None): + """ + Attempt to retrieve a cached list of users ids for a previously validated and + non-expired passport. 
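The fallback username derivation above is easiest to see with concrete (hypothetical) values; it assumes issuers always start with "https://":

```python
issuer, subject_id = "https://stsstg.nih.gov", "ab-12-cd"  # illustrative only
username = subject_id + issuer[len("https://"):]
print(username)  # -> "ab-12-cdstsstg.nih.gov"
```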
+ + Args: + passport (str): ga4gh encoded passport JWT + db_session (None, sqlalchemy session): optional database session to use + + Returns: + list[str]: list of usernames for users referred to by the previously validated + and non-expired passport + """ + db_session = db_session or current_session + user_ids_from_passports = None + current_time = int(time.time()) + + passport_hash = hashlib.sha256(passport.encode("utf-8")).hexdigest() + + # try to retrieve from local in-memory cache + if passport_hash in PASSPORT_CACHE: + user_ids_from_passports, expires = PASSPORT_CACHE[passport_hash] + if expires > current_time: + logger.debug( + f"Got users {user_ids_from_passports} for provided passport from in-memory cache. " + f"Expires: {expires}, Current Time: {current_time}" + ) + return user_ids_from_passports + else: + # expired, so remove it + del PASSPORT_CACHE[passport_hash] + + # try to retrieve from database cache + cached_passport = ( + db_session.query(GA4GHPassportCache) + .filter(GA4GHPassportCache.passport_hash == passport_hash) + .first() + ) + if cached_passport: + if cached_passport.expires_at > current_time: + user_ids_from_passports = cached_passport.user_ids + + # update local cache + PASSPORT_CACHE[passport_hash] = ( + user_ids_from_passports, + cached_passport.expires_at, + ) + + logger.debug( + f"Got users {user_ids_from_passports} for provided passport from " + f"database cache and placed in in-memory cache. " + f"Expires: {cached_passport.expires_at}, Current Time: {current_time}" + ) + return user_ids_from_passports + else: + # expired, so delete it + db_session.delete(cached_passport) + db_session.commit() + + return user_ids_from_passports + + +def put_gen3_usernames_for_passport_into_cache( + passport, user_ids_from_passports, expires_at, db_session=None +): + """ + Cache a validated and non-expired passport and map to the user_ids referenced + by the content. + + Args: + passport (str): ga4gh encoded passport JWT + db_session (None, sqlalchemy session): optional database session to use + user_ids_from_passports (list[str]): list of user identifiers referred to by + the previously validated and non-expired passport + expires_at (int): expiration time in unix time + """ + db_session = db_session or current_session + + passport_hash = hashlib.sha256(passport.encode("utf-8")).hexdigest() + + # stores back to cache and db + PASSPORT_CACHE[passport_hash] = user_ids_from_passports, expires_at + + db_session.execute( + """\ + INSERT INTO ga4gh_passport_cache ( + passport_hash, + expires_at, + user_ids + ) VALUES ( + :passport_hash, + :expires_at, + :user_ids + ) ON CONFLICT (passport_hash) DO UPDATE SET + expires_at = EXCLUDED.expires_at, + user_ids = EXCLUDED.user_ids;""", + dict( + passport_hash=passport_hash, + expires_at=expires_at, + user_ids=user_ids_from_passports, + ), + ) + + logger.debug( + f"Cached users {user_ids_from_passports} for provided passport in " + f"database cache and placed in in-memory cache. 
" + f"Expires: {expires_at}" + ) + + +# TODO to be called after login +def map_gen3_iss_sub_pair_to_user(gen3_issuer, gen3_subject_id, gen3_user): + pass diff --git a/fence/resources/google/utils.py b/fence/resources/google/utils.py index c6b7ed455..adeb0983a 100644 --- a/fence/resources/google/utils.py +++ b/fence/resources/google/utils.py @@ -28,6 +28,8 @@ UserServiceAccount, ServiceAccountAccessPrivilege, ServiceAccountToGoogleBucketAccessGroup, + query_for_user, + query_for_user_by_id, ) from fence.resources.google import STORAGE_ACCESS_PROVIDER_NAME from fence.errors import NotSupported, NotFound @@ -88,7 +90,9 @@ def _get_primary_service_account_key(user_id, username, proxy_group_id): user_service_account_key = None # Note that client_id is None, which is how we store the user's SA - user_google_service_account = get_service_account(client_id=None, user_id=user_id) + user_google_service_account = get_service_account( + client_id=None, user_id=user_id, username=username + ) if user_google_service_account: user_service_account_key = ( @@ -373,7 +377,7 @@ def add_custom_service_account_key_expiration( current_session.commit() -def get_service_account(client_id, user_id): +def get_service_account(client_id, user_id, username): """ Return the service account (from Fence db) for given client. @@ -386,11 +390,56 @@ def get_service_account(client_id, user_id): Returns: fence.models.GoogleServiceAccount: Client's service account """ - service_account = ( + service_accounts = ( current_session.query(GoogleServiceAccount) .filter_by(client_id=client_id, user_id=user_id) - .first() + .all() ) + if len(service_accounts) == 1: + return service_accounts[0] + + # in rare cases there's a possible that 2 SA's exist for 1 user that haven't + # been cleaned up yet. This happens when a users username is changed. To ensure + # getting the newest SA, we need to check for the SA ID based off the current + # username + service_account = None + + # determine expected SA name based off username + if client_id: + service_account_id = get_valid_service_account_id_for_client( + client_id, user_id, prefix=config["GOOGLE_SERVICE_ACCOUNT_PREFIX"] + ) + else: + service_account_id = get_valid_service_account_id_for_user( + user_id, username, prefix=config["GOOGLE_SERVICE_ACCOUNT_PREFIX"] + ) + + for sa in service_accounts: + if service_account_id in sa.email: + service_account = sa + else: + logger.info( + "Found Google Service Account using invalid/old name: " + "{}. Removing from db. Keys should still have access in Google until " + "cronjob removes them (e.g. fence-create google-manage-keys). NOTE: " + "the SA will still exist in Google but fence will use new SA {} for " + "new keys.".format(sa.email, service_account_id) + ) + + old_service_account_keys_db_entries = ( + current_session.query(GoogleServiceAccountKey) + .filter(GoogleServiceAccountKey.service_account_id == sa.id) + .all() + ) + + # remove the keys then the sa itself from db + for old_key in old_service_account_keys_db_entries: + current_session.delete(old_key) + + # commit the deletion of keys first, then do SA deletion + current_session.commit() + current_session.delete(sa) + current_session.commit() return service_account @@ -515,18 +564,40 @@ def _update_service_account_db_entry( return service_account_db_entry -def get_or_create_proxy_group_id(): +def get_or_create_proxy_group_id(expires=None, user_id=None, username=None): """ If no username returned from token or database, create a new proxy group - for the give user. 
Also, add the access privileges. + for the given user. Also, add the access privileges. Returns: int: id of (possibly newly created) proxy group associated with user """ - proxy_group_id = _get_proxy_group_id() + proxy_group_id = _get_proxy_group_id(user_id=user_id, username=username) if not proxy_group_id: - user_id = current_token["sub"] - username = current_token.get("context", {}).get("user", {}).get("name", "") + try: + user_by_id = query_for_user_by_id(current_session, user_id) + user_by_username = query_for_user( + session=current_session, username=username + ) + except Exception: + user_by_id = None + user_by_username = None + + if user_by_id: + user_id = user_id + username = user_by_id.username + elif user_by_username: + user_id = user_by_username.id + username = username + elif current_token: + user_id = current_token["sub"] + username = current_token.get("context", {}).get("user", {}).get("name", "") + else: + raise Exception( + f"could not find user given input user_id={user_id} or " + f"username={username}, nor was there a current_token" + ) + proxy_group_id = _create_proxy_group(user_id, username).id privileges = current_session.query(AccessPrivilege).filter( @@ -551,12 +622,13 @@ def get_or_create_proxy_group_id(): project=p.project, access=p.privilege, session=current_session, + expires=expires, ) return proxy_group_id -def _get_proxy_group_id(): +def _get_proxy_group_id(user_id=None, username=None): """ Get users proxy group id from the current token, if possible. Otherwise, check the database for it. @@ -567,10 +639,17 @@ def _get_proxy_group_id(): proxy_group_id = get_users_proxy_group_from_token() if not proxy_group_id: - user = ( - current_session.query(User).filter(User.id == current_token["sub"]).first() - ) - proxy_group_id = user.google_proxy_group_id + user_id = user_id or current_token["sub"] + + try: + user = query_for_user_by_id(current_session, user_id) + if not user: + user = query_for_user(current_session, username) + except Exception: + user = None + + if user: + proxy_group_id = user.google_proxy_group_id return proxy_group_id diff --git a/fence/resources/openid/idp_oauth2.py b/fence/resources/openid/idp_oauth2.py index 219b47bc9..1dd9c572b 100644 --- a/fence/resources/openid/idp_oauth2.py +++ b/fence/resources/openid/idp_oauth2.py @@ -180,9 +180,8 @@ def get_user_id(self, code): return {"error": f"Can't get user info from {self.idp}"} def get_access_token(self, user, token_endpoint, db_session=None): - """ - Get access_token using a refresh_token and store it in upstream_refresh_token table. + Get access_token using a refresh_token and store new refresh in upstream_refresh_token table. """ refresh_token = None expires = None @@ -193,7 +192,7 @@ def get_access_token(self, user, token_endpoint, db_session=None): expires = row.expires if not refresh_token: - raise AuthError("User doesnt have a refresh token") + raise AuthError("User doesn't have a refresh token") if time.time() > expires: raise AuthError("Refresh token expired. 
Please login again.") diff --git a/fence/resources/openid/ras_oauth2.py b/fence/resources/openid/ras_oauth2.py index 27fa08e93..fd739576d 100644 --- a/fence/resources/openid/ras_oauth2.py +++ b/fence/resources/openid/ras_oauth2.py @@ -1,21 +1,33 @@ import backoff -import base64 import flask -import httpx +import copy import requests + +# the whole passports module is imported to avoid issue with circular imports +import fence.resources.ga4gh.passports +import fence.scripting.fence_create +import fence.resources.ga4gh.passports + from flask_sqlalchemy_session import current_session from jose import jwt as jose_jwt from authutils.errors import JWTError -from authutils.token.core import get_iss, get_keys_url, get_kid, validate_jwt -from authutils.token.keys import get_public_key_for_token -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.asymmetric import rsa +from authutils.token.core import get_iss, get_kid +from gen3authz.client.arborist.errors import ArboristError + from fence.config import config -from fence.models import GA4GHVisaV1 +from fence.models import ( + GA4GHVisaV1, + IdentityProvider, + User, + IssSubPairToUser, + query_for_user, + create_user, +) +from fence.jwt.validate import validate_jwt from fence.utils import DEFAULT_BACKOFF_SETTINGS +from fence.errors import InternalError from .idp_oauth2 import Oauth2ClientBase @@ -75,78 +87,35 @@ def get_userinfo(self, token): return {} return res.json() - def get_encoded_visas_v11_userinfo(self, userinfo, pkey_cache=None): + def get_encoded_passport_v11_userinfo(self, userinfo): """ - Return encoded visas after extracting and validating passport from userinfo respoonse + Return encoded passport after extracting from userinfo response Args: userinfo (dict): userinfo response - pkey_cache (dict): app cache of public keys_dir Return: - list: list of encoded GA4GH visas + str: encoded ga4gh passport """ - decoded_passport = {} - encoded_passport = userinfo.get("passport_jwt_v11") - passport_issuer, passport_kid = None, None + return userinfo.get("passport_jwt_v11") - if not pkey_cache: - pkey_cache = {} + def get_encoded_visas_v11_userinfo(self, userinfo, pkey_cache=None): + """ + Return encoded visas after extracting and validating passport from userinfo response - try: - passport_issuer = get_iss(encoded_passport) - passport_kid = get_kid(encoded_passport) - except Exception as e: - self.logger.error( - "Could not get issuer or kid from passport: {}. Discarding passport.".format( - e - ) - ) + Args: + userinfo (dict): userinfo response + pkey_cache (dict): app cache of public keys_dir - public_key = pkey_cache.get(passport_issuer, {}).get(passport_kid) - if not public_key: - try: - self.logger.info("Fetching public key from flask app...") - public_key = get_public_key_for_token( - encoded_passport, attempt_refresh=True - ) - except Exception as e: - self.logger.info( - "Could not fetch public key from flask app to validate passport: {}. 
Trying to fetch from source.".format( - e - ) - ) - try: - self.logger.info("Trying to Fetch public keys from JWKs url...") - public_key = self.refresh_cronjob_pkey_cache( - passport_issuer, passport_kid, pkey_cache - ) - except Exception as e: - self.logger.info( - "Could not fetch public key from JWKs key url: {}".format(e) - ) - if not public_key: - self.logger.error( - "Could not fetch public key to validate visa: Successfully fetched " - "issuer's keys but did not find the visa's key id among them. Discarding visa." - ) - try: - decoded_passport = validate_jwt( - encoded_passport, - public_key, - aud=None, - scope={"openid"}, - issuers=config.get("GA4GH_VISA_ISSUER_ALLOWLIST", []), - options={ - "require_iat": True, - "require_exp": True, - }, - ) - except Exception as e: - self.logger.error( - "Passport failed validation: {}. Discarding passport.".format(e) + Return: + list: list of encoded GA4GH visas + """ + encoded_passport = self.get_encoded_passport_v11_userinfo(userinfo) + return ( + fence.resources.ga4gh.passports.get_unvalidated_visas_from_valid_passport( + encoded_passport, pkey_cache ) - return decoded_passport.get("ga4gh_passport_v1", []) + ) def get_user_id(self, code): @@ -191,11 +160,24 @@ def get_user_id(self, code): self.logger.info("Using {} field as username.".format(field_name)) + email = userinfo.get("email") + issuer = self.get_value_from_discovery_doc("issuer", "") + subject_id = userinfo.get("sub") + if not issuer or not subject_id: + err_msg = "Could not determine both issuer and subject id" + self.logger.error(err_msg) + return {"error": err_msg} + username = self.map_iss_sub_pair_to_user( + issuer, subject_id, username, email + ) + # Save userinfo and token in flask.g for later use in post_login flask.g.userinfo = userinfo flask.g.tokens = token flask.g.keys = keys + except InternalError: + raise except Exception as e: self.logger.exception("{}: {}".format(err_msg, e)) return {"error": err_msg} @@ -206,178 +188,133 @@ def get_user_id(self, code): "sub": userinfo.get("sub"), } - def refresh_cronjob_pkey_cache(self, issuer, kid, pkey_cache): + def map_iss_sub_pair_to_user( + self, issuer, subject_id, username, email, db_session=None + ): """ - Update app public key cache for a specific Passport Visa issuer + Map combination to a Fence user whose username + equals the username argument passed into this function. + + One exception to this is when two Fence users exist who both + correspond to the user who is trying to log in. Please see logged + warning for more details. Args: - issuer(str): Passport Visa issuer. Can be found under `issuer` in a Passport or a Visa - kid(str): Passsport Visa kid. Can be found in the header of an encoded Passport or encoded Visa - pkey_cache (dict): app cache of public keys_dir + issuer (str): issuer + subject_id (str): subject + username (str): username of the Fence user who is being mapped to + email (str): email to populate the mapped Fence user with in cases + when this function creates the mapped user or changes + its username Return: - dict: public key for given issuer + str: username that should be logged in. 
this will be equal to + username that was passed in in all cases except for the + exception noted above """ - jwks_url = get_keys_url(issuer) - try: - jwt_public_keys = httpx.get(jwks_url).json()["keys"] - except Exception as e: - raise JWTError( - "Could not get public key to validate Passport/Visa: Could not fetch keys from JWKs url: {}".format( - e + db_session = db_session or current_session + iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get( + (issuer, subject_id) + ) + user = query_for_user(db_session, username) + if iss_sub_pair_to_user: + if not user: + self.logger.info( + f'Issuer ("{issuer}") and subject id ("{subject_id}") ' + "have already been mapped to a Fence user " + f'("{iss_sub_pair_to_user.user.username}") created ' + "from the DRS endpoint. Changing said user's username" + f' to "{username}".' ) - ) - issuer_public_keys = {} - try: - for key in jwt_public_keys: - if "kty" in key and key["kty"] == "RSA": - self.logger.debug( - "Serializing RSA public key (kid: {}) to PEM format.".format( - key["kid"] + tries = 2 + for i in range(tries): + try: + flask.current_app.arborist.update_user( + iss_sub_pair_to_user.user.username, + new_username=username, + new_email=email, ) - ) - # Decode public numbers https://tools.ietf.org/html/rfc7518#section-6.3.1 - n_padded_bytes = base64.urlsafe_b64decode( - key["n"] + "=" * (4 - len(key["n"]) % 4) - ) - e_padded_bytes = base64.urlsafe_b64decode( - key["e"] + "=" * (4 - len(key["e"]) % 4) - ) - n = int.from_bytes(n_padded_bytes, "big", signed=False) - e = int.from_bytes(e_padded_bytes, "big", signed=False) - # Serialize and encode public key--PyJWT decode/validation requires PEM - rsa_public_key = rsa.RSAPublicNumbers(e, n).public_key( - default_backend() - ) - public_bytes = rsa_public_key.public_bytes( - serialization.Encoding.PEM, - serialization.PublicFormat.SubjectPublicKeyInfo, - ) - # Cache the encoded key by issuer - issuer_public_keys[key["kid"]] = public_bytes - else: - self.logger.debug( - "Key type (kty) is not 'RSA'; assuming PEM format. " - "Skipping key serialization. (kid: {})".format(key[0]) - ) - issuer_public_keys[key[0]] = key[1] - - pkey_cache.update({issuer: issuer_public_keys}) - self.logger.info( - "Refreshed cronjob pkey cache for Passport/Visa issuer {}".format( - issuer - ) - ) - except Exception as e: - self.logger.error( - "Could not refresh cronjob pkey cache for issuer {}: " - "Something went wrong during serialization: {}. Discarding Passport/Visa.".format( - issuer, e + except ArboristError as e: + self.logger.warning( + f"Try {i+1}: could not update user's username in Arborist: {e}" + ) + if i == tries - 1: + err_msg = f"Failed to update user's username in Arborist after {tries} tries" + self.logger.exception(err_msg) + raise InternalError(err_msg) + else: + self.logger.info( + "Successfully changed Arborist user's username from " + f'"{iss_sub_pair_to_user.user.username}" to "{username}"' + ) + break + + iss_sub_pair_to_user.user.username = username + if email: + iss_sub_pair_to_user.user.email = email + db_session.commit() + elif iss_sub_pair_to_user.user.username != username: + self.logger.warning( + "Two users exist in the Fence database corresponding " + "to the user who is currently trying to log in: one " + f'created from an earlier login ("{username}") and ' + f"one created from the DRS endpoint " + f'("{iss_sub_pair_to_user.user.username}"). ' + f'"{iss_sub_pair_to_user.user.username}" will be ' + f'logged in, rendering "{username}" inaccessible.' 
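The rename is pushed to Arborist with a small bounded-retry loop; a generic sketch of that pattern, with a hypothetical helper name (the caller converts a final failure into fence's InternalError):

```python
from gen3authz.client.arborist.errors import ArboristError

def update_username_with_retry(arborist, old_username, new_username, tries=2):
    # bounded retry, mirroring map_iss_sub_pair_to_user
    for attempt in range(tries):
        try:
            arborist.update_user(old_username, new_username=new_username)
            return True
        except ArboristError:
            if attempt == tries - 1:
                raise  # out of retries; let the caller handle it
    return False
```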
) + return iss_sub_pair_to_user.user.username + + if not user: + user = create_user( + db_session, + self.logger, + username, + email=email, + idp_name=IdentityProvider.ras, ) - return pkey_cache.get(issuer, {}).get(kid) + self.logger.info( + f'Mapping issuer ("{issuer}") and subject id ("{subject_id}") ' + f'combination to Fence user "{user.username}"' + ) + iss_sub_pair_to_user = IssSubPairToUser(iss=issuer, sub=subject_id) + iss_sub_pair_to_user.user = user + db_session.add(iss_sub_pair_to_user) + db_session.commit() + return iss_sub_pair_to_user.user.username @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS) - def update_user_visas(self, user, pkey_cache, db_session=current_session): + def update_user_authorization(self, user, pkey_cache, db_session=current_session): """ Updates user's RAS refresh token and uses the new access token to retrieve new visas from - RAS's /userinfo endpoint and update the db with the new visa. - - delete user's visas from db if we're not able to get a new access_token - - delete user's visas from db if we're not able to get new visas - - only visas which pass validation are added to the database + RAS's /userinfo endpoint and update access """ - # Note: in the cronjob this is called per-user per-visa. - # So it should be noted that when there are more clients than just RAS, - # this code as it stands can remove visas that the user has from other clients. - user.ga4gh_visas_v1 = [] - db_session.commit() - try: token_endpoint = self.get_value_from_discovery_doc("token_endpoint", "") + + # this get_access_token also persists the refresh token in the db token = self.get_access_token(user, token_endpoint, db_session) userinfo = self.get_userinfo(token) - encoded_visas = self.get_encoded_visas_v11_userinfo(userinfo, pkey_cache) - + passport = self.get_encoded_passport_v11_userinfo(userinfo) except Exception as e: err_msg = "Could not retrieve visas" self.logger.exception("{}: {}".format(err_msg, e)) raise - for encoded_visa in encoded_visas: - try: - visa_issuer = get_iss(encoded_visa) - visa_kid = get_kid(encoded_visa) - except Exception as e: - self.logger.error( - "Could not get issuer or kid from visa: {}. Discarding visa.".format( - e - ) - ) - continue # Not raise: If visa malformed, does not make sense to retry - - # See if pkey is in cronjob cache; if not, update cache. - public_key = pkey_cache.get(visa_issuer, {}).get(visa_kid) - if not public_key: - try: - public_key = self.refresh_cronjob_pkey_cache( - visa_issuer, visa_kid, pkey_cache - ) - except Exception as e: - self.logger.error( - "Could not refresh public key cache: {}".format(e) - ) - continue - if not public_key: - self.logger.error( - "Could not get public key to validate visa: Successfully fetched " - "issuer's keys but did not find the visa's key id among them. Discarding visa." - ) - continue # Not raise: If issuer not publishing pkey, does not make sense to retry - - try: - # Validate the visa per GA4GH AAI "Embedded access token" format rules. - # pyjwt also validates signature and expiration. - decoded_visa = validate_jwt( - encoded_visa, - public_key, - # Embedded token must not contain aud claim - aud=None, - # Embedded token must contain scope claim, which must include openid - scope={"openid"}, - issuers=config.get("GA4GH_VISA_ISSUER_ALLOWLIST", []), - # Embedded token must contain iss, sub, iat, exp claims - # options={"require": ["iss", "sub", "iat", "exp"]}, - # ^ FIXME 2021-05-13: Above needs pyjwt>=v2.0.0, which requires cryptography>=3. 
- # Once we can unpin and upgrade cryptography and pyjwt, switch to above "options" arg. - # For now, pyjwt 1.7.1 is able to require iat and exp; - # authutils' validate_jwt (i.e. the function being called) checks issuers already (see above); - # and we will check separately for sub below. - options={ - "require_iat": True, - "require_exp": True, - }, - ) - - # Also require 'sub' claim (see note above about pyjwt and the options arg). - if "sub" not in decoded_visa: - raise JWTError("Visa is missing the 'sub' claim.") - except Exception as e: - self.logger.error( - "Visa failed validation: {}. Discarding visa.".format(e) - ) - continue - - visa = GA4GHVisaV1( - user=user, - source=decoded_visa["ga4gh_visa_v1"]["source"], - type=decoded_visa["ga4gh_visa_v1"]["type"], - asserted=int(decoded_visa["ga4gh_visa_v1"]["asserted"]), - expires=int(decoded_visa["exp"]), - ga4gh_visa=encoded_visa, + # now sync authz updates (this includes persisting new valid visas into the + # database) + users_from_passports = ( + fence.resources.ga4gh.passports.sync_gen3_users_authz_from_ga4gh_passports( + [passport], + pkey_cache=pkey_cache, + db_session=db_session, ) + ) + user_ids_from_passports = list(users_from_passports.keys()) - current_db_session = db_session.object_session(visa) - current_db_session.add(visa) - db_session.commit() + # TODO? + # put_gen3_usernames_for_passport_into_cache( + # passport, usernames_from_current_passport + # ) diff --git a/fence/resources/storage/__init__.py b/fence/resources/storage/__init__.py index a9873875c..acb402739 100644 --- a/fence/resources/storage/__init__.py +++ b/fence/resources/storage/__init__.py @@ -151,7 +151,14 @@ def create_bucket(self, provider, session, bucketname, project): @check_exist def grant_access( - self, provider, username, project, access, session, google_bulk_mapping=None + self, + provider, + username, + project, + access, + session, + google_bulk_mapping=None, + expires=None, ): """ this should be exposed via admin endpoint @@ -176,6 +183,7 @@ def grant_access( access, session, google_bulk_mapping=google_bulk_mapping, + expires=expires, ) @check_exist @@ -369,6 +377,7 @@ def _update_access_to_bucket( access, session, google_bulk_mapping=None, + expires=None, ): # Need different logic for google (since buckets can have multiple # access groups) @@ -412,7 +421,7 @@ def _update_access_to_bucket( ) StorageManager._add_google_db_entry_for_bucket_access( - storage_user, bucket_access_group, session + storage_user, bucket_access_group, session, expires=expires ) else: @@ -489,7 +498,7 @@ def _revoke_access_to_bucket( @staticmethod def _add_google_db_entry_for_bucket_access( - storage_user, bucket_access_group, session + storage_user, bucket_access_group, session, expires=None ): """ Add a db entry specifying that a given user has storage access @@ -507,9 +516,15 @@ def _add_google_db_entry_for_bucket_access( storage_user_access_db_entry = GoogleProxyGroupToGoogleBucketAccessGroup( proxy_group_id=storage_user.google_proxy_group_id, access_group_id=bucket_access_group.id, + expires=expires, ) session.add(storage_user_access_db_entry) session.commit() + # update expiration if doesn't match db + elif expires != storage_user_access_db_entry.expires: + storage_user_access_db_entry.expires = expires + session.add(storage_user_access_db_entry) + session.commit() # FIXME: create a delete() on GoogleProxyGroupToGoogleBucketAccessGroup and use here. 
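The grant path above now follows an insert-or-refresh rule for the new `expires` column; a sketch assuming a SQLAlchemy `session`, a previously queried `entry` (possibly None), and `proxy_group`/`access_group`/`expires` from the surrounding context:

```python
if not entry:
    entry = GoogleProxyGroupToGoogleBucketAccessGroup(
        proxy_group_id=proxy_group.id,
        access_group_id=access_group.id,
        expires=expires,
    )
    session.add(entry)
elif entry.expires != expires:
    entry.expires = expires  # refresh the stored window to match this sync
    session.add(entry)
session.commit()
```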
# previous attempts to use similar delete() calls on other models resulting in errors diff --git a/fence/resources/user/__init__.py b/fence/resources/user/__init__.py index 50b6f3d65..0cf883548 100644 --- a/fence/resources/user/__init__.py +++ b/fence/resources/user/__init__.py @@ -105,7 +105,10 @@ def get_user_info(current_session, username): info["shib_idp"] = flask.session["shib_idp"] # User SAs are stored in db with client_id = None - primary_service_account = get_service_account(client_id=None, user_id=user.id) or {} + primary_service_account = ( + get_service_account(client_id=None, user_id=user.id, username=user.username) + or {} + ) primary_service_account_email = getattr(primary_service_account, "email", None) info["primary_google_service_account"] = primary_service_account_email diff --git a/fence/scripting/fence_create.py b/fence/scripting/fence_create.py index 9146613ba..23545646d 100644 --- a/fence/scripting/fence_create.py +++ b/fence/scripting/fence_create.py @@ -5,7 +5,6 @@ import json import pprint import asyncio - from cirrus import GoogleCloudManager from cirrus.google_cloud.errors import GoogleAuthError from cirrus.config import config as cirrus_config @@ -24,6 +23,7 @@ User, ProjectToBucket, ) +from sqlalchemy import and_ from fence.blueprints.link import ( force_update_user_google_account_expiration, @@ -48,12 +48,15 @@ ServiceAccountToGoogleBucketAccessGroup, query_for_user, migrate, + GA4GHVisaV1, ) from fence.scripting.google_monitor import email_users_without_access, validation_check from fence.config import config from fence.sync.sync_users import UserSyncer from fence.utils import create_client, get_valid_expiration +from gen3authz.client.arborist.client import ArboristClient + logger = get_logger(__name__) @@ -200,6 +203,33 @@ def _remove_client_service_accounts(db_session, client): ) +def get_default_init_syncer_inputs(authz_provider): + DB = os.environ.get("FENCE_DB") or config.get("DB") + if DB is None: + try: + from fence.settings import DB + except ImportError: + pass + + arborist = ArboristClient( + arborist_base_url=config["ARBORIST"], + logger=get_logger("user_syncer.arborist_client"), + authz_provider=authz_provider, + ) + dbGaP = os.environ.get("dbGaP") or config.get("dbGaP") + if not isinstance(dbGaP, list): + dbGaP = [dbGaP] + + storage_creds = config["STORAGE_CREDENTIALS"] + + return { + "DB": DB, + "arborist": arborist, + "dbGaP": dbGaP, + "STORAGE_CREDENTIALS": storage_creds, + } + + def init_syncer( dbGaP, STORAGE_CREDENTIALS, @@ -210,8 +240,6 @@ def init_syncer( sync_from_local_yaml_file=None, arborist=None, folder=None, - sync_from_visas=False, - fallback_to_dbgap_sftp=False, ): """ sync ACL files from dbGap to auth db and storage backends @@ -270,8 +298,6 @@ def init_syncer( sync_from_local_yaml_file=sync_from_local_yaml_file, arborist=arborist, folder=folder, - sync_from_visas=sync_from_visas, - fallback_to_dbgap_sftp=fallback_to_dbgap_sftp, ) @@ -313,8 +339,6 @@ def sync_users( sync_from_local_yaml_file=None, arborist=None, folder=None, - sync_from_visas=False, - fallback_to_dbgap_sftp=False, ): syncer = init_syncer( dbGaP, @@ -326,15 +350,10 @@ def sync_users( sync_from_local_yaml_file, arborist, folder, - sync_from_visas, - fallback_to_dbgap_sftp, ) if not syncer: exit(1) - if sync_from_visas: - syncer.sync_visas() - else: - syncer.sync() + syncer.sync() def create_sample_data(DB, yaml_file_path): @@ -674,6 +693,104 @@ def delete_users(DB, usernames): session.commit() +def cleanup_expired_ga4gh_information(DB): + """ + Remove any expired 
passports/visas from the database if they're expired. + + IMPORTANT NOTE: This DOES NOT actually remove authorization, it assumes that the + same expiration was set and honored in the authorization system. + """ + driver = SQLAlchemyDriver(DB) + with driver.session as session: + current_time = int(time.time()) + + # Get expires field from db, if None default to NOT expired + records_to_delete = ( + session.query(GA4GHVisaV1) + .filter( + and_( + GA4GHVisaV1.expires.isnot(None), + GA4GHVisaV1.expires < current_time, + ) + ) + .all() + ) + num_deleted_records = 0 + if records_to_delete: + for record in records_to_delete: + try: + session.delete(record) + session.commit() + + num_deleted_records += 1 + except Exception as e: + logger.error( + "ERROR: Could not remove GA4GHVisaV1 with id={}. Detail {}".format( + record.id, e + ) + ) + + logger.info( + f"Removed {num_deleted_records} expired GA4GHVisaV1 records from db." + ) + + +def delete_expired_google_access(DB): + """ + Delete all expired Google data access (e.g. remove proxy groups from Google Bucket + Access Groups if expired). + """ + cirrus_config.update(**config["CIRRUS_CFG"]) + + driver = SQLAlchemyDriver(DB) + with driver.session as session: + current_time = int(time.time()) + + # Get expires field from db, if None default to NOT expired + records_to_delete = ( + session.query(GoogleProxyGroupToGoogleBucketAccessGroup) + .filter( + and_( + GoogleProxyGroupToGoogleBucketAccessGroup.expires.isnot(None), + GoogleProxyGroupToGoogleBucketAccessGroup.expires < current_time, + ) + ) + .all() + ) + num_deleted_records = 0 + if records_to_delete: + with GoogleCloudManager() as manager: + for record in records_to_delete: + try: + member_email = record.proxy_group.email + access_group_email = record.access_group.email + manager.remove_member_from_group( + member_email, access_group_email + ) + logger.info( + "Removed {} from {}, expired {}. Current time: {} ".format( + member_email, + access_group_email, + record.expires, + current_time, + ) + ) + session.delete(record) + session.commit() + + num_deleted_records += 1 + except Exception as e: + logger.error( + "ERROR: Could not remove Google group member {} from access group {}. Detail {}".format( + member_email, access_group_email, e + ) + ) + + logger.info( + f"Removed {num_deleted_records} expired Google Access records from db and Google." + ) + + def delete_expired_service_accounts(DB): """ Delete all expired service accounts. @@ -1517,7 +1634,7 @@ def google_list_authz_groups(db): return google_authz -def update_user_visas( +def access_token_polling_job( db, chunk_size=None, concurrency=None, thread_pool_size=None, buffer_size=None ): """ diff --git a/fence/sync/passport_sync/ras_sync.py b/fence/sync/passport_sync/ras_sync.py index 9ab5fd5fe..134e2f9fc 100644 --- a/fence/sync/passport_sync/ras_sync.py +++ b/fence/sync/passport_sync/ras_sync.py @@ -9,24 +9,25 @@ class RASVisa(DefaultVisa): Class representing RAS visas """ - def _init__(self, logger): + def __init__(self, logger): super(RASVisa, self).__init__( logger=logger, ) - def _parse_single_visa( - self, user, encoded_visa, expires, parse_consent_code, db_session - ): + def _parse_single_visa(self, user, encoded_visa, expires, parse_consent_code): + """ + Return user information from the visa. + + IMPORTANT NOTE: THIS DOES NOT VALIDATE THE ENCODED VISA. ENSURE THIS IS DONE + BEFORE THIS. 
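Both cleanup jobs above select rows with the same filter shape: an `expires` column that is set and already in the past (None means no expiration). A generic sketch with a hypothetical helper name:

```python
import time

from sqlalchemy import and_

def expired_rows(session, model):
    # works for GA4GHVisaV1 and GoogleProxyGroupToGoogleBucketAccessGroup alike
    now = int(time.time())
    return (
        session.query(model)
        .filter(and_(model.expires.isnot(None), model.expires < now))
        .all()
    )
```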
+ """ decoded_visa = {} - try: - decoded_visa = jwt.decode(encoded_visa, verify=False) - except Exception as e: - self.logger.warning("Couldn't decode visa {}".format(e)) - # Remove visas if its invalid or expired - user.ga4gh_visas_v1 = [] - db_session.commit() - finally: - ras_dbgap_permissions = decoded_visa.get("ras_dbgap_permissions", []) + + # do not verify again, assume this happens upstream + # note that this can fail, upstream should handle the case that parsing fails + decoded_visa = jwt.decode(encoded_visa, verify=False) + + ras_dbgap_permissions = decoded_visa.get("ras_dbgap_permissions", []) project = {} info = {} info["tags"] = {} @@ -34,15 +35,31 @@ def _parse_single_visa( if time.time() < expires: for permission in ras_dbgap_permissions: phsid = permission.get("phs_id", "") - version = permission.get("version", "") - participant_set = permission.get("participant_set", "") consent_group = permission.get("consent_group", "") - full_phsid = phsid - if parse_consent_code and consent_group: - full_phsid += "." + consent_group - privileges = {"read-storage", "read"} - project[full_phsid] = privileges - info["tags"] = {"dbgap_role": permission.get("role", "")} + + if not phsid or not consent_group: + self.logger.error( + f"cannot determine visa permission for phsid {phsid} " + f"and consent_group {consent_group}. Ignoring this permission." + ) + else: + full_phsid = str(phsid) + if parse_consent_code and consent_group: + full_phsid += "." + str(consent_group) + privileges = {"read-storage", "read"} + + permission_expiration = None + try: + permission_expiration = int(permission.get("expiration", 0)) + except Exception as exc: + self.logger.error( + f"cannot determine visa expiration for {full_phsid} " + f"from: {permission.get('expiration')}. Ignoring this permission." + ) + + if permission_expiration and expires <= permission_expiration: + project[full_phsid] = privileges + info["tags"] = {"dbgap_role": permission.get("role", "")} else: # Remove visas if its invalid or expired user.ga4gh_visas_v1 = [] diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py index 8ec52d28b..7f8f26f75 100644 --- a/fence/sync/sync_users.py +++ b/fence/sync/sync_users.py @@ -5,6 +5,10 @@ import subprocess as sp import yaml import copy +import datetime +import uuid +import collections +import hashlib from contextlib import contextmanager from collections import defaultdict @@ -31,6 +35,8 @@ User, query_for_user, Client, + IdentityProvider, + get_project_to_authz_mapping, ) from fence.resources.storage import StorageManager from fence.resources.google.access_utils import bulk_update_google_groups @@ -277,6 +283,24 @@ def from_file(cls, filepath, encrypted=True, key=None, logger=None): logger=logger, ) + def persist_project_to_resource(self, db_session): + """ + Store the mappings from Project.auth_id to authorization resource (Project.authz) + + The mapping comes from an external source, this function persists what was parsed + into memory into the database for future use. 
+ """ + for auth_id, authz_resource in self.project_to_resource.items(): + project = ( + db_session.query(Project).filter(Project.auth_id == auth_id).first() + ) + if project: + project.authz = authz_resource + else: + project = Project(name=auth_id, auth_id=auth_id, authz=authz_resource) + db_session.add(project) + db_session.commit() + class UserSyncer(object): def __init__( @@ -291,8 +315,6 @@ def __init__( sync_from_local_yaml_file=None, arborist=None, folder=None, - sync_from_visas=False, - fallback_to_dbgap_sftp=False, ): """ Syncs ACL files from dbGap to auth database and storage backends @@ -307,8 +329,6 @@ def __init__( ArboristClient instance if the syncer should also create resources in arborist folder: a local folder where dbgap telemetry files will sync to - sync_from_visas: use visa for sync instead of dbgap - fallback_to_dbgap_sftp: fallback to telemetry files when visa sync fails """ self.sync_from_local_csv_dir = sync_from_local_csv_dir self.sync_from_local_yaml_file = sync_from_local_yaml_file @@ -327,8 +347,6 @@ def __init__( ) self.arborist_client = arborist self.folder = folder - self.sync_from_visas = sync_from_visas - self.fallback_to_dbgap_sftp = fallback_to_dbgap_sftp self.auth_source = defaultdict(set) # auth_source used for logging. username : [source1, source2] @@ -593,7 +611,7 @@ def _add_dbgap_project_for_user( # need to add dbgap project to arborist if self.arborist_client: - self._add_dbgap_study_to_arborist(dbgap_project, dbgap_config) + self._determine_arborist_resource(dbgap_project, dbgap_config) if project.name is None: project.name = dbgap_project @@ -688,7 +706,12 @@ def sync_two_phsids_dict( self.auth_source[user].add(source2) def sync_to_db_and_storage_backend( - self, user_project, user_info, sess, single_visa_sync=False + self, + user_project, + user_info, + sess, + do_not_revoke_from_db_and_storage=False, + expires=None, ): """ sync user access control to database and storage backend @@ -748,7 +771,7 @@ def sync_to_db_and_storage_backend( # pass the original, non-lowered user_info dict self._upsert_userinfo(sess, user_info) - if not single_visa_sync: + if not do_not_revoke_from_db_and_storage: self._revoke_from_storage( to_delete, sess, google_bulk_mapping=google_bulk_mapping ) @@ -759,6 +782,7 @@ def sync_to_db_and_storage_backend( user_project_lowercase, sess, google_bulk_mapping=google_bulk_mapping, + expires=expires, ) self._grant_from_db( @@ -775,10 +799,11 @@ def sync_to_db_and_storage_backend( user_project_lowercase, sess, google_bulk_mapping=google_bulk_mapping, + expires=expires, ) self._update_from_db(sess, to_update, user_project_lowercase) - if not single_visa_sync: + if not do_not_revoke_from_db_and_storage: self._validate_and_update_user_admin(sess, user_info_lowercase) if config["GOOGLE_BULK_UPDATES"]: @@ -788,6 +813,74 @@ def sync_to_db_and_storage_backend( sess.commit() + def sync_to_storage_backend(self, user_project, user_info, sess, expires): + """ + sync user access control to storage backend with given expiration + + Args: + user_project (dict): a dictionary of + + { + username: { + 'project1': {'read-storage','write-storage'}, + 'project2': {'read-storage'} + } + } + + user_info (dict): a dictionary of {username: user_info{}} + sess: a sqlalchemy session + + Return: + None + """ + if not expires: + raise Exception( + f"sync to storage backend requires an expiration. 
you provided: {expires}" + ) + + google_bulk_mapping = None + if config["GOOGLE_BULK_UPDATES"]: + google_bulk_mapping = {} + + # TODO: eventually it'd be nice to remove this step but it's required + # so that grant_from_storage can determine what storage backends + # are needed for a project. + self._init_projects(user_project, sess) + + # we need to compare db -> whitelist case-insensitively for username. + # db stores case-sensitively, but we need to query case-insensitively + user_project_lowercase = {} + syncing_user_project_list = set() + for username, projects in user_project.items(): + user_project_lowercase[username.lower()] = projects + for project, _ in projects.items(): + syncing_user_project_list.add((username.lower(), project)) + + user_info_lowercase = { + username.lower(): info for username, info in user_info.items() + } + + to_add = set(syncing_user_project_list) + + # when updating users we want to maintain case sesitivity in the username so + # pass the original, non-lowered user_info dict + self._upsert_userinfo(sess, user_info) + + self._grant_from_storage( + to_add, + user_project_lowercase, + sess, + google_bulk_mapping=google_bulk_mapping, + expires=expires, + ) + + if config["GOOGLE_BULK_UPDATES"]: + self.logger.info("Doing bulk Google update...") + bulk_update_google_groups(google_bulk_mapping) + self.logger.info("Bulk Google update done!") + + sess.commit() + def _revoke_from_db(self, sess, to_delete): """ Revoke user access to projects in the auth database @@ -929,6 +1022,17 @@ def _upsert_userinfo(self, sess, user_info): u.phone_number = user_info[username].get("phone_number", "") u.is_admin = user_info[username].get("admin", False) + idp_name = user_info[username].get("idp_name", "") + if idp_name and not u.identity_provider: + idp = ( + sess.query(IdentityProvider) + .filter(IdentityProvider.name == idp_name) + .first() + ) + if not idp: + idp = IdentityProvider(name=idp_name) + u.identity_provider = idp + # do not update if there is no tag if not user_info[username].get("tags"): continue @@ -989,7 +1093,9 @@ def _revoke_from_storage(self, to_delete, sess, google_bulk_mapping=None): google_bulk_mapping=google_bulk_mapping, ) - def _grant_from_storage(self, to_add, user_project, sess, google_bulk_mapping=None): + def _grant_from_storage( + self, to_add, user_project, sess, google_bulk_mapping=None, expires=None + ): """ If a project have storage backend, grant user's access to buckets in the storage backend. @@ -1029,6 +1135,7 @@ def _grant_from_storage(self, to_add, user_project, sess, google_bulk_mapping=No access=access, session=sess, google_bulk_mapping=google_bulk_mapping, + expires=expires, ) def _init_projects(self, user_project, sess): @@ -1052,7 +1159,9 @@ def _init_projects(self, user_project, sess): project = self._get_or_create(sess, Project, **data) except IntegrityError as e: sess.rollback() - self.logger.error(str(e)) + self.logger.error( + f"Project {auth_id} already exists. Detail {str(e)}" + ) raise Exception( "Project {} already exists. Detail {}. 
Please contact your system administrator.".format( auth_id, str(e) @@ -1178,7 +1287,7 @@ def _process_dbgap_project( # need to add dbgap project to arborist if self.arborist_client: - self._add_dbgap_study_to_arborist( + self._determine_arborist_resource( element_dict["auth_id"], dbgap_config ) @@ -1401,6 +1510,11 @@ def _sync(self, sess): for u, s in self.auth_source.items(): self.logger.info("Access for user {} from {}".format(u, s)) + self.logger.info( + f"Persisting authz mapping to database: {user_yaml.project_to_resource}" + ) + user_yaml.persist_project_to_resource(db_session=sess) + def _grant_all_consents_to_c999_users( self, user_projects, user_yaml_project_to_resources ): @@ -1493,7 +1607,7 @@ def _update_arborist(self, session, user_yaml): self.logger.debug( "attempting to update arborist resource: {}".format(resource) ) - self.arborist_client.update_resource("/", resource) + self.arborist_client.update_resource("/", resource, merge=True) except ArboristError as e: self.logger.error(e) # keep going; maybe just some conflicts from things existing already @@ -1589,7 +1703,12 @@ def _update_arborist(self, session, user_yaml): return True def _update_authz_in_arborist( - self, session, user_projects, user_yaml=None, single_user_sync=False + self, + session, + user_projects, + user_yaml=None, + single_user_sync=False, + expires=None, ): """ Assign users policies in arborist from the information in @@ -1602,6 +1721,8 @@ def _update_authz_in_arborist( Args: user_projects (dict) user_yaml (UserYAML) optional, if there are policies for users in a user.yaml + single_user_sync (bool) whether authz update is for a single user + expires (int) time at which authz info in Arborist should expire Return: bool: success @@ -1653,6 +1774,31 @@ def _update_authz_in_arborist( policy_id_list = [] policies = [] + # prefer in-memory if available from user_yaml, if not, get from database + if user_yaml and user_yaml.project_to_resource: + project_to_authz_mapping = user_yaml.project_to_resource + self.logger.debug( + f"using in-memory project to authz resource mapping from " + f"user.yaml (instead of database): {project_to_authz_mapping}" + ) + else: + project_to_authz_mapping = get_project_to_authz_mapping(session) + self.logger.debug( + f"using persisted project to authz resource mapping from database " + f"(instead of user.yaml - as it may not be available): {project_to_authz_mapping}" + ) + + self.logger.debug( + f"_dbgap_study_to_resources: {self._dbgap_study_to_resources}" + ) + all_resources = [ + r + for resources in self._dbgap_study_to_resources.values() + for r in resources + ] + all_resources.extend(r for r in project_to_authz_mapping.values()) + self._create_arborist_resources(all_resources) + for username, user_project_info in user_projects.items(): self.logger.info("processing user `{}`".format(username)) user = query_for_user(session=session, username=username) @@ -1660,57 +1806,58 @@ def _update_authz_in_arborist( username = user.username self.arborist_client.create_user_if_not_exist(username) - self.arborist_client.revoke_all_policies_for_user(username) - for project, permissions in user_project_info.items(): - - # check if this is a dbgap project, if it is, we need to get the right - # resource path, otherwise just use given project as path - paths = self._dbgap_study_to_resources.get(project, [project]) - - if user_yaml: - try: - # check if project is in mapping and convert accordingly - paths = [user_yaml.project_to_resource[project]] - except KeyError: - pass - - self.logger.info( 
- "resource paths for project {}: {}".format(project, paths) - ) - self.logger.debug("permissions: {}".format(permissions)) - for permission in permissions: - # "permission" in the dbgap sense, not the arborist sense - if permission not in self._created_roles: - try: - self.arborist_client.create_role( - arborist_role_for_permission(permission) - ) - except ArboristError as e: - self.logger.info( - "not creating role for permission `{}`; {}".format( - permission, str(e) - ) - ) - self._created_roles.add(permission) - - for path in paths: - # If everything was created fine, grant a policy to - # this user which contains exactly just this resource, - # with this permission as a role. + if not single_user_sync: + # TODO make this smarter - it should do a diff, not revoke all and add + self.arborist_client.revoke_all_policies_for_user(username) + + # as of 2/11/2022, for single_user_sync, as RAS visa parsing has + # previously mapped each project to the same set of privileges + # (i.e.{'read', 'read-storage'}), unique_policies will just be a + # single policy with ('read', 'read-storage') being the single + # key + unique_policies = self._determine_unique_policies( + user_project_info, project_to_authz_mapping + ) - # format project '/x/y/z' -> 'x.y.z' - # so the policy id will be something like 'x.y.z-create' - policy_id = _format_policy_id(path, permission) + for roles in unique_policies.keys(): + for role in roles: + self._create_arborist_role(role) - if not single_user_sync: + if single_user_sync: + for ordered_roles, ordered_resources in unique_policies.items(): + policy_hash = self._hash_policy_contents( + ordered_roles, ordered_resources + ) + self._create_arborist_policy( + policy_hash, + ordered_roles, + ordered_resources, + skip_if_exists=True, + ) + # return here as it is not expected single_user_sync + # will need any of the remaining user_yaml operations + # left in _update_authz_in_arborist + return self._grant_arborist_policy( + username, policy_hash, expires=expires + ) + else: + for roles, resources in unique_policies.items(): + for role in roles: + for resource in resources: + # grant a policy to this user which is a single + # role on a single resource + + # format project '/x/y/z' -> 'x.y.z' + # so the policy id will be something like 'x.y.z-create' + policy_id = _format_policy_id(resource, role) if policy_id not in self._created_policies: try: self.arborist_client.update_policy( policy_id, { "description": "policy created by fence sync", - "role_ids": [permission], - "resource_paths": [path], + "role_ids": [role], + "resource_paths": [resource], }, create_if_not_exist=True, ) @@ -1721,34 +1868,18 @@ def _update_authz_in_arborist( ) ) self._created_policies.add(policy_id) - self.arborist_client.grant_user_policy(username, policy_id) - - if single_user_sync: - policy_id_list.append(policy_id) - policy_json = { - "id": policy_id, - "description": "policy created by fence sync", - "role_ids": [permission], - "resource_paths": [path], - } - policies.append(policy_json) - if single_user_sync: - try: - self.arborist_client.update_bulk_policy(policies) - self.arborist_client.grant_bulk_user_policy( - username, policy_id_list - ) - except Exception as e: - self.logger.info( - "Couldn't update bulk policy for user {}: {}".format( - username, e - ) - ) + self._grant_arborist_policy( + username, policy_id, expires=expires + ) if user_yaml: for policy in user_yaml.policies.get(username, []): - self.arborist_client.grant_user_policy(username, policy) + 
self.arborist_client.grant_user_policy( + username, + policy, + expires_at=expires, + ) if user_yaml: for client_name, client_details in user_yaml.clients.items(): @@ -1775,22 +1906,243 @@ return True - def _add_dbgap_study_to_arborist(self, dbgap_study, dbgap_config): + def _determine_unique_policies(self, user_project_info, project_to_authz_mapping): """ - Return the arborist resource path after adding the specified dbgap study - to arborist. + Determine and return a dictionary of unique policies. + + Args (examples): + user_project_info (dict): + { + 'phs000002.c1': { 'read-storage', 'read' }, + 'phs000001.c1': { 'read', 'read-storage' }, + 'phs000004.c1': { 'write', 'read' }, + 'phs000003.c1': { 'read', 'write' }, + 'phs000006.c1': { 'write-storage', 'write', 'read-storage', 'read' }, + 'phs000005.c1': { 'read', 'read-storage', 'write', 'write-storage' }, + } + project_to_authz_mapping (dict): + { + 'phs000001.c1': '/programs/DEV/projects/phs000001.c1' + } + + Return (for examples): + dict: + { + ('read', 'read-storage'): ('/programs/DEV/projects/phs000001.c1', 'phs000002.c1'), + ('read', 'write'): ('phs000003.c1', 'phs000004.c1'), + ('read', 'read-storage', 'write', 'write-storage'): ('phs000005.c1', 'phs000006.c1'), + } + """ + roles_to_resources = collections.defaultdict(list) + for study, roles in user_project_info.items(): + ordered_roles = tuple(sorted(roles)) + study_authz_paths = self._dbgap_study_to_resources.get(study, [study]) + if study in project_to_authz_mapping: + study_authz_paths = [project_to_authz_mapping[study]] + roles_to_resources[ordered_roles].extend(study_authz_paths) + + policies = {} + for ordered_roles, unordered_resources in roles_to_resources.items(): + policies[ordered_roles] = tuple(sorted(unordered_resources)) + return policies + + def _create_arborist_role(self, role): + """ + Wrapper around gen3authz's create_role with additional logging Args: + role (str): the Arborist id of the role to create - Returns: - str: arborist resource path for study + Return: + bool: True if the role was created successfully or it already + exists. False otherwise + """
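To make the de-duplication above concrete, here is a minimal standalone sketch of the grouping `_determine_unique_policies` performs (it skips the `self._dbgap_study_to_resources` lookup, and the study ids and mapping below are made-up examples, not values from this changeset):

import collections

def determine_unique_policies(user_project_info, project_to_authz_mapping):
    # group studies by their sorted role tuple so identical privilege sets
    # collapse into a single policy
    roles_to_resources = collections.defaultdict(list)
    for study, roles in user_project_info.items():
        resource = project_to_authz_mapping.get(study, study)
        roles_to_resources[tuple(sorted(roles))].append(resource)
    # sort resources so equal contents always compare (and hash) the same
    return {roles: tuple(sorted(r)) for roles, r in roles_to_resources.items()}

print(determine_unique_policies(
    {"phs000002.c1": {"read-storage", "read"}, "phs000001.c1": {"read", "read-storage"}},
    {"phs000001.c1": "/programs/DEV/projects/phs000001.c1"},
))
# {('read', 'read-storage'): ('/programs/DEV/projects/phs000001.c1', 'phs000002.c1')}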
- healthy = self._is_arborist_healthy() - if not healthy: + if role in self._created_roles: + return True + try: + response_json = self.arborist_client.create_role( + arborist_role_for_permission(role) + ) + except ArboristError as e: + self.logger.error( + "could not create `{}` role in Arborist: {}".format(role, e) + ) + return False + self._created_roles.add(role) + + if response_json is None: + self.logger.info("role `{}` already exists in Arborist".format(role)) + else: + self.logger.info("created role `{}` in Arborist".format(role)) + return True + + def _create_arborist_resources(self, resources): + """ + Create resources in Arborist + + Args: + resources (list): a list of full Arborist resource paths to create + [ + "/programs/DEV/projects/phs000001.c1", + "/programs/DEV/projects/phs000002.c1", + "/programs/DEV/projects/phs000003.c1" + ] + + Return: + bool: True if the resources were successfully created, False otherwise + + + As of 2/11/2022, for resources above, + utils.combine_provided_and_dbgap_resources({}, resources) returns: + [ + { 'name': 'programs', 'subresources': [ + { 'name': 'DEV', 'subresources': [ + { 'name': 'projects', 'subresources': [ + { 'name': 'phs000001.c1', 'subresources': []}, + { 'name': 'phs000002.c1', 'subresources': []}, + { 'name': 'phs000003.c1', 'subresources': []} + ]} + ]} + ]} + ] + Because this list has a single object, only a single network request gets + sent to Arborist. + + However, for resources = ["/phs000001.c1", "/phs000002.c1", "/phs000003.c1"], + utils.combine_provided_and_dbgap_resources({}, resources) returns: + [ + {'name': 'phs000001.c1', 'subresources': []}, + {'name': 'phs000002.c1', 'subresources': []}, + {'name': 'phs000003.c1', 'subresources': []} + ] + Because this list has 3 objects, 3 network requests get sent to Arborist. + + As a practical matter, for sync_single_user_visas, studies + should be nested under the `/programs` resource as in the former + example (i.e. only one network request gets made). + + TODO for the sake of simplicity, it would be nice if only one network + request was made no matter the input. + """ + for request_body in utils.combine_provided_and_dbgap_resources({}, resources): + try: + response_json = self.arborist_client.update_resource( + "/", request_body, merge=True + ) + except ArboristError as e: + self.logger.error( + "could not create Arborist resources using request body `{}`. error: {}".format( + request_body, e + ) + ) + return False + + self.logger.debug( + "created {} resource(s) in Arborist: `{}`".format(len(resources), resources) + ) + return True + + def _create_arborist_policy( + self, policy_id, roles, resources, skip_if_exists=False + ): + """ + Wrapper around gen3authz's create_policy with additional logging + + Args: + policy_id (str): the Arborist id of the policy to create + roles (iterable): role ids the created policy should contain + resources (iterable): resource paths the created policy should contain + skip_if_exists (bool): if True, this function will not treat an already + existent policy as an error + + Return: + bool: True if policy creation was successful. False otherwise + """
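As an aside on the nesting behavior described in `_create_arborist_resources` above, a rough sketch of how flat paths fold into the single nested request body follows (this `nest_resources` helper is illustrative only, not the actual `utils.combine_provided_and_dbgap_resources`):

def nest_resources(paths):
    # build a trie of path segments, then convert each level into the
    # {"name": ..., "subresources": [...]} shape Arborist expects
    tree = {}
    for path in paths:
        level = tree
        for part in path.strip("/").split("/"):
            level = level.setdefault(part, {})

    def to_subresources(level):
        return [
            {"name": name, "subresources": to_subresources(children)}
            for name, children in level.items()
        ]

    return to_subresources(tree)

# one top-level object -> one network request to Arborist
print(nest_resources(["/programs/DEV/projects/phs000001.c1", "/programs/DEV/projects/phs000002.c1"]))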
+ try: + response_json = self.arborist_client.create_policy( + { + "id": policy_id, + "role_ids": roles, + "resource_paths": resources, + }, + skip_if_exists=skip_if_exists, + ) + except ArboristError as e: + self.logger.error( + "could not create policy `{}` in Arborist: {}".format(policy_id, e) + ) + return False + + if response_json is None: + self.logger.info("policy `{}` already exists in Arborist".format(policy_id)) + else: + self.logger.info("created policy `{}` in Arborist".format(policy_id)) + return True + + def _hash_policy_contents(self, ordered_roles, ordered_resources): + """ + Generate a sha256 hexdigest representing ordered_roles and ordered_resources. + + Args: + ordered_roles (iterable): policy roles in sorted order + ordered_resources (iterable): policy resources in sorted order + + Return: + str: SHA256 hex digest + """ + + def escape(s): + return s.replace(",", "\\,") + + canonical_roles = ",".join(escape(r) for r in ordered_roles) + canonical_resources = ",".join(escape(r) for r in ordered_resources) + canonical_policy = f"{canonical_roles},,{canonical_resources}" + policy_hash = hashlib.sha256(canonical_policy.encode("utf-8")).hexdigest() + + return policy_hash + + def _grant_arborist_policy(self, username, policy_id, expires=None): + """ + Wrapper around gen3authz's grant_user_policy with additional logging + + Args: + username (str): username of user in Arborist who policy should be + granted to + policy_id (str): Arborist policy id + expires (int): POSIX timestamp for when policy should expire + + Return: + bool: True if granting of policy was successful, False otherwise + """ + try: + response_json = self.arborist_client.grant_user_policy( + username, + policy_id, + expires_at=expires, + ) + except ArboristError as e: + self.logger.error( + "could not grant policy `{}` to user `{}`: {}".format( + policy_id, username, e + ) + ) return False + self.logger.debug( + "granted policy `{}` to user `{}`".format(policy_id, username) + ) + return True + + def _determine_arborist_resource(self, dbgap_study, dbgap_config): + """ + Determine the arborist resource paths and add them to + self._dbgap_study_to_resources + + Args: + dbgap_study (str): study phs identifier + dbgap_config (dict): dictionary of config for dbgap server + + """ default_namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get( "_default", ["/"] ) @@ -1804,40 +2156,12 @@ def _add_dbgap_study_to_arborist(self, dbgap_study, dbgap_config): namespace.rstrip("/") + "/programs/" for namespace in namespaces ] - try: - for resource_namespace in arborist_resource_namespaces: - # The update_resource function creates a put request which will overwrite - # existing resources. Therefore, only create if get_resource returns - # the resource doesn't exist. 
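For reference, the policy id derivation in `_hash_policy_contents` above boils down to the following (a sketch assuming the comma-escaping and `,,` separator shown there; the inputs are made-up):

import hashlib

def hash_policy_contents(ordered_roles, ordered_resources):
    # escape commas inside individual values so joined strings cannot collide,
    # then join roles and resources with a ",," separator
    escape = lambda s: s.replace(",", "\\,")
    canonical = ",".join(map(escape, ordered_roles)) + ",," + ",".join(map(escape, ordered_resources))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

# equal contents yield the same policy id, independent of original set order,
# because the caller sorts roles and resources before hashing
assert hash_policy_contents(("read", "read-storage"), ("phs000001.c1",)) == \
    hash_policy_contents(tuple(sorted({"read-storage", "read"})), ("phs000001.c1",))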
- full_resource_path = resource_namespace + dbgap_study - if not self.arborist_client.get_resource(full_resource_path): - response = self.arborist_client.update_resource( - resource_namespace, - {"name": dbgap_study, "description": "synced from dbGaP"}, - create_parents=True, - ) - self.logger.info( - "added arborist resource under parent path: {} for dbgap project {}.".format( - resource_namespace, dbgap_study - ) - ) - self.logger.debug("Arborist response: {}".format(response)) - else: - self.logger.debug( - "Arborist resource already exists: {}".format( - full_resource_path - ) - ) - - if dbgap_study not in self._dbgap_study_to_resources: - self._dbgap_study_to_resources[dbgap_study] = [] - - self._dbgap_study_to_resources[dbgap_study].append(full_resource_path) - - return arborist_resource_namespaces - except ArboristError as e: - self.logger.error(e) - # keep going; maybe just some conflicts from things existing already + for resource_namespace in arborist_resource_namespaces: + full_resource_path = resource_namespace + dbgap_study + if dbgap_study not in self._dbgap_study_to_resources: + self._dbgap_study_to_resources[dbgap_study] = [] + self._dbgap_study_to_resources[dbgap_study].append(full_resource_path) + return arborist_resource_namespaces def _is_arborist_healthy(self): if not self.arborist_client: @@ -1917,7 +2241,6 @@ def parse_user_visas(self, db_session): encoded_visa, visa.expires, self.parse_consent_code, - db_session, ) projects = {**projects, **project} if projects: @@ -1927,186 +2250,25 @@ def parse_user_visas(self, db_session): return (user_projects, user_info) - def _sync_visas(self, sess): - - self.logger.info("Running usersync with Visas") - self.logger.info( - "Fallback to telemetry files: {}".format(self.fallback_to_dbgap_sftp) - ) - - self.ras_sync_client = RASVisa(logger=self.logger) - - dbgap_config = self.dbGaP[0] - user_projects, user_info = self.parse_user_visas(sess) - enable_common_exchange_area_access = dbgap_config.get( - "enable_common_exchange_area_access", False - ) - study_common_exchange_areas = dbgap_config.get( - "study_common_exchange_areas", {} - ) - - try: - user_yaml = UserYAML.from_file( - self.sync_from_local_yaml_file, encrypted=False, logger=self.logger - ) - except (EnvironmentError, AssertionError) as e: - self.logger.error(str(e)) - self.logger.error("aborting early") - return - - # parse projects - user_projects = self.parse_projects(user_projects) - user_yaml.projects = self.parse_projects(user_yaml.projects) - - if self.fallback_to_dbgap_sftp: - # Collect user_info and user_projects from telemetry - user_projects_telemetry = {} - user_info_telemetry = {} - if self.is_sync_from_dbgap_server: - self.logger.debug( - "Pulling telemetry files from {} dbgap sftp servers".format( - len(self.dbGaP) - ) - ) - ( - user_projects_telemetry, - user_info_telemetry, - ) = self._merge_multiple_dbgap_sftp(self.dbGaP, sess) - local_csv_file_list = [] - if self.sync_from_local_csv_dir: - local_csv_file_list = glob.glob( - os.path.join(self.sync_from_local_csv_dir, "*") - ) - - # if syncing from local csv dir dbgap configurations - # come from the first dbgap instance in the fence config file - user_projects_csv, user_info_csv = self._get_user_permissions_from_csv_list( - local_csv_file_list, - encrypted=False, - session=sess, - dbgap_config=self.dbGaP[0], - ) - user_projects_csv = self.parse_projects(user_projects_csv) - user_projects_telemetry = self.parse_projects(user_projects_telemetry) - - # merge all user info dicts into "user_info". 
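Stepping back to `_determine_arborist_resource` above: its namespace handling reduces to roughly the following (a simplified sketch; it omits any per-study namespace overrides that the elided config lookup may apply):

def study_resource_paths(dbgap_study, dbgap_config):
    # each configured namespace yields one "<namespace>/programs/<study>" path;
    # the default namespace "/" yields plain "/programs/<study>"
    namespaces = dbgap_config.get("study_to_resource_namespaces", {}).get("_default", ["/"])
    return [ns.rstrip("/") + "/programs/" + dbgap_study for ns in namespaces]

print(study_resource_paths("phs000001.c1", {"study_to_resource_namespaces": {"_default": ["/", "/orgA/"]}}))
# ['/programs/phs000001.c1', '/orgA/programs/phs000001.c1']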
- # the user info (such as email) in the user.yaml files - # overrides the user info from the CSV files. - self.sync_two_user_info_dict(user_info_csv, user_info_telemetry) - - # merge all access info dicts into "user_projects". - # the access info is combined - if the user.yaml access is - # ["read"] and the CSV file access is ["read-storage"], the - # resulting access is ["read", "read-storage"]. - self.sync_two_phsids_dict( - user_projects_csv, - user_projects_telemetry, - source1="local_csv", - source2="dbgap", - ) - - # sync phsids so that this adds projects if visas were invalid or adds users that dont have visas. - # `phsids2_overrides_phsids1=True` because We want visa to be the source of truth when its available and not merge any telemetry file info into this. - # We only want visa to be used when visa is not valid or available - self.sync_two_phsids_dict( - user_projects_telemetry, - user_projects, - source1="dbgap", - source2="visa", - phsids2_overrides_phsids1=False, - ) - self.sync_two_user_info_dict(user_info_telemetry, user_info) - - if self.parse_consent_code and enable_common_exchange_area_access: - self.logger.info( - f"using study to common exchange area mapping: {study_common_exchange_areas}" - ) - - # merge all user info dicts into "user_info". - # the user info (such as email) in the user.yaml files - # overrides the user info from the CSV files. - self.sync_two_user_info_dict(user_yaml.user_info, user_info) - - # merge all access info dicts into "user_projects". - # the access info is combined - if the user.yaml access is - # ["read"] and the CSV file access is ["read-storage"], the - # resulting access is ["read", "read-storage"]. - self.sync_two_phsids_dict( - user_yaml.projects, user_projects, source1="user_yaml", source2="visa" - ) - - self._process_user_projects( - user_projects, - enable_common_exchange_area_access, - study_common_exchange_areas, - dbgap_config, - sess, - ) - - # Note: if there are multiple dbgap sftp servers configured - # this parameter is always from the config for the first dbgap sftp server - # not any additional ones - if self.parse_consent_code: - self._grant_all_consents_to_c999_users( - user_projects, user_yaml.project_to_resource - ) - # update fence db - if user_projects: - self.logger.info("Sync to db and storage backend") - self.sync_to_db_and_storage_backend(user_projects, user_info, sess) - else: - self.logger.info("No users for syncing") - - # update the Arborist DB (resources, roles, policies, groups) - if user_yaml.authz: - if not self.arborist_client: - raise EnvironmentError( - "yaml file contains authz section but sync is not configured with" - " arborist client--did you run sync with --arborist arg?" 
- ) - self.logger.info("Synchronizing arborist...") - success = self._update_arborist(sess, user_yaml) - if success: - self.logger.info("Finished synchronizing arborist") - else: - self.logger.error("Could not synchronize successfully") - exit(1) - else: - self.logger.info("No `authz` section; skipping arborist sync") - - # update arborist db (user access) - if self.arborist_client: - self.logger.info("Synchronizing arborist with authorization info...") - success = self._update_authz_in_arborist(sess, user_projects, user_yaml) - if success: - self.logger.info( - "Finished synchronizing authorization info to arborist" - ) - else: - self.logger.error( - "Could not synchronize authorization info successfully to arborist" - ) - exit(1) - else: - self.logger.error("No arborist client set; skipping arborist sync") + def sync_single_user_visas(self, user, ga4gh_visas, sess=None, expires=None): + """ + Sync a single user's visas during login or DRS/data access - # Logging authz source - for u, s in self.auth_source.items(): - self.logger.info("Access for user {} from {}".format(u, s)) + IMPORTANT NOTE: THIS DOES NOT VALIDATE THE VISA. ENSURE THIS IS DONE + BEFORE THIS. - def sync_visas(self): - if self.session: - self._sync_visas(self.session) - else: - with self.driver.session as s: - self._sync_visas(s) - # if returns with some failure use telemetry file + Args: + user (userdatamodel.user.User): Fence user whose visas' + authz info is being synced + ga4gh_visas (list): a list of fence.models.GA4GHVisaV1 objects + that are ALREADY VALIDATED + sess (sqlalchemy.orm.session.Session): database session + expires (int): time at which synced Arborist policies and + inclusion in any GBAG are set to expire - def sync_single_user_visas(self, user, sess=None): - """ - Sync a single user's visa during login + Return: + list of successfully parsed visas """ - self.ras_sync_client = RASVisa(logger=self.logger) dbgap_config = self.dbGaP[0] enable_common_exchange_area_access = dbgap_config.get( @@ -2129,19 +2291,29 @@ def sync_single_user_visas(self, user, sess=None): user_info = dict() projects = {} info = {} + parsed_visas = [] - for visa in user.ga4gh_visas_v1: + for visa in ga4gh_visas: project = {} visa_type = self._pick_sync_type(visa) encoded_visa = visa.ga4gh_visa - project, info = visa_type._parse_single_visa( - user, - encoded_visa, - visa.expires, - self.parse_consent_code, - sess, - ) + + try: + project, info = visa_type._parse_single_visa( + user, + encoded_visa, + visa.expires, + self.parse_consent_code, + ) + except Exception: + self.logger.warning( + f"ignoring unsuccessfully parsed or expired visa: {encoded_visa}" + ) + continue + projects = {**projects, **project} + parsed_visas.append(visa) + user_projects[user.username] = projects user_info[user.username] = info @@ -2165,11 +2337,10 @@ def sync_single_user_visas(self, user, sess=None): user_projects, user_yaml.project_to_resource ) - # update fence db if user_projects: - self.logger.info("Sync to db and storage backend") - self.sync_to_db_and_storage_backend( - user_projects, user_info, sess, single_visa_sync=True + self.logger.info("Sync to storage backend [sync_single_user_visas]") + self.sync_to_storage_backend( + user_projects, user_info, sess, expires=expires ) else: self.logger.info("No users for syncing") @@ -2178,7 +2349,11 @@ def sync_single_user_visas(self, user, sess=None): if self.arborist_client: self.logger.info("Synchronizing arborist with authorization info...") success = self._update_authz_in_arborist( - sess, user_projects, 
user_yaml=user_yaml, single_user_sync=True + sess, + user_projects, + user_yaml=user_yaml, + single_user_sync=True, + expires=expires, ) if success: self.logger.info( @@ -2190,3 +2365,5 @@ ) else: self.logger.error("No arborist client set; skipping arborist sync") + + return parsed_visas diff --git a/fence/utils.py b/fence/utils.py index 8dc949f14..e510a0f74 100644 --- a/fence/utils.py +++ b/fence/utils.py @@ -246,8 +246,8 @@ def send_email(from_email, to_emails, subject, text, smtp_domain): "smtp_hostname": "smtp.mailgun.org", "default_login": "postmaster@mailgun.planx-pla.net", "api_url": "https://api.mailgun.net/v3/mailgun.planx-pla.net", - "smtp_password": "password", - "api_key": "api key" + "smtp_password": "password", # pragma: allowlist secret + "api_key": "api key" # pragma: allowlist secret } Returns: @@ -364,6 +364,35 @@ def _is_status(code): return False + +def get_from_cache(item_id, memory_cache, db_cache_table, db_cache_table_id_field="id"): + """ + Attempt to get a cached item and store in memory cache from db if necessary. + + NOTE: This requires custom implementation for putting items in the db cache table. + """ + expiry = int(time.time()) # entries are only valid if they expire later than now + + # try to retrieve from local in-memory cache + rv, expires_at = memory_cache.get(item_id, (None, 0)) + if expires_at > expiry: + return rv + + # try to retrieve from database cache + if hasattr(flask.current_app, "db"): # we don't have db in startup + with flask.current_app.db.session as session: + cache = ( + session.query(db_cache_table) + .filter( + getattr(db_cache_table, db_cache_table_id_field, None) == item_id + ) + .first() + ) + if cache and cache.expires_at and cache.expires_at > expiry: + rv = dict(cache) + + # store in memory cache + memory_cache[item_id] = rv, cache.expires_at + return rv + + # Default settings to control usage of backoff library. DEFAULT_BACKOFF_SETTINGS = { "on_backoff": log_backoff_retry, diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index a76b0a228..34164c508 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -461,7 +461,7 @@ paths: required: false in: query description: >- - if `no_force_sign=True` and the file requested is actually public, this will + if `no_force_sign=True`, this will request to *not* sign the resulting URL (i.e. just provide the public url without using anonymous signing creds). schema: diff --git a/poetry.lock b/poetry.lock index 0266fde37..ecb7dd882 100644 --- a/poetry.lock +++ b/poetry.lock @@ -61,14 +61,14 @@ requests = "*" [[package]] name = "authutils" -version = "6.1.2" +version = "6.1.0" description = "Gen3 auth utility functions" category = "main" optional = false python-versions = ">=3.6,<4.0" [package.dependencies] -authlib = "0.11.0" +authlib = ">=0.11,<1.0" cached-property = ">=1.4,<2.0" cdiserrors = "<2.0.0" httpx = ">=0.12.1,<1.0.0" @@ -81,11 +81,11 @@ fastapi = ["fastapi (>=0.54.1,<0.55.0)"] [[package]] name = "azure-core" -version = "1.21.1" +version = "1.22.1" description = "Microsoft Azure Core Library for Python" category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" [package.dependencies] requests = ">=2.18.4" @@ -263,7 +263,7 @@ pycparser = "*" [[package]] name = "charset-normalizer" -version = "2.0.11" +version = "2.0.12" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
category = "main" optional = false @@ -514,7 +514,7 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" [[package]] name = "gen3authz" -version = "1.4.2" +version = "1.5.1" description = "Gen3 authz client" category = "main" optional = false @@ -687,7 +687,7 @@ testing = ["pytest"] [[package]] name = "google-resumable-media" -version = "2.1.0" +version = "2.2.1" description = "Utilities for Google Media Downloads and Resumable Uploads" category = "main" optional = false @@ -739,7 +739,7 @@ http2 = ["h2 (>=3,<5)"] [[package]] name = "httplib2" -version = "0.20.2" +version = "0.20.4" description = "A comprehensive HTTP client library." category = "main" optional = false @@ -1107,7 +1107,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] name = "pycryptodome" -version = "3.14.0" +version = "3.14.1" description = "Cryptographic library for Python" category = "main" optional = false @@ -1416,7 +1416,7 @@ resolved_reference = "4d39265d6e478acd5e1afe6e5dc722418f887d78" [[package]] name = "typing-extensions" -version = "4.0.1" +version = "4.1.1" description = "Backported and Experimental Type Hints for Python 3.6+" category = "main" optional = false @@ -1504,7 +1504,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes [metadata] lock-version = "1.1" python-versions = "^3.6" -content-hash = "f6027931a95b41b5bdeec9c1420108387eadddd99056bfac007cc47ff0c45efe" +content-hash = "5c0ae6cc529d940e7f68498a5849ab702ab629c4a67a75b7043f40fed8121976" [metadata.files] addict = [ @@ -1532,12 +1532,12 @@ authlib = [ {file = "Authlib-0.11.tar.gz", hash = "sha256:9741db6de2950a0a5cefbdb72ec7ab12f7e9fd530ff47219f1530e79183cbaaf"}, ] authutils = [ - {file = "authutils-6.1.2-py3-none-any.whl", hash = "sha256:5e45b7098a40ee9650326d3f9488f867a538d53d1e03304b59634d5e77a3a258"}, - {file = "authutils-6.1.2.tar.gz", hash = "sha256:b029daffcc8d1bca481e7ba0528c8982d05c8b8dc7eee72831d37ddc08a36842"}, + {file = "authutils-6.1.0-py3-none-any.whl", hash = "sha256:682dba636694c36fb35af1d9ff576bb8436337c3899f0ef434cda5918d661db9"}, + {file = "authutils-6.1.0.tar.gz", hash = "sha256:7263af0b2ce3a0db19236fd123b34f795d07e07111b7bd18a51808568ddfdc2e"}, ] azure-core = [ - {file = "azure-core-1.21.1.zip", hash = "sha256:88d2db5cf9a135a7287dc45fdde6b96f9ca62c9567512a3bb3e20e322ce7deb2"}, - {file = "azure_core-1.21.1-py2.py3-none-any.whl", hash = "sha256:3d70e9ec64de92dfae330c15bc69085caceb2d83813ef6c01cc45326f2a4be83"}, + {file = "azure-core-1.22.1.zip", hash = "sha256:4b6e405268a33b873107796495cec3f2f1b1ffe935624ce0fbddff36d38d3a4d"}, + {file = "azure_core-1.22.1-py3-none-any.whl", hash = "sha256:407381c74e2ccc16adb1f29c4a1b381ebd39e8661bbf60422926d8252d5b757d"}, ] azure-storage-blob = [ {file = "azure-storage-blob-12.9.0.zip", hash = "sha256:cff66a115c73c90e496c8c8b3026898a3ce64100840276e9245434e28a864225"}, @@ -1548,13 +1548,10 @@ backoff = [ {file = "backoff-1.11.1.tar.gz", hash = "sha256:ccb962a2378418c667b3c979b504fdeb7d9e0d29c0579e3b13b86467177728cb"}, ] bcrypt = [ - {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:b589229207630484aefe5899122fb938a5b017b0f4349f769b8c13e78d99a8fd"}, {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c95d4cbebffafcdd28bd28bb4e25b31c50f6da605c81ffd9ad8a3d1b2ab7b1b6"}, {file = "bcrypt-3.2.0-cp36-abi3-manylinux1_x86_64.whl", hash = "sha256:63d4e3ff96188e5898779b6057878fecf3f11cfe6ec3b313ea09955d587ec7a7"}, {file = "bcrypt-3.2.0-cp36-abi3-manylinux2010_x86_64.whl", hash = 
"sha256:cd1ea2ff3038509ea95f687256c46b79f5fc382ad0aa3664d200047546d511d1"}, {file = "bcrypt-3.2.0-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:cdcdcb3972027f83fe24a48b1e90ea4b584d35f1cc279d76de6fc4b13376239d"}, - {file = "bcrypt-3.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a0584a92329210fcd75eb8a3250c5a941633f8bfaf2a18f81009b097732839b7"}, - {file = "bcrypt-3.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:56e5da069a76470679f312a7d3d23deb3ac4519991a0361abc11da837087b61d"}, {file = "bcrypt-3.2.0-cp36-abi3-win32.whl", hash = "sha256:a67fb841b35c28a59cebed05fbd3e80eea26e6d75851f0574a9273c80f3e9b55"}, {file = "bcrypt-3.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:81fec756feff5b6818ea7ab031205e1d323d8943d237303baca2c5f9c7846f34"}, {file = "bcrypt-3.2.0.tar.gz", hash = "sha256:5b93c1726e50a93a033c36e5ca7fdcd29a5c7395af50a6892f5d9e7c6cfbfb29"}, @@ -1651,8 +1648,8 @@ cffi = [ {file = "cffi-1.15.0.tar.gz", hash = "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954"}, ] charset-normalizer = [ - {file = "charset-normalizer-2.0.11.tar.gz", hash = "sha256:98398a9d69ee80548c762ba991a4728bfc3836768ed226b3945908d1a688371c"}, - {file = "charset_normalizer-2.0.11-py3-none-any.whl", hash = "sha256:2842d8f5e82a1f6aa437380934d5e1cd4fcf2003b06fed6940769c164a480a45"}, + {file = "charset-normalizer-2.0.12.tar.gz", hash = "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597"}, + {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"}, ] click = [ {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, @@ -1797,8 +1794,8 @@ future = [ {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, ] gen3authz = [ - {file = "gen3authz-1.4.2-py3-none-any.whl", hash = "sha256:c58e773fd40a6343797704e22c183ccc64c0c96f358c5cdc0a6145ef2f9e9336"}, - {file = "gen3authz-1.4.2.tar.gz", hash = "sha256:c641fe1f7b3f5d6c5f07f851c1f05486c86831fb73ee3bf134c39f0483207fdc"}, + {file = "gen3authz-1.5.1-py3-none-any.whl", hash = "sha256:249ab21471e1ae3c283a30f3e32819db218aaa63d8d7151ac7b6b83496ac3518"}, + {file = "gen3authz-1.5.1.tar.gz", hash = "sha256:897f804a2f9d29181a6074fa14335fc9f18e757dd2e5dcec2a68bb9a008bfcb8"}, ] gen3cirrus = [ {file = "gen3cirrus-2.0.0.tar.gz", hash = "sha256:0bd590c407c42dad5f0b896da0fa30bd01ea6bef5ff7dd11324ec59f14a71793"}, @@ -1879,8 +1876,8 @@ google-crc32c = [ {file = "google_crc32c-1.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183"}, ] google-resumable-media = [ - {file = "google-resumable-media-2.1.0.tar.gz", hash = "sha256:725b989e0dd387ef2703d1cc8e86217474217f4549593c477fd94f4024a0f911"}, - {file = "google_resumable_media-2.1.0-py2.py3-none-any.whl", hash = "sha256:cdc75ea0361e39704dc7df7da59fbd419e73c8bc92eac94d8a020d36baa9944b"}, + {file = "google-resumable-media-2.2.1.tar.gz", hash = "sha256:b1edfb98867c9fa25aa7af12d6468665b83c532b7349effab805a027ea8bbee5"}, + {file = "google_resumable_media-2.2.1-py2.py3-none-any.whl", hash = "sha256:fd616af31b83d48da040c8c09b6994606e1734efb8af9acc97cf5d6070e9ba72"}, ] googleapis-common-protos = [ {file = "googleapis-common-protos-1.54.0.tar.gz", hash = "sha256:a4031d6ec6c2b1b6dc3e0be7e10a1bd72fb0b18b07ef9be7b51f2c1004ce2437"}, @@ -1895,8 +1892,8 @@ httpcore = [ {file = 
"httpcore-0.13.3.tar.gz", hash = "sha256:5d674b57a11275904d4fd0819ca02f960c538e4472533620f322fc7db1ea0edc"}, ] httplib2 = [ - {file = "httplib2-0.20.2-py3-none-any.whl", hash = "sha256:6b937120e7d786482881b44b8eec230c1ee1c5c1d06bce8cc865f25abbbf713b"}, - {file = "httplib2-0.20.2.tar.gz", hash = "sha256:e404681d2fbcec7506bcb52c503f2b021e95bee0ef7d01e5c221468a2406d8dc"}, + {file = "httplib2-0.20.4-py3-none-any.whl", hash = "sha256:8b6a905cb1c79eefd03f8669fd993c36dc341f7c558f056cb5a33b5c2f458543"}, + {file = "httplib2-0.20.4.tar.gz", hash = "sha256:58a98e45b4b1a48273073f905d2961666ecf0fbac4250ea5b47aef259eb5c585"}, ] httpx = [ {file = "httpx-0.20.0-py3-none-any.whl", hash = "sha256:33af5aad9bdc82ef1fc89219c1e36f5693bf9cd0ebe330884df563445682c0f8"}, @@ -2140,36 +2137,36 @@ pycparser = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] pycryptodome = [ - {file = "pycryptodome-3.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:bd800856e6dea6924504795ae4ec0d822e912e0a9a215e73b77b585c4d15a0f7"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:625f78ad69aa3c45e19b85b9e9cae3a30aa4a1de6b908981a63426b88e860489"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:a1c116dd7a00aac631f67920912fd8ef7a5ad3402cd4d497c6f5cc6b8115747b"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:0d0b6cca6b707b2c7cd4177c2d3cd950efa959ed8f01c30e676f102c68156f00"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d939a257117cc8c6840ad69f149b3ca5e07268cfe0429bd9feec0f91da2343d"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-manylinux2014_aarch64.whl", hash = "sha256:41dbb8c2129d43f34ed555cbd365d5e8f023ef0f9238fd9cd0302086b15a38b3"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-win32.whl", hash = "sha256:9b454af09914807cef1222d100a8c523737a160347cb8d699facc4bdfb9fe725"}, - {file = "pycryptodome-3.14.0-cp27-cp27m-win_amd64.whl", hash = "sha256:95bac6e55411650933f3b615e57bf0966bf08f3ce07c01f07482ced95f18cbec"}, - {file = "pycryptodome-3.14.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:0ffbca43c1788243421a8583d85acb59f4cd0b82b001c485fdc3fbfd8fd0804f"}, - {file = "pycryptodome-3.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:69b85d78f7db628370d2cc87f1c41a449f6460896ba95f412173618f75027c2c"}, - {file = "pycryptodome-3.14.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:bba348d2823315ab8ebe44f0b2fc2ff8dfac8de881713a08def3dadcfc8e92bb"}, - {file = "pycryptodome-3.14.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:7d667daa851b1f9a20f2b5cad3cff13fba5204bc2f857d12f27c25b178d8629b"}, - {file = "pycryptodome-3.14.0-cp27-cp27mu-manylinux2014_aarch64.whl", hash = "sha256:74918d5de06b12fef2255135bede41307a5f7b929b145ad867111525aea075dc"}, - {file = "pycryptodome-3.14.0-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c2b6faabd09d2876f9050f8af5d78046d81fe856f99e801c2ddab85b59602007"}, - {file = "pycryptodome-3.14.0-cp35-abi3-manylinux1_i686.whl", hash = "sha256:22a8629315c76d2bec57bc4fd67eb7e01664c3e3b9579df40f530ee5821db1de"}, - {file = "pycryptodome-3.14.0-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:7e3851e4fbbab72d9b30f98a504f450cc61e497e8e4b0be8205dc198703eee4d"}, - {file = "pycryptodome-3.14.0-cp35-abi3-manylinux2010_i686.whl", hash = "sha256:9006f17944efaacc3be364c01c2253c00a00f0b5fa5a1a85a1191efd861e764d"}, - {file = "pycryptodome-3.14.0-cp35-abi3-manylinux2010_x86_64.whl", hash = 
"sha256:8f0da308fca149b4c4da78e1388f82d8dd167e0ce12992a44f81b506cede3109"}, - {file = "pycryptodome-3.14.0-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:d186e34747985fbd94df7ed4d621f8377165053a06872314c2a594af34741655"}, - {file = "pycryptodome-3.14.0-cp35-abi3-win32.whl", hash = "sha256:2ed4da8f8afe44895c1f49ae1141a55b15d81dc745b5baa7b7a7265d7b40b81e"}, - {file = "pycryptodome-3.14.0-cp35-abi3-win_amd64.whl", hash = "sha256:11167a1f892283e5320feb5e81589fd041a1822b94c047820f00bc03eb98a9f7"}, - {file = "pycryptodome-3.14.0-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:1714ea5f83bcff25e8ae4640e22359d7a0815157a29d9f4eebc2b9e975a3cda0"}, - {file = "pycryptodome-3.14.0-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:3a011b9fe674bd21056613e88a3e660c56f1b47263138ebf420aa3ee4b8b0107"}, - {file = "pycryptodome-3.14.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:3fd50e3682ac3a684ace5b90ba1aef8090a78eeadf38c1ec385aad3a599cfd68"}, - {file = "pycryptodome-3.14.0-pp27-pypy_73-win32.whl", hash = "sha256:08be50d4195edd595df580077bbeec5599d0e5aa0cc468083178ae870e0b29f4"}, - {file = "pycryptodome-3.14.0-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:16c171dd969c9046b7b304c6ba0c643624dcf18093a66bd30b8b091703f177a2"}, - {file = "pycryptodome-3.14.0-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:89bb56cfd1fb74663842710bc41a6be26dafceb60eb8d432536891aea08a3740"}, - {file = "pycryptodome-3.14.0-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:c30a98c8718ae93d44680a7038adb484a520319860747ba43b6cd0a20f6b5984"}, - {file = "pycryptodome-3.14.0-pp36-pypy36_pp73-win32.whl", hash = "sha256:e972f566ef7b821c8b958dab64174afa072f8271b779e32444ad7c127b0a84b2"}, - {file = "pycryptodome-3.14.0.tar.gz", hash = "sha256:ceea92a4b8ba6c50d8d70f2efbb4ea14b002dac4160ce4dda33f1b7442f8158a"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:75a3a364fee153e77ed889c957f6f94ec6d234b82e7195b117180dcc9fc16f96"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:aae395f79fa549fb1f6e3dc85cf277f0351e15a22e6547250056c7f0c990d6a5"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:f403a3e297a59d94121cb3ee4b1cf41f844332940a62d71f9e4a009cc3533493"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ce7a875694cd6ccd8682017a7c06c6483600f151d8916f2b25cf7a439e600263"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:a36ab51674b014ba03da7f98b675fcb8eabd709a2d8e18219f784aba2db73b72"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-manylinux2014_aarch64.whl", hash = "sha256:50a5346af703330944bea503106cd50c9c2212174cfcb9939db4deb5305a8367"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-win32.whl", hash = "sha256:36e3242c4792e54ed906c53f5d840712793dc68b726ec6baefd8d978c5282d30"}, + {file = "pycryptodome-3.14.1-cp27-cp27m-win_amd64.whl", hash = "sha256:c880a98376939165b7dc504559f60abe234b99e294523a273847f9e7756f4132"}, + {file = "pycryptodome-3.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:dcd65355acba9a1d0fc9b923875da35ed50506e339b35436277703d7ace3e222"}, + {file = "pycryptodome-3.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:766a8e9832128c70012e0c2b263049506cbf334fb21ff7224e2704102b6ef59e"}, + {file = "pycryptodome-3.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:2562de213960693b6d657098505fd4493c45f3429304da67efcbeb61f0edfe89"}, + {file = "pycryptodome-3.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = 
"sha256:d1b7739b68a032ad14c5e51f7e4e1a5f92f3628bba024a2bda1f30c481fc85d8"}, + {file = "pycryptodome-3.14.1-cp27-cp27mu-manylinux2014_aarch64.whl", hash = "sha256:27e92c1293afcb8d2639baf7eb43f4baada86e4de0f1fb22312bfc989b95dae2"}, + {file = "pycryptodome-3.14.1-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:f2772af1c3ef8025c85335f8b828d0193fa1e43256621f613280e2c81bfad423"}, + {file = "pycryptodome-3.14.1-cp35-abi3-manylinux1_i686.whl", hash = "sha256:9ec761a35dbac4a99dcbc5cd557e6e57432ddf3e17af8c3c86b44af9da0189c0"}, + {file = "pycryptodome-3.14.1-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:e64738207a02a83590df35f59d708bf1e7ea0d6adce712a777be2967e5f7043c"}, + {file = "pycryptodome-3.14.1-cp35-abi3-manylinux2010_i686.whl", hash = "sha256:e24d4ec4b029611359566c52f31af45c5aecde7ef90bf8f31620fd44c438efe7"}, + {file = "pycryptodome-3.14.1-cp35-abi3-manylinux2010_x86_64.whl", hash = "sha256:8b5c28058102e2974b9868d72ae5144128485d466ba8739abd674b77971454cc"}, + {file = "pycryptodome-3.14.1-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:924b6aad5386fb54f2645f22658cb0398b1f25bc1e714a6d1522c75d527deaa5"}, + {file = "pycryptodome-3.14.1-cp35-abi3-win32.whl", hash = "sha256:53dedbd2a6a0b02924718b520a723e88bcf22e37076191eb9b91b79934fb2192"}, + {file = "pycryptodome-3.14.1-cp35-abi3-win_amd64.whl", hash = "sha256:ea56a35fd0d13121417d39a83f291017551fa2c62d6daa6b04af6ece7ed30d84"}, + {file = "pycryptodome-3.14.1-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:028dcbf62d128b4335b61c9fbb7dd8c376594db607ef36d5721ee659719935d5"}, + {file = "pycryptodome-3.14.1-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:69f05aaa90c99ac2f2af72d8d7f185f729721ad7c4be89e9e3d0ab101b0ee875"}, + {file = "pycryptodome-3.14.1-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:12ef157eb1e01a157ca43eda275fa68f8db0dd2792bc4fe00479ab8f0e6ae075"}, + {file = "pycryptodome-3.14.1-pp27-pypy_73-win32.whl", hash = "sha256:f572a3ff7b6029dd9b904d6be4e0ce9e309dcb847b03e3ac8698d9d23bb36525"}, + {file = "pycryptodome-3.14.1-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9924248d6920b59c260adcae3ee231cd5af404ac706ad30aa4cd87051bf09c50"}, + {file = "pycryptodome-3.14.1-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:e0c04c41e9ade19fbc0eff6aacea40b831bfcb2c91c266137bcdfd0d7b2f33ba"}, + {file = "pycryptodome-3.14.1-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:893f32210de74b9f8ac869ed66c97d04e7d351182d6d39ebd3b36d3db8bda65d"}, + {file = "pycryptodome-3.14.1-pp36-pypy36_pp73-win32.whl", hash = "sha256:7fb90a5000cc9c9ff34b4d99f7f039e9c3477700e309ff234eafca7b7471afc0"}, + {file = "pycryptodome-3.14.1.tar.gz", hash = "sha256:e04e40a7f8c1669195536a37979dd87da2c32dbdc73d6fe35f0077b0c17c803b"}, ] pyjwt = [ {file = "PyJWT-1.7.1-py2.py3-none-any.whl", hash = "sha256:5c6eca3c2940464d106b99ba83b00c6add741c9becaec087fb7ccdefea71350e"}, @@ -2320,8 +2317,8 @@ sqlalchemy = [ ] storageclient = [] typing-extensions = [ - {file = "typing_extensions-4.0.1-py3-none-any.whl", hash = "sha256:7f001e5ac290a0c0401508864c7ec868be4e701886d5b573a9528ed3973d9d3b"}, - {file = "typing_extensions-4.0.1.tar.gz", hash = "sha256:4ca091dea149f945ec56afb48dae714f21e8692ef22a395223bcd328961b6a0e"}, + {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, + {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, ] uritemplate = [ {file = "uritemplate-3.0.1-py2.py3-none-any.whl", hash = 
"sha256:07620c3f3f8eed1f12600845892b0e036a2420acf513c53f7de0abd911a5894f"}, diff --git a/pyproject.toml b/pyproject.toml index 82af7adfd..35e342b96 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "5.5.0" +version = "6.0.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0" @@ -13,7 +13,6 @@ include = [ [tool.poetry.dependencies] python = "^3.6" authlib = "^0.11" -authutils = "^6.0.2" bcrypt = "^3.1.4" boto3 = "~1.9.91" botocore = "^1.12.253" @@ -23,11 +22,11 @@ cdislogging = "^1.0.0" cdispyutils = "^1.0.5" cryptography = "==2.8" flask = "^1.1.1" -flask-cors = "^3.0.9" +flask-cors = "^3.0.3" flask-restful = "^0.3.6" flask_sqlalchemy_session = "^1.1" email_validator = "^1.1.1" -gen3authz = "^1.4.2" +gen3authz = "^1.5.1" gen3cirrus = "^2.0.0" gen3config = "^0.1.7" gen3users = "^0.6.0" @@ -40,16 +39,18 @@ psycopg2 = "^2.8.3" pyjwt = "^1.5.3" python_dateutil = "^2.6.1" python-jose = "^2.0.2" -pyyaml = "^5.4" +pyyaml = "^5.1" requests = "^2.18.0" retry = "^0.9.2" sqlalchemy = "^1.3.3" storageclient = {git = "https://github.com/uc-cdis/storage-client", rev = "1.0.2"} -userdatamodel = "^2.3.3" +userdatamodel = "^2.4.0" werkzeug = "^1.0.0" cachelib = "^0.2.0" azure-storage-blob = "^12.6.0" Flask-WTF = "^0.14.3" +authutils = "6.1.0" + [tool.poetry.dev-dependencies] addict = "^2.2.1" diff --git a/tests/admin/test_admin_projects.py b/tests/admin/test_admin_projects.py index 458dadba0..032ab3da6 100644 --- a/tests/admin/test_admin_projects.py +++ b/tests/admin/test_admin_projects.py @@ -1,12 +1,19 @@ import fence.resources.admin as adm from fence.models import Project, Bucket, ProjectToBucket, CloudProvider, StorageAccess +import pytest +@pytest.mark.skip( + reason="The AWG/admin/fence_as_authz support is deprecated in favor of authorization being handled fully by the policy engine" +) def test_get_project(db_session, awg_users): info = adm.get_project_info(db_session, "test_project_1") assert info["name"] == "test_project_1" +@pytest.mark.skip( + reason="The AWG/admin/fence_as_authz support is deprecated in favor of authorization being handled fully by the policy engine" +) def test_get_all_projects(db_session, awg_users): projects = adm.get_all_projects(db_session)["projects"] info = { diff --git a/tests/conftest.py b/tests/conftest.py index 43611b1f2..32473156a 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ import os import copy import time +import flask from datetime import datetime import mock @@ -31,6 +32,10 @@ from sqlalchemy.ext.compiler import compiles from sqlalchemy.schema import DropTable +# Set FENCE_CONFIG_PATH *before* loading the configuration +CURRENT_DIR = os.path.dirname(os.path.realpath(__file__)) +os.environ["FENCE_CONFIG_PATH"] = os.path.join(CURRENT_DIR, "test-fence-config.yaml") + import fence from fence import app_init from fence import models @@ -38,10 +43,12 @@ from fence.config import config from fence.errors import NotFound from fence.resources.openid.microsoft_oauth2 import MicrosoftOauth2Client +from fence.jwt.validate import validate_jwt import tests from tests import test_settings from tests import utils +from tests.utils import TEST_RAS_USERNAME, TEST_RAS_SUB from tests.utils.oauth2.client import OAuth2TestClient @@ -176,7 +183,7 @@ class FakeAzureCredential: """ def __init__(self): - self.account_key = "FakefakeAccountKey" + self.account_key = "FakefakeAccountKey" # pragma: allowlist secret class FakeBlobServiceClient: @@ -255,6 +262,111 @@ 
def kid_2(): return "test-keypair-2" +def get_subjects_to_passports( + subject_to_encoded_visas=None, passport_exp=None, kid=None, rsa_private_key=None +): + subject_to_encoded_visas = subject_to_encoded_visas or {} + passport_exp = passport_exp or int(time.time()) + 1000 + subjects = subject_to_encoded_visas.keys() or [TEST_RAS_SUB] + + output = {} + for subject in subjects: + visas = [] + encoded_visas = subject_to_encoded_visas.get(subject) + if not encoded_visas: + visas = [ + { + "iss": "https://stsstg.nih.gov", + "sub": subject, + "iat": int(time.time()), + "exp": int(time.time()) + 1000, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1", + "asserted": int(time.time()), + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + } + ] + + headers = {"kid": kid} + encoded_visas = [] + + for visa in visas: + encoded_visa = jwt.encode( + visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + encoded_visas.append(encoded_visa) + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + new_passport = { + "iss": "https://stsstg.nih.gov", + "sub": subject, + "iat": int(time.time()), + "scope": "openid ga4gh_passport_v1 email profile", + "exp": int(time.time()) + 1000, + "ga4gh_passport_v1": encoded_visas, + } + + encoded_passport = jwt.encode( + new_passport, + key=rsa_private_key, + headers=passport_header, + algorithm="RS256", + ).decode("utf-8") + + output[subject] = { + "visas": visas, + "encoded_visas": encoded_visas, + "new_passport": new_passport, + "encoded_passport": encoded_passport, + } + return output + + +@pytest.fixture(scope="function") +def no_app_context_no_public_keys(): + mock_validate_jwt = MagicMock() + + # ensure we don't actually try to reach out to external sites to refresh public keys + def validate_jwt_no_key_refresh(*args, **kwargs): + kwargs.update({"attempt_refresh": False}) + return validate_jwt(*args, **kwargs) + + mock_validate_jwt.side_effect = validate_jwt_no_key_refresh + + # ensure there is no application context or cached keys + if flask.current_app and flask.current_app.jwt_public_keys: + temp_stored_public_keys = flask.current_app.jwt_public_keys + temp_app_context = flask.has_app_context + flask.current_app.jwt_public_keys = {} + + def return_false(): + return False + + flask.has_app_context = return_false + + patcher = patch("fence.resources.ga4gh.passports.validate_jwt", mock_validate_jwt) + patcher.start() + + yield mock_validate_jwt + + patcher.stop() + + # restore public keys and context + if flask.current_app: + flask.current_app.jwt_public_keys = temp_stored_public_keys + flask.has_app_context = temp_app_context + + @pytest.fixture(scope="function") def mock_arborist_requests(request): """ @@ -472,10 +584,20 @@ def db_session(db, patch_app_db_session): patch_app_db_session(session) + session.query(models.GA4GHPassportCache).delete() + session.commit() + yield session + # clear out user and project tables upon function close in case unit test didn't + session.query(models.User).delete() + session.query(models.IssSubPairToUser).delete() + session.query(models.Project).delete() + session.query(models.GA4GHVisaV1).delete() + session.query(models.GA4GHPassportCache).delete() + session.commit() + session.close() - transaction.rollback() connection.close() @@ -1148,6 +1270,8 @@ def do_patch(session): "fence.user", 
"fence.blueprints.login.synapse", "fence.blueprints.login.ras", + "fence.blueprints.data.indexd", + "fence.resources.ga4gh.passports", ] for module in modules_to_patch: monkeypatch.setattr("{}.current_session".format(module), session) diff --git a/tests/data/test_azure_blob_storage_indexed_file_location.py b/tests/data/test_azure_blob_storage_indexed_file_location.py index 9a9a0cc81..ee3599b8a 100755 --- a/tests/data/test_azure_blob_storage_indexed_file_location.py +++ b/tests/data/test_azure_blob_storage_indexed_file_location.py @@ -16,56 +16,32 @@ indirect=True, ) @pytest.mark.parametrize( - "action,expires_in,public_data,force_signed_url,azure_creds,user_id,storage_account_matches,expect_signed", + "action,expires_in,force_signed_url,azure_creds,user_id,storage_account_matches,expect_signed", [ - ("download", 5, True, None, "fake conn str", "some user", True, False), - ("download", 5, True, None, "fake conn str", "some user", False, False), - ("download", 5, True, True, "fake conn str", "some user", True, True), - ("download", 5, True, True, "fake conn str", "some user", False, False), - ("download", 5, True, False, "fake conn str", "some user", True, False), - ("download", 5, True, False, "fake conn str", "some user", False, False), - ("download", 5, False, None, "fake conn str", "some user", True, True), - ("download", 5, False, None, "fake conn str", "some user", False, False), - ("download", 5, False, True, "fake conn str", "some user", True, True), - ("download", 5, False, True, "fake conn str", "some user", False, False), - ("download", 5, False, None, "fake conn str", "some user", True, True), - ("download", 5, False, None, "fake conn str", "some user", False, False), - ("download", 5, False, None, "*", "some user", True, True), - ("download", 5, False, None, "*", "some user", False, False), - ("download", 5, False, None, "*", ANONYMOUS_USER_ID, True, True), - ("download", 5, False, None, "*", ANONYMOUS_USER_ID, False, False), - ("download", 5, None, None, "fake conn str", "some user", True, True), - ("download", 5, None, None, "fake conn str", "some user", False, False), - ("download", 5, None, True, "fake conn str", "some user", True, True), - ("download", 5, None, True, "fake conn str", "some user", False, False), - ("download", 5, None, False, "fake conn str", "some user", True, True), - ("download", 5, None, False, "fake conn str", "some user", False, False), - ("download", 5, True, None, "fake conn str", ANONYMOUS_USER_ID, True, False), - ("download", 5, True, None, "fake conn str", ANONYMOUS_USER_ID, False, False), - ("upload", 5, True, None, "fake conn str", "some user", True, False), - ("upload", 5, True, None, "fake conn str", "some user", False, False), - ("upload", 5, True, True, "fake conn str", "some user", True, True), - ("upload", 5, True, True, "fake conn str", "some user", False, False), - ("upload", 5, True, False, "fake conn str", "some user", True, False), - ("upload", 5, True, False, "fake conn str", "some user", False, False), - ("upload", 5, False, None, "fake conn str", "some user", True, True), - ("upload", 5, False, None, "fake conn str", "some user", False, False), - ("upload", 5, False, True, "fake conn str", "some user", True, True), - ("upload", 5, False, True, "fake conn str", "some user", False, False), - ("upload", 5, False, None, "fake conn str", "some user", True, True), - ("upload", 5, False, None, "fake conn str", "some user", False, False), - ("upload", 5, False, None, "*", "some user", True, True), - ("upload", 5, False, None, "*", "some 
user", False, False), - ("upload", 5, False, None, "*", ANONYMOUS_USER_ID, True, True), - ("upload", 5, False, None, "*", ANONYMOUS_USER_ID, False, False), - ("upload", 5, None, None, "fake conn str", "some user", True, True), - ("upload", 5, None, None, "fake conn str", "some user", False, False), - ("upload", 5, None, True, "fake conn str", "some user", True, True), - ("upload", 5, None, True, "fake conn str", "some user", False, False), - ("upload", 5, None, False, "fake conn str", "some user", True, True), - ("upload", 5, None, False, "fake conn str", "some user", False, False), - ("upload", 5, True, None, "fake conn str", ANONYMOUS_USER_ID, True, False), - ("upload", 5, True, None, "fake conn str", ANONYMOUS_USER_ID, False, False), + ("download", 5, None, "fake conn str", "some user", False, False), + ("download", 5, True, "fake conn str", "some user", True, True), + ("download", 5, True, "fake conn str", "some user", False, False), + ("download", 5, False, "fake conn str", "some user", True, False), + ("download", 5, False, "fake conn str", "some user", False, False), + ("download", 5, None, "fake conn str", "some user", True, True), + ("download", 5, None, "*", "some user", True, True), + ("download", 5, None, "*", "some user", False, False), + ("download", 5, None, "*", ANONYMOUS_USER_ID, True, True), + ("download", 5, None, "*", ANONYMOUS_USER_ID, False, False), + ("download", 5, None, "fake conn str", ANONYMOUS_USER_ID, True, True), + ("download", 5, None, "fake conn str", ANONYMOUS_USER_ID, False, False), + ("upload", 5, None, "fake conn str", "some user", False, False), + ("upload", 5, True, "fake conn str", "some user", True, True), + ("upload", 5, True, "fake conn str", "some user", False, False), + ("upload", 5, False, "fake conn str", "some user", True, False), + ("upload", 5, False, "fake conn str", "some user", False, False), + ("upload", 5, None, "fake conn str", "some user", True, True), + ("upload", 5, None, "*", "some user", True, True), + ("upload", 5, None, "*", "some user", False, False), + ("upload", 5, None, "*", ANONYMOUS_USER_ID, True, True), + ("upload", 5, None, "*", ANONYMOUS_USER_ID, False, False), + ("upload", 5, None, "fake conn str", ANONYMOUS_USER_ID, True, True), + ("upload", 5, None, "fake conn str", ANONYMOUS_USER_ID, False, False), ], ) def test_get_signed_url( @@ -73,7 +49,6 @@ def test_get_signed_url( indexd_client, action, expires_in, - public_data, force_signed_url, azure_creds, user_id, @@ -95,20 +70,27 @@ def test_get_signed_url( return_value=storage_account_matches, ): with patch( - "fence.blueprints.data.indexd._get_user_info", + "fence.blueprints.data.indexd._get_user_info_for_id_or_from_request", return_value={"user_id": user_id}, ): azure_blob_storage_indexed_file_location = ( AzureBlobStorageIndexedFileLocation(indexed_file_location_url) ) - return_url = ( - azure_blob_storage_indexed_file_location.get_signed_url( - action=action, - expires_in=expires_in, - public_data=public_data, - force_signed_url=force_signed_url, + if force_signed_url == None: + return_url = ( + azure_blob_storage_indexed_file_location.get_signed_url( + action=action, + expires_in=expires_in, + ) + ) + else: + return_url = ( + azure_blob_storage_indexed_file_location.get_signed_url( + action=action, + expires_in=expires_in, + force_signed_url=force_signed_url, + ) ) - ) if expect_signed: assert "?" 
in return_url diff --git a/tests/data/test_blank_index.py b/tests/data/test_blank_index.py index 8fb1c84cb..2315767f1 100755 --- a/tests/data/test_blank_index.py +++ b/tests/data/test_blank_index.py @@ -415,7 +415,7 @@ def test_generate_aws_presigned_url_for_part(app, indexd_client): blank_index = BlankIndex(uploader=uploader) assert blank_index with patch( - "fence.blueprints.data.indexd.S3IndexedFileLocation.generate_presigne_url_for_part_upload" + "fence.blueprints.data.indexd.S3IndexedFileLocation.generate_presigned_url_for_part_upload" ): blank_index.generate_aws_presigned_url_for_part( key="some key", uploadId="some id", partNumber=1, expires_in=10 diff --git a/tests/data/test_data.py b/tests/data/test_data.py index 7b1264609..f54f74b6d 100755 --- a/tests/data/test_data.py +++ b/tests/data/test_data.py @@ -1414,7 +1414,7 @@ def test_delete_file_locations( ) mock_check_auth = mock.patch.object( fence.blueprints.data.indexd.IndexedFile, - "check_authorization", + "check_legacy_authorization", return_value=True, ) @@ -1481,7 +1481,7 @@ def test_delete_file_locations_by_uploader( ) mock_check_auth = mock.patch.object( fence.blueprints.data.indexd.IndexedFile, - "check_authorization", + "check_legacy_authorization", return_value=True, ) diff --git a/tests/data/test_indexed_file.py b/tests/data/test_indexed_file.py index 531abecfc..1d8e84688 100755 --- a/tests/data/test_indexed_file.py +++ b/tests/data/test_indexed_file.py @@ -402,11 +402,11 @@ def test_set_acl_missing_unauthorized( indexed_file.set_acls -def test_check_authz_missing_value_error( +def test_get_authorized_with_username_missing_value_error( app, supported_action, supported_protocol, indexd_client_accepting_record ): """ - Test fence.blueprints.data.indexd.IndexedFile check_authz without authz in indexd record + Test fence.blueprints.data.indexd.IndexedFile get_authorized_with_username without authz in indexd record """ indexd_record_with_no_authz_and_public_acl_populated = { "urls": [f"{supported_protocol}://some/location"], @@ -418,7 +418,7 @@ def test_check_authz_missing_value_error( with patch("fence.blueprints.data.indexd.flask.current_app", return_value=app): indexed_file = IndexedFile(file_id="some id") with pytest.raises(ValueError): - indexed_file.check_authz(supported_action) + indexed_file.get_authorized_with_username(supported_action) @pytest.mark.parametrize( diff --git a/tests/dbgap_sync/conftest.py b/tests/dbgap_sync/conftest.py index 3a3e254da..d3f36166d 100644 --- a/tests/dbgap_sync/conftest.py +++ b/tests/dbgap_sync/conftest.py @@ -5,19 +5,30 @@ from unittest.mock import MagicMock, patch from yaml import safe_load as yaml_load +from cdislogging import get_logger from cirrus import GoogleCloudManager from cdisutilstest.code.storage_client_mock import get_client, StorageClientMocker import pytest from userdatamodel import Base from userdatamodel.models import * from userdatamodel.driver import SQLAlchemyDriver +from gen3authz.client.arborist.client import ArboristClient +from fence.config import config +from fence.resources.openid.ras_oauth2 import RASOauth2Client +from fence.auth import login_user from fence.sync.sync_users import UserSyncer from fence.resources import userdatamodel as udm +from fence.models import ( + AccessPrivilege, + AuthorizationProvider, + User, + GA4GHVisaV1, + create_user, + User, +) -from fence.models import AccessPrivilege, AuthorizationProvider, User, GA4GHVisaV1 - -from gen3authz.client.arborist.client import ArboristClient +logger = get_logger(__name__) LOCAL_CSV_DIR = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/csv") @@ -72,6 +83,11 @@ def storage_client(): @pytest.fixture def syncer(db_session, request, rsa_private_key, kid): + # reset GA4GH visas and users table + db_session.query(User).delete() + db_session.query(GA4GHVisaV1).delete() + db_session.commit() + if request.param == "google": backend = "google" else: @@ -82,28 +98,47 @@ def syncer(db_session, request, rsa_private_key, kid): provider = [{"name": backend_name, "backend": backend}] users = [ - {"username": "TESTUSERB", "is_admin": True, "email": "userA@gmail.com"}, - {"username": "USER_1", "is_admin": True, "email": "user1@gmail.com"}, + { + "username": "TESTUSERB", + "is_admin": True, + "email": "userA@gmail.com", + "idp_name": "ras", + }, + { + "username": "USER_1", + "is_admin": True, + "email": "user1@gmail.com", + "idp_name": "ras", + }, { "username": "test_user1@gmail.com", "is_admin": False, "email": "test_user1@gmail.com", + "idp_name": "ras", }, { "username": "deleted_user@gmail.com", "is_admin": True, "email": "deleted_user@gmail.com", + "idp_name": "ras", + }, + { + "username": "TESTUSERD", + "is_admin": True, + "email": "userD@gmail.com", + "idp_name": "ras", }, - {"username": "TESTUSERD", "is_admin": True, "email": "userD@gmail.com"}, { "username": "expired_visa_user", "is_admin": False, "email": "expired@expired.com", + "idp_name": "ras", }, { "username": "invalid_visa_user", "is_admin": False, "email": "invalid@invalid.com", + "idp_name": "ras", }, ] @@ -182,6 +217,8 @@ def mocked_get(path, **kwargs): syncer_obj.arborist_client._user_url = "/user" + syncer_obj._create_arborist_resources = MagicMock() + for element in provider: udm.create_provider(db_session, element["name"], backend=element["backend"]) @@ -195,34 +232,42 @@ def mocked_get(path, **kwargs): sa["name"], db_session, bucket, p ) - for user in users: - user = User(**user) - db_session.add(user) - add_visa_manually(db_session, user, rsa_private_key, kid) - db_session.commit() return syncer_obj -def add_visa_manually(db_session, user, rsa_private_key, kid): - +def get_test_encoded_decoded_visa_and_exp( + db_session, + user, + rsa_private_key, + kid, + expires=None, + sub=None, + make_invalid=False, +): + """ + user can be a db user object or just a username + """ + expires = expires or int(time.time()) + 1000 headers = {"kid": kid} + sub = sub or "abcde12345aspdij" decoded_visa = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": sub, "iat": int(time.time()), - "exp": int(time.time()) + 1000, + "exp": expires, "scope": "openid ga4gh_passport_v1 email profile", - "jti": "jtiajoidasndokmasdl", - "txn": "sapidjspa.asipidja", + "jti": "jtiajoidasndokmasdl" + + str(expires), # expires to make unique from others + "txn": "sapidjspa.asipidja" + str(expires), "name": "", "ga4gh_visa_v1": { "type": "https://ras.nih.gov/visas/v1", "asserted": int(time.time()), - "value": "https://nig/passport/dbgap", - "source": "https://ncbi/gap", + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", }, "ras_dbgap_permissions": [ { @@ -232,7 +277,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p1", "consent_group": "c1", "role": "designated user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, { "consent_name": "General Research Use (IRB, PUB)", @@ -241,7 +286,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p1", "consent_group": "c1", "role": "designated 
user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, { "consent_name": "Disease-Specific (Cardiovascular Disease)", @@ -250,7 +295,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p1", "consent_group": "c1", "role": "designated user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, { "consent_name": "Health/Medical/Biomedical (IRB)", @@ -259,7 +304,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p2", "consent_group": "c3", "role": "designated user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, { "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", @@ -268,7 +313,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p2", "consent_group": "c2", "role": "designated user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, { "consent_name": "Disease-Specific (Autism Spectrum Disorder)", @@ -277,7 +322,7 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): "participant_set": "p3", "consent_group": "c1", "role": "designated user", - "expiration": "2020-11-14 00:00:00", + "expiration": expires, }, ], } @@ -287,13 +332,34 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): ).decode("utf-8") expires = int(decoded_visa["exp"]) - if user.username == "expired_visa_user": - expires -= 100000 - if user.username == "invalid_visa_user": - encoded_visa = encoded_visa[: len(encoded_visa) // 2] - if user.username == "TESTUSERD": + + if make_invalid: encoded_visa = encoded_visa[: len(encoded_visa) // 2] + return encoded_visa, decoded_visa, expires + + +def add_visa_manually(db_session, user, rsa_private_key, kid, expires=None, sub=None): + expires = expires or int(time.time()) + 1000 + make_invalid = False + + if getattr(user, "username", user) == "expired_visa_user": + expires -= 100000 + if getattr(user, "username", user) == "invalid_visa_user": + make_invalid = True + if getattr(user, "username", user) == "TESTUSERD": + make_invalid = True + + encoded_visa, decoded_visa, expires = get_test_encoded_decoded_visa_and_exp( + db_session, + user, + rsa_private_key, + kid, + expires=expires, + make_invalid=make_invalid, + sub=sub, + ) + visa = GA4GHVisaV1( user=user, source=decoded_visa["ga4gh_visa_v1"]["source"], @@ -305,3 +371,36 @@ def add_visa_manually(db_session, user, rsa_private_key, kid): db_session.add(visa) db_session.commit() + + return encoded_visa, visa + + +def fake_ras_login(username, subject, email=None, db_session=None): + """ + Mock a login by creating a sub/iss mapping in the db and logging them into a + session. 
+ + Args: + username (str): Username from IdP + subject (str): sub id in tokens from IdP + email (None, optional): email if provided + db_session (None, optional): db session to use + """ + ras_client = RASOauth2Client( + config["OPENID_CONNECT"]["ras"], + HTTP_PROXY=config["HTTP_PROXY"], + logger=logger, + ) + actual_username = ras_client.map_iss_sub_pair_to_user( + issuer="https://stsstg.nih.gov", + subject_id=subject, + username=username, + email=email, + db_session=db_session, + ) + logger.debug( + f"subject: {subject}, username: {username}, actual_username: {actual_username}" + ) + login_user(actual_username, provider="ras", email=None, id_from_idp=subject) + + # todo sub to iss table diff --git a/tests/dbgap_sync/test_user_sync.py b/tests/dbgap_sync/test_user_sync.py index 0be912a2c..dbc1c41cb 100644 --- a/tests/dbgap_sync/test_user_sync.py +++ b/tests/dbgap_sync/test_user_sync.py @@ -1,14 +1,26 @@ import os import pytest import yaml +import collections +import asyncio +import flask from unittest.mock import MagicMock +import mock from fence import models from fence.sync.sync_users import _format_policy_id from fence.config import config +from fence.job.visa_update_cronjob import Visa_Token_Update from tests.dbgap_sync.conftest import LOCAL_YAML_DIR +from tests.utils import TEST_RAS_USERNAME, TEST_RAS_SUB +from tests.dbgap_sync.conftest import ( + get_test_encoded_decoded_visa_and_exp, + fake_ras_login, +) +from tests.conftest import get_subjects_to_passports + def equal_project_access(d1, d2): """ @@ -76,7 +88,9 @@ def test_sync( syncer.sync() users = db_session.query(models.User).all() - assert len(users) == 14 + + # 5 from user.yaml, 4 from fake dbgap SFTP + assert len(users) == 9 if parse_consent_code_config: user = models.query_for_user(session=db_session, username="USERC") @@ -152,12 +166,8 @@ def test_sync( } assert len(user_access) == 1 - # TODO: check user policy access (add in user sync changes) - user = models.query_for_user(session=db_session, username="deleted_user@gmail.com") - assert not user.is_admin - user_access = db_session.query(models.AccessPrivilege).filter_by(user=user).all() - assert not user_access + assert not user @pytest.mark.parametrize("syncer", ["google"], indirect=True) @@ -273,12 +283,15 @@ def test_dbgap_consent_codes( user.project_access, {"phs000179": ["read", "read-storage"]} ) - resource_to_parent_paths = {} - for call in syncer.arborist_client.update_resource.call_args_list: + resource_to_parent_paths = collections.defaultdict(list) + for call in syncer._create_arborist_resources.call_args_list: args, kwargs = call - parent_path = args[0] - resource = args[1].get("name") - resource_to_parent_paths.setdefault(resource, []).append(parent_path) + full_paths = args[0] + for full_path in full_paths: + resource_begin = full_path.rfind("/") + 1 + parent_path = full_path[:resource_begin] + resource = full_path[resource_begin:] + resource_to_parent_paths[resource].append(parent_path) if parse_consent_code_config: if enable_common_exchange_area: @@ -540,12 +553,22 @@ def test_update_arborist(syncer, db_session): "data_file", # comes from user.yaml file ] - resource_to_parent_paths = {} + resource_to_parent_paths = collections.defaultdict(list) for call in syncer.arborist_client.update_resource.call_args_list: args, kwargs = call parent_path = args[0] resource = args[1].get("name") resource_to_parent_paths.setdefault(resource, []).append(parent_path) + # usersync updates dbgap projects at once using _create_arborist_resources + # as opposed to individually 
with gen3authz's update_resource + for call in syncer._create_arborist_resources.call_args_list: + args, kwargs = call + full_paths = args[0] + for full_path in full_paths: + resource_begin = full_path.rfind("/") + 1 + parent_path = full_path[:resource_begin] + resource = full_path[resource_begin:] + resource_to_parent_paths[resource].append(parent_path) for resource in expect_resources: assert resource in list(resource_to_parent_paths.keys()) @@ -613,136 +636,234 @@ def mock_merge(dbgap_servers, sess): assert syncer._process_dbgap_files.call_count == 2 -@pytest.mark.parametrize("syncer", ["cleversafe", "google"], indirect=True) -@pytest.mark.parametrize("parse_consent_code_config", [False, True]) -@pytest.mark.parametrize("fallback_to_dbgap_sftp", [False, True]) -def test_user_sync_with_visas( - syncer, +def setup_ras_sync_testing( + mock_discovery, + mock_get_token, db_session, - storage_client, - parse_consent_code_config, - fallback_to_dbgap_sftp, - monkeypatch, + rsa_private_key, + kid, + mock_userinfo, + mock_arborist_requests, ): - # patch the sync to use the parameterized config value - monkeypatch.setitem( - syncer.dbGaP[0], "parse_consent_code", parse_consent_code_config + """ + BEGIN Setup + - make sure no app context + - setup mock RAS responses for various users + - setup fake access tokens + - make userinfo respond with passport and visas (some valid, some expired, some invalid) + """ + setup_info = {} + + mock_arborist_requests({"arborist/user/TESTUSERB": {"PATCH": (None, 204)}}) + mock_arborist_requests( + {"arborist/user/test_user1@gmail.com": {"PATCH": (None, 204)}} ) - monkeypatch.setattr(syncer, "parse_consent_code", parse_consent_code_config) - monkeypatch.setattr(syncer, "fallback_to_dbgap_sftp", fallback_to_dbgap_sftp) - monkeypatch.setattr(syncer, "sync_from_visas", True) + mock_arborist_requests({"arborist/user/TESTUSERD": {"PATCH": (None, 204)}}) + mock_arborist_requests({"arborist/user/USERF": {"PATCH": (None, 204)}}) - syncer.sync_visas() + mock_discovery.return_value = "https://ras/token_endpoint" - users = db_session.query(models.User).all() + def get_token_response_for_user(*args, **kwargs): + token_response = { + "access_token": f"{args[0].username}", + "id_token": f"{args[0].username}-id12345abcdef", + "refresh_token": f"{args[0].username}-refresh12345abcdefg", + } + return token_response - user = models.query_for_user( - session=db_session, username="TESTUSERB" - ) # contains only visa information + mock_get_token.side_effect = get_token_response_for_user - backup_user = models.query_for_user( - session=db_session, username="TESTUSERD" - ) # Contains invalid visa and also in telemetry file - - expired_user = models.query_for_user( - session=db_session, - username="expired_visa_user", - ) - invalid_user = models.query_for_user( - session=db_session, username="invalid_visa_user" - ) + usernames_to_ras_subjects = { + "TESTUSERB": "sub-TESTUSERB-1234", + "test_user1@gmail.com": "sub-test_user1@gmail.com-1234", + "TESTUSERD": "sub-TESTUSERD-1234", + "USERF": "sub-USERF-1234", + } - assert len(invalid_user.project_access) == 0 - assert len(expired_user.project_access) == 0 + setup_info["usernames_to_ras_subjects"] = usernames_to_ras_subjects + + subjects_to_encoded_visas = { + usernames_to_ras_subjects["TESTUSERB"]: [ + get_test_encoded_decoded_visa_and_exp( + db_session, + "TESTUSERB", + rsa_private_key, + kid, + sub=usernames_to_ras_subjects["TESTUSERB"], + )[0] + ], + usernames_to_ras_subjects["test_user1@gmail.com"]: [ + 
get_test_encoded_decoded_visa_and_exp( + db_session, + "test_user1@gmail.com", + rsa_private_key, + kid, + expires=1, + sub=usernames_to_ras_subjects["test_user1@gmail.com"], + )[0] + ], + # note: get_test_encoded_decoded_visa_and_exp makes the visas for the next 2 users completely invalid + usernames_to_ras_subjects["TESTUSERD"]: [ + get_test_encoded_decoded_visa_and_exp( + db_session, + "TESTUSERD", + rsa_private_key, + kid, + sub=usernames_to_ras_subjects["TESTUSERD"], + make_invalid=True, + )[0] + ], + usernames_to_ras_subjects["USERF"]: [ + get_test_encoded_decoded_visa_and_exp( + db_session, + "USERF", + rsa_private_key, + kid, + sub=usernames_to_ras_subjects["USERF"], + make_invalid=True, + )[0] + ], + } - assert len(invalid_user.ga4gh_visas_v1) == 0 - assert len(expired_user.ga4gh_visas_v1) == 0 + setup_info["subjects_to_encoded_visas"] = subjects_to_encoded_visas - if fallback_to_dbgap_sftp: - assert len(users) == 14 + subjects_to_passports = get_subjects_to_passports( + subjects_to_encoded_visas, kid=kid, rsa_private_key=rsa_private_key + ) - if parse_consent_code_config: - assert equal_project_access( - user.project_access, - { - "phs000991.c1": ["read", "read-storage"], - "phs000961.c1": ["read", "read-storage"], - "phs000279.c1": ["read", "read-storage"], - "phs000286.c3": ["read", "read-storage"], - "phs000289.c2": ["read", "read-storage"], - "phs000298.c1": ["read", "read-storage"], - }, - ) - assert equal_project_access( - backup_user.project_access, - { - "phs000179.c1": ["read", "read-storage"], - }, - ) - else: - assert equal_project_access( - user.project_access, - { - "phs000991": ["read", "read-storage"], - "phs000961": ["read", "read-storage"], - "phs000279": ["read", "read-storage"], - "phs000286": ["read", "read-storage"], - "phs000289": ["read", "read-storage"], - "phs000298": ["read", "read-storage"], - }, - ) - assert equal_project_access( - backup_user.project_access, - { - "phs000179": ["read", "read-storage"], - }, - ) + setup_info["subjects_to_passports"] = subjects_to_passports + + def get_userinfo_for_user(*args, **kwargs): + # username is the access token only b/c of the way the mocks are setup + username = args[0]["access_token"] + + # sub is likely different than username + sub = f"sub-{username}-1234" + userinfo_response = { + "sub": sub, + "name": "", + "preferred_username": "someuser@era.com", + "UID": "", + "UserID": username, + "email": "", + } + subject_to_passports = subjects_to_passports.get(sub) or {} + userinfo_response["passport_jwt_v11"] = subject_to_passports.get( + "encoded_passport" + ) + return userinfo_response - else: - assert len(users) == 12 - assert len(backup_user.project_access) == 0 - if parse_consent_code_config: - assert equal_project_access( - user.project_access, - { - "phs000991.c1": ["read", "read-storage"], - "phs000961.c1": ["read", "read-storage"], - "phs000279.c1": ["read", "read-storage"], - "phs000286.c3": ["read", "read-storage"], - "phs000289.c2": ["read", "read-storage"], - "phs000298.c1": ["read", "read-storage"], - }, - ) - else: - assert equal_project_access( - user.project_access, - { - "phs000991": ["read", "read-storage"], - "phs000961": ["read", "read-storage"], - "phs000279": ["read", "read-storage"], - "phs000286": ["read", "read-storage"], - "phs000289": ["read", "read-storage"], - "phs000298": ["read", "read-storage"], - }, - ) + mock_userinfo.side_effect = get_userinfo_for_user + return setup_info -@pytest.mark.parametrize("syncer", ["google"], indirect=True) -def test_sync_in_login( 
+@pytest.mark.parametrize("syncer", ["cleversafe", "google"], indirect=True) +@mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_userinfo") +@mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_access_token") +@mock.patch( + "fence.resources.openid.ras_oauth2.RASOauth2Client.get_value_from_discovery_doc" +) +def test_user_sync_with_visa_sync_job( + mock_discovery, + mock_get_token, + mock_userinfo, syncer, db_session, storage_client, - rsa_private_key, - kid, monkeypatch, + kid, + rsa_public_key, + rsa_private_key, + mock_arborist_requests, + no_app_context_no_public_keys, ): - user = models.query_for_user( - session=db_session, username="TESTUSERB" - ) # contains no information - syncer.sync_single_user_visas(user, db_session) - user = models.query_for_user( - session=db_session, username="TESTUSERB" - ) # contains only visa information - user1 = models.query_for_user(session=db_session, username="USER_1") - assert len(user1.project_access) == 0 # other users are not affected - assert len(user.project_access) == 6 + """ + Test that visas and authorization from them only get added to the database + after visa sync job and not by usersync alone. Ensure usersync does not + alter visa information. + + NOTE: syncer above creates users as if they already exist before this usersync + and they have a specified IdP == RAS (e.g. they should get visas synced) + """ + setup_info = setup_ras_sync_testing( + mock_discovery, + mock_get_token, + db_session, + rsa_private_key, + kid, + mock_userinfo, + mock_arborist_requests, + ) + + # Usersync + syncer.sync() + + users_after = db_session.query(models.User).all() + + # 5 from user.yaml, 4 from fake dbgap SFTP + assert len(users_after) == 9 + + for user in users_after: + if user.username in setup_info["usernames_to_ras_subjects"]: + # at this point, we will mock a login event by the user (at which point we'd get + # a refresh token we can update visas with later) + fake_ras_login( + user.username, + setup_info["usernames_to_ras_subjects"][user.username], + db_session=db_session, + ) + + # make sure no one has visas yet + assert not user.ga4gh_visas_v1 + + # use refresh tokens from users to call access token polling "fence-create update-visa" + # and sync authorization from visas + job = Visa_Token_Update() + job.pkey_cache = { + "https://stsstg.nih.gov": { + kid: rsa_public_key, + } + } + loop = asyncio.get_event_loop() + loop.run_until_complete(job.update_tokens(db_session)) + + users_after_visas_sync = db_session.query(models.User).all() + + # now let's check that actual authorization / visas got added as expected + valid_user = models.query_for_user(session=db_session, username="TESTUSERB") + + user_with_invalid_visa_also_in_telemetry_file = models.query_for_user( + session=db_session, username="TESTUSERD" + ) + + user_with_invalid_visa_also_in_telemetry_file_2 = models.query_for_user( + session=db_session, username="USERF" + ) + + user_with_expired_visa_also_in_telemetry_file = models.query_for_user( + session=db_session, + username="test_user1@gmail.com", + ) + + # make sure no access or visas for users not expected to have any + assert ( + user_with_invalid_visa_also_in_telemetry_file + and len(user_with_invalid_visa_also_in_telemetry_file.ga4gh_visas_v1) == 0 + ) + assert ( + user_with_invalid_visa_also_in_telemetry_file_2 + and len(user_with_invalid_visa_also_in_telemetry_file_2.ga4gh_visas_v1) == 0 + ) + assert ( + user_with_expired_visa_also_in_telemetry_file + and 
len(user_with_expired_visa_also_in_telemetry_file.ga4gh_visas_v1) == 0
+    )
+
+    assert valid_user and valid_user.ga4gh_visas_v1
+    assert len(valid_user.ga4gh_visas_v1) == 1
+    assert (
+        valid_user.ga4gh_visas_v1[0].ga4gh_visa
+        in setup_info["subjects_to_encoded_visas"][
+            setup_info["usernames_to_ras_subjects"][valid_user.username]
+        ]
+    )
diff --git a/tests/ga4gh/test_ga4gh.py b/tests/ga4gh/test_ga4gh.py
new file mode 100644
index 000000000..0d22b7477
--- /dev/null
+++ b/tests/ga4gh/test_ga4gh.py
@@ -0,0 +1,67 @@
+import time
+import jwt
+
+from cdislogging import get_logger
+
+from fence.config import config
+from fence.models import IdentityProvider, IssSubPairToUser
+from fence.resources.openid.ras_oauth2 import RASOauth2Client
+from fence.resources.ga4gh.passports import get_or_create_gen3_user_from_iss_sub
+
+logger = get_logger(__name__, log_level="debug")
+
+
+def test_get_or_create_gen3_user_from_iss_sub_without_prior_login(
+    db_session, mock_arborist_requests
+):
+    """
+    Test get_or_create_gen3_user_from_iss_sub when the visa's <iss, sub>
+    combination is not present in the mapping table beforehand (i.e. the user
+    has not previously logged in)
+    """
+    mock_arborist_requests({"arborist/user/": {"PATCH": (None, 204)}})
+
+    iss = "https://stsstg.nih.gov"
+    sub = "123_abc"
+
+    user = get_or_create_gen3_user_from_iss_sub(iss, sub, db_session=db_session)
+
+    assert user.username == "123_abcstsstg.nih.gov"
+    assert user.identity_provider.name == IdentityProvider.ras
+    iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all()
+    assert len(iss_sub_pair_to_user_records) == 1
+    assert iss_sub_pair_to_user_records[0].user.username == "123_abcstsstg.nih.gov"
+
+
+def test_get_or_create_gen3_user_from_iss_sub_after_prior_login(
+    db_session, mock_arborist_requests
+):
+    """
+    Test get_or_create_gen3_user_from_iss_sub when the visa's <iss, sub>
+    combination is present in the mapping table beforehand (i.e. 
the user + has previously logged in) + """ + mock_arborist_requests({"arborist/user/": {"PATCH": (None, 204)}}) + + iss = "https://stsstg.nih.gov" + sub = "123_abc" + username = "johnsmith" + email = "johnsmith@domain.tld" + oidc = config["OPENID_CONNECT"] + ras_client = RASOauth2Client( + oidc["ras"], + HTTP_PROXY=config["HTTP_PROXY"], + logger=logger, + ) + ras_client.map_iss_sub_pair_to_user(iss, sub, username, email) + iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all() + assert len(iss_sub_pair_to_user_records) == 1 + assert iss_sub_pair_to_user_records[0].user.username == username + + user = get_or_create_gen3_user_from_iss_sub(iss, sub, db_session=db_session) + + iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all() + assert len(iss_sub_pair_to_user_records) == 1 + assert iss_sub_pair_to_user_records[0].user.username == username + assert user.username == username + assert user.email == email diff --git a/tests/ras/test_ras.py b/tests/ras/test_ras.py index 3adad6197..ad3c73d5a 100644 --- a/tests/ras/test_ras.py +++ b/tests/ras/test_ras.py @@ -4,31 +4,34 @@ import time import mock import jwt +import pytest from cdislogging import get_logger +from fence.blueprints.login.ras import RASCallback from fence.config import config -from fence.models import User, UpstreamRefreshToken, GA4GHVisaV1 +from fence.models import ( + query_for_user, + User, + UpstreamRefreshToken, + GA4GHVisaV1, + IdentityProvider, + IssSubPairToUser, +) +from fence.jwt.validate import validate_jwt from fence.resources.openid.ras_oauth2 import RASOauth2Client as RASClient -from fence.config import config +from fence.resources.ga4gh.passports import get_or_create_gen3_user_from_iss_sub +from fence.errors import InternalError +from tests.utils import add_test_ras_user, TEST_RAS_USERNAME, TEST_RAS_SUB from tests.dbgap_sync.conftest import add_visa_manually from fence.job.visa_update_cronjob import Visa_Token_Update import tests.utils +from tests.conftest import get_subjects_to_passports logger = get_logger(__name__, log_level="debug") -def add_test_user(db_session, username="admin_user", id="5678", is_admin=True): - test_user = User(username=username, id=id, is_admin=is_admin) - # id is part of primary key - check_user_exists = db_session.query(User).filter_by(id=id).first() - if not check_user_exists: - db_session.add(test_user) - db_session.commit() - return test_user - - def add_refresh_token(db_session, user): refresh_token = "abcde1234567kposjdas" expires = int(time.time()) + 1000 @@ -48,7 +51,7 @@ def test_store_refresh_token(db_session): Test to check if store_refresh_token replaces the existing token with a new one in the db """ - test_user = add_test_user(db_session) + test_user = add_test_ras_user(db_session) add_refresh_token(db_session, test_user) initial_query = db_session.query(UpstreamRefreshToken).first() assert initial_query.refresh_token @@ -63,7 +66,9 @@ def test_store_refresh_token(db_session): logger=logger, ) - ras_client.store_refresh_token(test_user, new_refresh_token, new_expire) + ras_client.store_refresh_token( + test_user, new_refresh_token, new_expire, db_session=db_session + ) final_query = db_session.query(UpstreamRefreshToken).first() assert final_query.refresh_token == new_refresh_token @@ -84,10 +89,30 @@ def test_update_visa_token( rsa_private_key, rsa_public_key, kid, + mock_arborist_requests, + no_app_context_no_public_keys, ): """ Test to check visa table is updated when getting new visa """ + # ensure we don't actually try to reach out to 
external sites to refresh public keys + def validate_jwt_no_key_refresh(*args, **kwargs): + kwargs.update({"attempt_refresh": False}) + return validate_jwt(*args, **kwargs) + + # ensure there is no application context or cached keys + temp_stored_public_keys = flask.current_app.jwt_public_keys + temp_app_context = flask.has_app_context + del flask.current_app.jwt_public_keys + + def return_false(): + return False + + flask.has_app_context = return_false + + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) mock_discovery.return_value = "https://ras/token_endpoint" new_token = "refresh12345abcdefg" @@ -99,16 +124,18 @@ def test_update_visa_token( mock_get_token.return_value = token_response userinfo_response = { - "sub": "abcd-asdj-sajpiasj12iojd-asnoin", + "sub": TEST_RAS_SUB, "name": "", "preferred_username": "someuser@era.com", "UID": "", - "UserID": "admin_user", + "UserID": TEST_RAS_USERNAME, "email": "", } - test_user = add_test_user(db_session) - add_visa_manually(db_session, test_user, rsa_private_key, kid) + test_user = add_test_ras_user(db_session) + existing_encoded_visa, _ = add_visa_manually( + db_session, test_user, rsa_private_key, kid + ) add_refresh_token(db_session, test_user) visa_query = db_session.query(GA4GHVisaV1).filter_by(user=test_user).first() @@ -122,48 +149,14 @@ def test_update_visa_token( logger=logger, ) - new_visa = { - "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", - "iat": int(time.time()), - "exp": int(time.time()) + 1000, - "scope": "openid ga4gh_passport_v1 email profile", - "jti": "jtiajoidasndokmasdl", - "txn": "sapidjspa.asipidja", - "name": "", - "ga4gh_visa_v1": { - "type": "https://ras.nih.gov/visas/v1", - "asserted": int(time.time()), - "value": "https://nig/passport/dbgap", - "source": "https://ncbi/gap", - }, - } - - headers = {"kid": kid} - - encoded_visa = jwt.encode( - new_visa, key=rsa_private_key, headers=headers, algorithm="RS256" - ).decode("utf-8") - - passport_header = { - "type": "JWT", - "alg": "RS256", - "kid": kid, - } - new_passport = { - "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", - "iat": int(time.time()), - "scope": "openid ga4gh_passport_v1 email profile", - "exp": int(time.time()) + 1000, - "ga4gh_passport_v1": [encoded_visa], - } - - encoded_passport = jwt.encode( - new_passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" - ).decode("utf-8") + # use default user and passport + subjects_to_passports = get_subjects_to_passports( + kid=kid, rsa_private_key=rsa_private_key + ) - userinfo_response["passport_jwt_v11"] = encoded_passport + userinfo_response["passport_jwt_v11"] = subjects_to_passports[TEST_RAS_SUB][ + "encoded_passport" + ] mock_userinfo.return_value = userinfo_response pkey_cache = { @@ -171,11 +164,27 @@ def test_update_visa_token( kid: rsa_public_key, } } - ras_client.update_user_visas(test_user, pkey_cache=pkey_cache) + ras_client.update_user_authorization( + test_user, + pkey_cache=pkey_cache, + db_session=db_session, + ) + + # restore public keys and context + flask.current_app.jwt_public_keys = temp_stored_public_keys + flask.has_app_context = temp_app_context - query_visa = db_session.query(GA4GHVisaV1).first() - assert query_visa.ga4gh_visa - assert query_visa.ga4gh_visa == encoded_visa + query_visas = [ + item.ga4gh_visa + for item in db_session.query(GA4GHVisaV1).filter_by(user=test_user) + ] + + # at this point we expect the existing visa to stay around (since it hasn't expired) + # and the new visa should 
also show up + assert len(query_visas) == 2 + assert existing_encoded_visa in query_visas + for visa in subjects_to_passports[TEST_RAS_SUB]["encoded_visas"]: + assert visa in query_visas @mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_userinfo") @@ -192,10 +201,15 @@ def test_update_visa_empty_passport_returned( rsa_private_key, rsa_public_key, kid, + mock_arborist_requests, ): """ Test to handle empty passport sent from RAS """ + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) + mock_discovery.return_value = "https://ras/token_endpoint" new_token = "refresh12345abcdefg" token_response = { @@ -206,18 +220,20 @@ def test_update_visa_empty_passport_returned( mock_get_token.return_value = token_response userinfo_response = { - "sub": "abcd-asdj-sajpiasj12iojd-asnoin", + "sub": TEST_RAS_SUB, "name": "", "preferred_username": "someuser@era.com", "UID": "", - "UserID": "admin_user", + "UserID": TEST_RAS_USERNAME, "email": "", "passport_jwt_v11": "", } mock_userinfo.return_value = userinfo_response - test_user = add_test_user(db_session) - add_visa_manually(db_session, test_user, rsa_private_key, kid) + test_user = add_test_ras_user(db_session) + existing_encoded_visa, _ = add_visa_manually( + db_session, test_user, rsa_private_key, kid + ) add_refresh_token(db_session, test_user) visa_query = db_session.query(GA4GHVisaV1).filter_by(user=test_user).first() @@ -236,10 +252,20 @@ def test_update_visa_empty_passport_returned( kid: rsa_public_key, } } - ras_client.update_user_visas(test_user, pkey_cache=pkey_cache) + ras_client.update_user_authorization( + test_user, + pkey_cache=pkey_cache, + db_session=db_session, + ) - query_visa = db_session.query(GA4GHVisaV1).first() - assert query_visa == None + # at this point we expect the existing visa to stay around (since it hasn't expired) + # but no new visas + query_visas = [ + item.ga4gh_visa + for item in db_session.query(GA4GHVisaV1).filter_by(user=test_user) + ] + assert len(query_visas) == 1 + assert existing_encoded_visa in query_visas @mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_userinfo") @@ -255,10 +281,14 @@ def test_update_visa_empty_visa_returned( db_session, rsa_private_key, kid, + mock_arborist_requests, ): """ Test to check if the db is emptied if the ras userinfo sends back an empty visa """ + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) mock_discovery.return_value = "https://ras/token_endpoint" new_token = "refresh12345abcdefg" @@ -270,11 +300,11 @@ def test_update_visa_empty_visa_returned( mock_get_token.return_value = token_response userinfo_response = { - "sub": "abcd-asdj-sajpiasj12iojd-asnoin", + "sub": TEST_RAS_SUB, "name": "", "preferred_username": "someuser@era.com", "UID": "", - "UserID": "admin_user", + "UserID": TEST_RAS_USERNAME, "email": "", } @@ -285,7 +315,7 @@ def test_update_visa_empty_visa_returned( } new_passport = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": TEST_RAS_SUB, "iat": int(time.time()), "scope": "openid ga4gh_passport_v1 email profile", "exp": int(time.time()) + 1000, @@ -298,8 +328,10 @@ def test_update_visa_empty_visa_returned( userinfo_response["passport_jwt_v11"] = encoded_passport mock_userinfo.return_value = userinfo_response - test_user = add_test_user(db_session) - add_visa_manually(db_session, test_user, rsa_private_key, kid) + test_user = add_test_ras_user(db_session) + existing_encoded_visa, _ = add_visa_manually( + db_session, 
test_user, rsa_private_key, kid + ) add_refresh_token(db_session, test_user) visa_query = db_session.query(GA4GHVisaV1).filter_by(user=test_user).first() @@ -313,10 +345,18 @@ def test_update_visa_empty_visa_returned( logger=logger, ) - ras_client.update_user_visas(test_user, pkey_cache={}) + ras_client.update_user_authorization( + test_user, pkey_cache={}, db_session=db_session + ) - query_visa = db_session.query(GA4GHVisaV1).first() - assert query_visa == None + # at this point we expect the existing visa to stay around (since it hasn't expired) + # but no new visas + query_visas = [ + item.ga4gh_visa + for item in db_session.query(GA4GHVisaV1).filter_by(user=test_user) + ] + assert len(query_visas) == 1 + assert existing_encoded_visa in query_visas @mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_userinfo") @@ -333,12 +373,17 @@ def test_update_visa_token_with_invalid_visa( rsa_private_key, rsa_public_key, kid, + mock_arborist_requests, + no_app_context_no_public_keys, ): """ Test to check the following case: Received visa: [good1, bad2, good3] Processed/stored visa: [good1, good3] """ + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) mock_discovery.return_value = "https://ras/token_endpoint" new_token = "refresh12345abcdefg" @@ -350,16 +395,18 @@ def test_update_visa_token_with_invalid_visa( mock_get_token.return_value = token_response userinfo_response = { - "sub": "abcd-asdj-sajpiasj12iojd-asnoin", + "sub": TEST_RAS_SUB, "name": "", "preferred_username": "someuser@era.com", "UID": "", - "UserID": "admin_user", + "UserID": TEST_RAS_USERNAME, "email": "", } - test_user = add_test_user(db_session) - add_visa_manually(db_session, test_user, rsa_private_key, kid) + test_user = add_test_ras_user(db_session) + existing_encoded_visa, _ = add_visa_manually( + db_session, test_user, rsa_private_key, kid + ) add_refresh_token(db_session, test_user) visa_query = db_session.query(GA4GHVisaV1).filter_by(user=test_user).first() @@ -375,7 +422,7 @@ def test_update_visa_token_with_invalid_visa( new_visa = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": TEST_RAS_SUB, "iat": int(time.time()), "exp": int(time.time()) + 1000, "scope": "openid ga4gh_passport_v1 email profile", @@ -385,8 +432,8 @@ def test_update_visa_token_with_invalid_visa( "ga4gh_visa_v1": { "type": "https://ras.nih.gov/visas/v1", "asserted": int(time.time()), - "value": "https://nig/passport/dbgap", - "source": "https://ncbi/gap", + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", }, } @@ -403,7 +450,7 @@ def test_update_visa_token_with_invalid_visa( } new_passport = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": TEST_RAS_SUB, "iat": int(time.time()), "scope": "openid ga4gh_passport_v1 email profile", "exp": int(time.time()) + 1000, @@ -422,13 +469,21 @@ def test_update_visa_token_with_invalid_visa( kid: rsa_public_key, } } - ras_client.update_user_visas(test_user, pkey_cache=pkey_cache) - query_visas = db_session.query(GA4GHVisaV1).filter_by(user=test_user).all() - assert len(query_visas) == 2 + ras_client.update_user_authorization( + test_user, + pkey_cache=pkey_cache, + db_session=db_session, + ) + # at this point we expect the existing visa to stay around (since it hasn't expired) + # and 2 new good visas + query_visas = [ + item.ga4gh_visa + for item in db_session.query(GA4GHVisaV1).filter_by(user=test_user) + ] + assert len(query_visas) == 3 for query_visa in 
query_visas: - assert query_visa.ga4gh_visa - assert query_visa.ga4gh_visa == encoded_visa + assert query_visa == existing_encoded_visa or query_visa == encoded_visa @mock.patch("httpx.get") @@ -445,12 +500,27 @@ def test_update_visa_fetch_pkey( db_session, rsa_private_key, kid, + mock_arborist_requests, ): """ Test that when the RAS client's pkey cache is empty, the client's - update_user_visas can fetch and serialize the visa issuer's public keys and + update_user_authorization can fetch and serialize the visa issuer's public keys and validate a visa using the correct key. """ + # ensure there is no application context or cached keys + temp_stored_public_keys = flask.current_app.jwt_public_keys + temp_app_context = flask.has_app_context + del flask.current_app.jwt_public_keys + + def return_false(): + return False + + flask.has_app_context = return_false + + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) + mock_discovery.return_value = "https://ras/token_endpoint" mock_get_token.return_value = { "access_token": "abcdef12345", @@ -460,7 +530,7 @@ def test_update_visa_fetch_pkey( # New visa that will be returned by userinfo new_visa = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": TEST_RAS_SUB, "iat": int(time.time()), "exp": int(time.time()) + 1000, "scope": "openid ga4gh_passport_v1 email profile", @@ -470,8 +540,8 @@ def test_update_visa_fetch_pkey( "ga4gh_visa_v1": { "type": "https://ras.nih.gov/visas/v1", "asserted": int(time.time()), - "value": "https://nig/passport/dbgap", - "source": "https://ncbi/gap", + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", }, } headers = {"kid": kid} @@ -486,7 +556,7 @@ def test_update_visa_fetch_pkey( } new_passport = { "iss": "https://stsstg.nih.gov", - "sub": "abcde12345aspdij", + "sub": TEST_RAS_SUB, "iat": int(time.time()), "scope": "openid ga4gh_passport_v1 email profile", "exp": int(time.time()) + 1000, @@ -513,14 +583,24 @@ def test_update_visa_fetch_pkey( HTTP_PROXY=config.get("HTTP_PROXY"), logger=logger, ) - test_user = add_test_user(db_session) + test_user = add_test_ras_user(db_session) # Pass in an empty pkey cache so that the client will have to hit the jwks endpoint. 
- ras_client.update_user_visas(test_user, pkey_cache={}) + ras_client.update_user_authorization( + test_user, pkey_cache={}, db_session=db_session + ) + + # restore public keys and context + flask.current_app.jwt_public_keys = temp_stored_public_keys + flask.has_app_context = temp_app_context # Check that the new visa passed validation, indicating a successful pkey fetch - query_visa = db_session.query(GA4GHVisaV1).first() - assert query_visa.ga4gh_visa == encoded_visa + query_visas = [ + item.ga4gh_visa + for item in db_session.query(GA4GHVisaV1).filter_by(user=test_user) + ] + for visa in query_visas: + assert visa == encoded_visa @mock.patch("fence.resources.openid.ras_oauth2.RASOauth2Client.get_userinfo") @@ -536,88 +616,101 @@ def test_visa_update_cronjob( rsa_private_key, rsa_public_key, kid, + mock_arborist_requests, + no_app_context_no_public_keys, ): """ Test to check visa table is updated when updating visas using cronjob """ + mock_arborist_requests( + {f"arborist/user/{TEST_RAS_USERNAME}": {"PATCH": (None, 204)}} + ) + # reset users table + db_session.query(User).delete() + db_session.query(GA4GHVisaV1).delete() + db_session.commit() - n_users = 20 - n_users_no_visa = 15 + n_users = 3 + n_users_no_visas = 2 mock_discovery.return_value = "https://ras/token_endpoint" new_token = "refresh12345abcdefg" - token_response = { - "access_token": "abcdef12345", - "id_token": "id12345abcdef", - "refresh_token": new_token, - } - mock_get_token.return_value = token_response - userinfo_response = { - "sub": "abcd-asdj-sajpiasj12iojd-asnoin", - "name": "", - "preferred_username": "someuser@era.com", - "UID": "", - "UserID": "admin_user", - "email": "", - } + def _get_token_response_for_user(*args, **kwargs): + token_response = { + "access_token": f"{args[0].id}", + "id_token": f"{args[0].id}-id12345abcdef", + "refresh_token": f"{args[0].id}-refresh12345abcdefg", + } + return token_response - for i in range(n_users): + mock_get_token.side_effect = _get_token_response_for_user + + user_id_to_ga4gh_info = {} + + for i in range(1, n_users + 1): username = "user_{}".format(i) - test_user = add_test_user(db_session, username, i) - add_visa_manually(db_session, test_user, rsa_private_key, kid) - add_refresh_token(db_session, test_user) - for j in range(n_users_no_visa): - username = "no_visa_{}".format(j) - test_user = add_test_user(db_session, username, j + n_users) + test_user = add_test_ras_user(db_session, username, subject_id=username) + encoded_visa, visa = add_visa_manually( + db_session, test_user, rsa_private_key, kid, sub=username + ) + user_id_to_ga4gh_info[str(test_user.id)] = {"encoded_visa": encoded_visa} - query_visas = db_session.query(GA4GHVisaV1).all() + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + new_passport = { + "iss": "https://stsstg.nih.gov", + "sub": username, + "iat": int(time.time()), + "scope": "openid ga4gh_passport_v1 email profile", + "exp": int(time.time()) + 1000, + "ga4gh_passport_v1": [ + user_id_to_ga4gh_info[str(test_user.id)]["encoded_visa"] + ], + } - assert len(query_visas) == n_users + userinfo_response = { + "sub": username, + "name": "", + "preferred_username": "someuser@era.com", + "UID": "", + "UserID": username + "_USERNAME", + "email": "", + } + encoded_passport = jwt.encode( + new_passport, + key=rsa_private_key, + headers=passport_header, + algorithm="RS256", + ).decode("utf-8") + user_id_to_ga4gh_info[str(test_user.id)]["encoded_passport"] = encoded_passport + + userinfo_response["passport_jwt_v11"] = encoded_passport + 
user_id_to_ga4gh_info[str(test_user.id)][
+            "userinfo_response"
+        ] = userinfo_response

-    new_visa = {
-        "iss": "https://stsstg.nih.gov",
-        "sub": "abcde12345aspdij",
-        "iat": int(time.time()),
-        "exp": int(time.time()) + 1000,
-        "scope": "openid ga4gh_passport_v1 email profile",
-        "jti": "jtiajoidasndokmasdl",
-        "txn": "sapidjspa.asipidja",
-        "name": "",
-        "ga4gh_visa_v1": {
-            "type": "https://ras.nih.gov/visas/v1",
-            "asserted": int(time.time()),
-            "value": "https://nig/passport/dbgap",
-            "source": "https://ncbi/gap",
-        },
-    }
+        add_refresh_token(db_session, test_user)

-    headers = {"kid": kid}
+    for j in range(1, n_users_no_visas + 1):
+        username = "no_existing_visa_{}".format(j)
+        test_user = add_test_ras_user(db_session, username, subject_id=username)

-    encoded_visa = jwt.encode(
-        new_visa, key=rsa_private_key, headers=headers, algorithm="RS256"
-    ).decode("utf-8")
+    query_visas = db_session.query(GA4GHVisaV1).all()

-    passport_header = {
-        "type": "JWT",
-        "alg": "RS256",
-        "kid": kid,
-    }
-    new_passport = {
-        "iss": "https://stsstg.nih.gov",
-        "sub": "abcde12345aspdij",
-        "iat": int(time.time()),
-        "scope": "openid ga4gh_passport_v1 email profile",
-        "exp": int(time.time()) + 1000,
-        "ga4gh_passport_v1": [encoded_visa],
-    }
+    assert len(query_visas) == n_users

-    encoded_passport = jwt.encode(
-        new_passport, key=rsa_private_key, headers=passport_header, algorithm="RS256"
-    ).decode("utf-8")
+    def _get_userinfo(*args, **kwargs):
+        # b/c of the setup in _get_token_response_for_user we know the
+        # access token will be the user.id
+        return user_id_to_ga4gh_info.get(str(args[0].get("access_token", {})), {})[
+            "userinfo_response"
+        ]

-    userinfo_response["passport_jwt_v11"] = encoded_passport
-    mock_userinfo.return_value = userinfo_response
+    mock_userinfo.side_effect = _get_userinfo

     # test "fence-create update-visa"
     job = Visa_Token_Update()
@@ -631,7 +724,165 @@

     query_visas = db_session.query(GA4GHVisaV1).all()

-    assert len(query_visas) == n_users
+    # this should not disturb previous manually added visas
+    # and should add a new visa per user (including users without existing visas)
+    assert len(query_visas) == n_users * 2

     for visa in query_visas:
-        assert visa.ga4gh_visa == encoded_visa
+        assert (
+            visa.ga4gh_visa == user_id_to_ga4gh_info[str(visa.user.id)]["encoded_visa"]
+        )
+
+
+def test_map_iss_sub_pair_to_user_with_no_prior_DRS_access(db_session):
+    """
+    Test RASOauth2Client.map_iss_sub_pair_to_user when the username passed in
+    (e.g. eRA username) does not already exist in the Fence database and that
+    user's <iss, sub> combination has not already been mapped through a prior
+    DRS access request.
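+    In this case, a brand new user and a single iss/sub-to-user mapping record
+    are expected to be created.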
+    """
+    # reset users table
+    db_session.query(User).delete()
+    db_session.commit()
+
+    iss = "https://domain.tld"
+    sub = "123_abc"
+    username = "johnsmith"
+    email = "johnsmith@domain.tld"
+    oidc = config.get("OPENID_CONNECT", {})
+    ras_client = RASClient(
+        oidc["ras"],
+        HTTP_PROXY=config.get("HTTP_PROXY"),
+        logger=logger,
+    )
+
+    assert not query_for_user(db_session, username)
+    iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all()
+    assert len(iss_sub_pair_to_user_records) == 0
+
+    username_to_log_in = ras_client.map_iss_sub_pair_to_user(
+        iss, sub, username, email, db_session=db_session
+    )
+
+    assert username_to_log_in == username
+    iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((iss, sub))
+    assert iss_sub_pair_to_user.user.username == username
+    assert iss_sub_pair_to_user.user.email == email
+    iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all()
+    assert len(iss_sub_pair_to_user_records) == 1
+
+
+def test_map_iss_sub_pair_to_user_with_prior_DRS_access(
+    db_session, mock_arborist_requests
+):
+    """
+    Test RASOauth2Client.map_iss_sub_pair_to_user when the username passed in
+    (e.g. eRA username) does not already exist in the Fence database but that
+    user's <iss, sub> combination has already been mapped to an existing user
+    created during a prior DRS access request. In this case, that
+    existing user's username is changed from sub+iss to the username passed
+    in.
+    """
+    mock_arborist_requests({"arborist/user/123_abcdomain.tld": {"PATCH": (None, 204)}})
+
+    # reset users table
+    db_session.query(User).delete()
+    db_session.commit()
+
+    iss = "https://domain.tld"
+    sub = "123_abc"
+    username = "johnsmith"
+    email = "johnsmith@domain.tld"
+    oidc = config.get("OPENID_CONNECT", {})
+    ras_client = RASClient(
+        oidc["ras"],
+        HTTP_PROXY=config.get("HTTP_PROXY"),
+        logger=logger,
+    )
+
+    get_or_create_gen3_user_from_iss_sub(iss, sub, db_session=db_session)
+    iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all()
+    assert len(iss_sub_pair_to_user_records) == 1
+    iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((iss, sub))
+    assert iss_sub_pair_to_user.user.username == "123_abcdomain.tld"
+
+    username_to_log_in = ras_client.map_iss_sub_pair_to_user(
+        iss, sub, username, email, db_session=db_session
+    )
+
+    assert username_to_log_in == username
+    iss_sub_pair_to_user_records = db_session.query(IssSubPairToUser).all()
+    assert len(iss_sub_pair_to_user_records) == 1
+    iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((iss, sub))
+    assert iss_sub_pair_to_user.user.username == username
+    assert iss_sub_pair_to_user.user.email == email
+
+
+def test_map_iss_sub_pair_to_user_with_prior_DRS_access_and_arborist_error(
+    db_session, mock_arborist_requests
+):
+    """
+    Test that RASOauth2Client.map_iss_sub_pair_to_user raises an internal error
+    when Arborist fails to return a successful response.
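+    The failed Arborist update is expected to surface as a fence InternalError.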
+    """
+    mock_arborist_requests({"arborist/user/123_abcdomain.tld": {"PATCH": (None, 500)}})
+
+    # reset users table
+    db_session.query(User).delete()
+    db_session.commit()
+
+    iss = "https://domain.tld"
+    sub = "123_abc"
+    username = "johnsmith"
+    email = "johnsmith@domain.tld"
+    oidc = config.get("OPENID_CONNECT", {})
+    ras_client = RASClient(
+        oidc["ras"],
+        HTTP_PROXY=config.get("HTTP_PROXY"),
+        logger=logger,
+    )
+    get_or_create_gen3_user_from_iss_sub(iss, sub, db_session=db_session)
+
+    with pytest.raises(InternalError):
+        ras_client.map_iss_sub_pair_to_user(
+            iss, sub, username, email, db_session=db_session
+        )
+
+
+def test_map_iss_sub_pair_to_user_with_prior_login_and_prior_DRS_access(
+    db_session,
+):
+    """
+    Test RASOauth2Client.map_iss_sub_pair_to_user when the username passed in
+    (e.g. eRA username) already exists in the Fence database and that
+    user's <iss, sub> combination has already been mapped to a separate user
+    created during a prior DRS access request. In this case,
+    map_iss_sub_pair_to_user returns the user created from prior DRS/data
+    access, rendering the other user (e.g. the eRA one) inaccessible.
+    """
+    iss = "https://domain.tld"
+    sub = "123_abc"
+    username = "johnsmith"
+    email = "johnsmith@domain.tld"
+    oidc = config.get("OPENID_CONNECT", {})
+    ras_client = RASClient(
+        oidc["ras"],
+        HTTP_PROXY=config.get("HTTP_PROXY"),
+        logger=logger,
+    )
+
+    # reset users table
+    db_session.query(User).delete()
+    db_session.commit()
+
+    user = User(username=username, email=email)
+    db_session.add(user)
+    db_session.commit()
+
+    get_or_create_gen3_user_from_iss_sub(iss, sub, db_session=db_session)
+    username_to_log_in = ras_client.map_iss_sub_pair_to_user(
+        iss, sub, username, email, db_session=db_session
+    )
+    assert username_to_log_in == "123_abcdomain.tld"
+    iss_sub_pair_to_user = db_session.query(IssSubPairToUser).get((iss, sub))
+    assert iss_sub_pair_to_user.user.username == "123_abcdomain.tld"
diff --git a/tests/scripting/test_fence-create.py b/tests/scripting/test_fence-create.py
index 78bd4717c..2cd5911a1 100644
--- a/tests/scripting/test_fence-create.py
+++ b/tests/scripting/test_fence-create.py
@@ -23,9 +23,12 @@
     GoogleBucketAccessGroup,
     CloudProvider,
     Bucket,
+    GoogleProxyGroup,
     ServiceAccountToGoogleBucketAccessGroup,
+    GoogleProxyGroupToGoogleBucketAccessGroup,
     GoogleServiceAccountKey,
     StorageAccess,
+    GA4GHVisaV1,
 )
 from fence.scripting.fence_create import (
     delete_users,
@@ -33,6 +36,7 @@
     create_client_action,
     delete_client_action,
     delete_expired_service_accounts,
+    delete_expired_google_access,
     link_external_bucket,
     remove_expired_google_service_account_keys,
     verify_bucket_access_group,
@@ -42,8 +46,10 @@
     modify_client_action,
     create_projects,
     create_group,
+    cleanup_expired_ga4gh_information,
 )
-
+from tests.dbgap_sync.conftest import add_visa_manually
+from tests.utils import add_test_ras_user

 ROOT_DIR = "./"
@@ -384,9 +390,38 @@ def test_create_refresh_token_with_found_user(
     assert db_token is not None


-def _setup_service_account_to_google_bucket_access_group(db_session):
+def _setup_ga4gh_info(
+    db_session, rsa_private_key, kid, access_1_expires=None, access_2_expires=None
+):
+    """
+    Setup some testing data.
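+    Creates one test RAS user with two GA4GH visas whose expirations are
+    controlled by the arguments below.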
+ + Args: + access_1_expires (str, optional): expiration for the Proxy Group -> + Google Bucket Access Group for user 1, defaults to None + access_2_expires (str, optional): expiration for the Proxy Group -> + Google Bucket Access Group for user 2, defaults to None + """ + test_user = add_test_ras_user(db_session) + _, visa1 = add_visa_manually( + db_session, test_user, rsa_private_key, kid, expires=access_1_expires + ) + _, visa2 = add_visa_manually( + db_session, test_user, rsa_private_key, kid, expires=access_2_expires + ) + + return {"ga4gh_visas": {"1": visa1.id, "2": visa2.id, "test_user": test_user}} + + +def _setup_google_access(db_session, access_1_expires=None, access_2_expires=None): """ Setup some testing data. + + Args: + access_1_expires (str, optional): expiration for the Proxy Group -> + Google Bucket Access Group for user 1, defaults to None + access_2_expires (str, optional): expiration for the Proxy Group -> + Google Bucket Access Group for user 2, defaults to None """ cloud_provider = CloudProvider( name="test_provider", @@ -417,22 +452,40 @@ def _setup_service_account_to_google_bucket_access_group(db_session): db_session.add(bucket1) db_session.commit() + gpg1 = GoogleProxyGroup(id=1, email="test1@gmail.com") + gpg2 = GoogleProxyGroup(id=2, email="test2@gmail.com") + db_session.add(gpg1) + db_session.add(gpg2) + db_session.commit() + + gbag1 = GoogleBucketAccessGroup( + bucket_id=bucket1.id, + email="testgroup1@gmail.com", + privileges=["read-storage", "write-storage"], + ) + gbag2 = GoogleBucketAccessGroup( + bucket_id=bucket1.id, + email="testgroup2@gmail.com", + privileges=["read-storage"], + ) + db_session.add(gbag1) + db_session.add(gbag2) + db_session.commit() + db_session.add( - GoogleBucketAccessGroup( - bucket_id=bucket1.id, - email="testgroup1@gmail.com", - privileges=["read-storage", "write-storage"], + GoogleProxyGroupToGoogleBucketAccessGroup( + proxy_group_id=gpg1.id, access_group_id=gbag1.id, expires=access_1_expires ) ) db_session.add( - GoogleBucketAccessGroup( - bucket_id=bucket1.id, - email="testgroup2@gmail.com", - privileges=["read-storage"], + GoogleProxyGroupToGoogleBucketAccessGroup( + proxy_group_id=gpg2.id, access_group_id=gbag2.id, expires=access_2_expires ) ) db_session.commit() + return {"google_proxy_group_ids": {"1": gpg1.id, "2": gpg2.id}} + def test_delete_expired_service_accounts_with_one_fail_first( cloud_manager, app, db_session @@ -449,7 +502,7 @@ def test_delete_expired_service_accounts_with_one_fail_first( HttpError(mock.Mock(status=403), bytes("Permission denied", "utf-8")), {}, ] - _setup_service_account_to_google_bucket_access_group(db_session) + _setup_google_access(db_session) service_accounts = db_session.query(UserServiceAccount).all() google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all() @@ -499,7 +552,7 @@ def test_delete_expired_service_accounts_with_one_fail_second( {}, HttpError(mock.Mock(status=403), bytes("Permission denied", "utf-8")), ] - _setup_service_account_to_google_bucket_access_group(db_session) + _setup_google_access(db_session) service_accounts = db_session.query(UserServiceAccount).all() google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all() @@ -546,7 +599,7 @@ def test_delete_expired_service_accounts(cloud_manager, app, db_session): cloud_manager.return_value.__enter__.return_value.remove_member_from_group.return_value = ( {} ) - _setup_service_account_to_google_bucket_access_group(db_session) + _setup_google_access(db_session) service_accounts = 
     google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
@@ -593,7 +646,7 @@ def test_delete_not_expired_service_account(app, db_session):
     import fence

     fence.settings = MagicMock()
-    _setup_service_account_to_google_bucket_access_group(db_session)
+    _setup_google_access(db_session)

     service_account = db_session.query(UserServiceAccount).first()
     google_bucket_access_grp1 = db_session.query(GoogleBucketAccessGroup).first()
@@ -618,6 +671,218 @@ def test_delete_not_expired_service_account(app, db_session):
     assert len(records) == 1


+def test_delete_not_expired_google_access(app, db_session):
+    """
+    Test the case that there is no expired google access.
+    """
+    import fence
+
+    fence.settings = MagicMock()
+
+    current_time = int(time.time())
+    # 1 not expired, 2 not expired
+    access_1_expires = current_time + 3600
+    access_2_expires = current_time + 3600
+    _setup_google_access(
+        db_session, access_1_expires=access_1_expires, access_2_expires=access_2_expires
+    )
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database to make sure all the google access records exist
+    pre_deletion_google_access_size = len(google_access)
+    pre_deletion_google_proxy_groups_size = len(google_proxy_groups)
+    pre_deletion_google_bucket_access_grps_size = len(google_bucket_access_grps)
+
+    # call function to delete expired google access
+    delete_expired_google_access(config["DB"])
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database again. Expect nothing is deleted
+    assert len(google_access) == pre_deletion_google_access_size
+    assert len(google_proxy_groups) == pre_deletion_google_proxy_groups_size
+    assert len(google_bucket_access_grps) == pre_deletion_google_bucket_access_grps_size
+
+
+def test_delete_not_specified_expiration_google_access(app, db_session):
+    """
+    Test the case that no expiration time is specified in the db for google access.
+    In this case, we expect backwards-compatible behavior, i.e. the records are
+    NOT removed.
+    """
+    import fence
+
+    fence.settings = MagicMock()
+
+    current_time = int(time.time())
+    access_1_expires = None
+    access_2_expires = None
+    _setup_google_access(
+        db_session, access_1_expires=access_1_expires, access_2_expires=access_2_expires
+    )
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database to make sure all the google access records exist
+    pre_deletion_google_access_size = len(google_access)
+    pre_deletion_google_proxy_groups_size = len(google_proxy_groups)
+    pre_deletion_google_bucket_access_grps_size = len(google_bucket_access_grps)
+
+    # call function to delete expired google access
+    delete_expired_google_access(config["DB"])
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database again. Expect nothing is deleted
+    assert len(google_access) == pre_deletion_google_access_size
+    assert len(google_proxy_groups) == pre_deletion_google_proxy_groups_size
+    assert len(google_bucket_access_grps) == pre_deletion_google_bucket_access_grps_size
+
+
+def test_delete_expired_google_access(cloud_manager, app, db_session):
+    """
+    Test deleting all expired google access records.
+    """
+    import fence
+
+    fence.settings = MagicMock()
+    cloud_manager.return_value.__enter__.return_value.remove_member_from_group.return_value = (
+        {}
+    )
+
+    current_time = int(time.time())
+    # 1 expired, 2 not expired
+    access_1_expires = current_time - 3600
+    access_2_expires = current_time + 3600
+    setup_results = _setup_google_access(
+        db_session, access_1_expires=access_1_expires, access_2_expires=access_2_expires
+    )
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database to make sure all the google access records exist
+    pre_deletion_google_access_size = len(google_access)
+    pre_deletion_google_proxy_groups_size = len(google_proxy_groups)
+    pre_deletion_google_bucket_access_grps_size = len(google_bucket_access_grps)
+
+    # call function to delete expired google access
+    delete_expired_google_access(config["DB"])
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database again. Expect 1 access is deleted - proxy group and gbag should be intact
+    assert len(google_access) == pre_deletion_google_access_size - 1
+    remaining_ids = [str(gpg_to_gbag.proxy_group_id) for gpg_to_gbag in google_access]
+
+    # b/c expired
+    assert str(setup_results["google_proxy_group_ids"]["1"]) not in remaining_ids
+
+    # b/c not expired
+    assert str(setup_results["google_proxy_group_ids"]["2"]) in remaining_ids
+
+    assert len(google_proxy_groups) == pre_deletion_google_proxy_groups_size
+    assert len(google_bucket_access_grps) == pre_deletion_google_bucket_access_grps_size
+
+
+def test_delete_expired_google_access_with_one_fail_first(
+    cloud_manager, app, db_session
+):
+    """
+    Test the case where removing a member from a Google group in GCP fails.
+    In this case, we still want the expired record to remain in the db so we
+    can try to remove it again.
+    """
+    from googleapiclient.errors import HttpError
+    import fence
+
+    fence.settings = MagicMock()
+    cirrus.config.update = MagicMock()
+    cloud_manager.return_value.__enter__.return_value.remove_member_from_group.side_effect = [
+        HttpError(mock.Mock(status=403), bytes("Permission denied", "utf-8")),
+        {},
+    ]
+
+    current_time = int(time.time())
+    # 1 expired, 2 not expired
+    access_1_expires = current_time - 3600
+    access_2_expires = current_time + 3600
+    _setup_google_access(
+        db_session, access_1_expires=access_1_expires, access_2_expires=access_2_expires
+    )
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database to make sure all the google access records exist
+    pre_deletion_google_access_size = len(google_access)
+    pre_deletion_google_proxy_groups_size = len(google_proxy_groups)
+    pre_deletion_google_bucket_access_grps_size = len(google_bucket_access_grps)
+
+    # call function to delete expired google access
+    delete_expired_google_access(config["DB"])
+
+    google_access = db_session.query(GoogleProxyGroupToGoogleBucketAccessGroup).all()
+    google_proxy_groups = db_session.query(GoogleProxyGroup).all()
+    google_bucket_access_grps = db_session.query(GoogleBucketAccessGroup).all()
+
+    # check database again. Expect nothing is deleted
+    assert len(google_access) == pre_deletion_google_access_size
+    assert len(google_proxy_groups) == pre_deletion_google_proxy_groups_size
+    assert len(google_bucket_access_grps) == pre_deletion_google_bucket_access_grps_size
+
+
+def test_cleanup_expired_ga4gh_information(app, db_session, rsa_private_key, kid):
+    """
+    Test removal of expired ga4gh info.
+    """
+    import fence
+
+    current_time = int(time.time())
+    # 1 expired, 2 not expired
+    access_1_expires = current_time - 3600
+    access_2_expires = current_time + 3600
+    setup_results = _setup_ga4gh_info(
+        db_session,
+        rsa_private_key,
+        kid,
+        access_1_expires=access_1_expires,
+        access_2_expires=access_2_expires,
+    )
+
+    ga4gh_visas = db_session.query(GA4GHVisaV1).all()
+
+    # check database to make sure all the visas exist
+    pre_deletion_ga4gh_visas_size = len(ga4gh_visas)
+
+    # call function to clean up expired ga4gh information
+    cleanup_expired_ga4gh_information(config["DB"])
+
+    ga4gh_visas = db_session.query(GA4GHVisaV1).all()
+
+    # check database again. Expect 1 visa is deleted
+    assert len(ga4gh_visas) == pre_deletion_ga4gh_visas_size - 1
+    remaining_ids = [str(item.id) for item in ga4gh_visas]
+
+    # b/c expired
+    assert str(setup_results["ga4gh_visas"]["1"]) not in remaining_ids
+
+    # b/c not expired
+    assert str(setup_results["ga4gh_visas"]["2"]) in remaining_ids
+
+
 def test_verify_bucket_access_group_no_interested_accounts(
     app, cloud_manager, db_session, setup_test_data
 ):
@@ -1119,7 +1384,7 @@ def test_modify_client_action_modify_allowed_scopes(db_session):
     client_name = "test123"
     client = Client(
         client_id=client_id,
-        client_secret="secret",
+        client_secret="secret",  # pragma: allowlist secret
         name=client_name,
         _allowed_scopes="openid user data",
     )
@@ -1147,7 +1412,7 @@ def test_modify_client_action_modify_allowed_scopes_append_true(db_session):
     client_name = "test123"
     client = Client(
         client_id=client_id,
-        client_secret="secret",
+        client_secret="secret",  # pragma: allowlist secret
         name=client_name,
         _allowed_scopes="openid user data",
     )
@@ -1176,7 +1441,7 @@ def test_modify_client_action_modify_append_url(db_session):
     client_name = "test123"
     client = Client(
         client_id=client_id,
-        client_secret="secret",
+        client_secret="secret",  # pragma: allowlist secret
         name=client_name,
         _allowed_scopes="openid user data",
         redirect_uris="abcd",
diff --git a/tests/test-fence-config.yaml b/tests/test-fence-config.yaml
index 103a4c4b8..fa535074a 100755
--- a/tests/test-fence-config.yaml
+++ b/tests/test-fence-config.yaml
@@ -97,7 +97,7 @@ OPENID_CONNECT:
       client_id: ''
       client_secret: ''
       redirect_url: '{{BASE_URL}}/login/ras/callback'
-      discovery_url: 'https://sts.nih.gov/.well-known/openid-configuration'
+      discovery_url: 'https://stsstg.nih.gov/.well-known/openid-configuration'
   microsoft:
     discovery_url: ''
     client_id: ''
@@ -488,7 +488,7 @@ INDEXD: null
 # this is the username which fence uses to make authenticated requests to indexd
 INDEXD_USERNAME: 'gdcapi'
 # this is the password which fence uses to make authenticated requests to indexd
-INDEXD_PASSWORD: 'fake_password'
+INDEXD_PASSWORD: 'fake_password'  # pragma: allowlist secret

 # url where authz microservice is running
 ARBORIST: '/arborist'
@@ -632,6 +632,36 @@ GOOGLE_MANAGED_SERVICE_ACCOUNT_DOMAINS:
 MAX_ROLE_SESSION_INCREASE: true
 ASSUME_ROLE_CACHE_SECONDS: 1800

+# //////////////////////////////////////////////////////////////////////////////////////
+# GA4GH SUPPORT: DATA ACCESS AND AUTHORIZATION SYNCING
+# //////////////////////////////////////////////////////////////////////////////////////
+# whether or not to accept GA4GH Passports as a means of AuthN/Z to the DRS data access endpoint
+GA4GH_PASSPORTS_TO_DRS_ENABLED: true
+
+# RAS refresh_tokens expire in 15 days
+RAS_REFRESH_EXPIRATION: 1296000
+
+# List of JWT issuers from which Fence will accept GA4GH visas
+GA4GH_VISA_ISSUER_ALLOWLIST:
+  - '{{BASE_URL}}'
+  - 'https://sts.nih.gov'
+  - 'https://stsstg.nih.gov'
+GA4GH_VISA_V1_CLAIM_REQUIRED_FIELDS:
+  type:
+    - "https://ras.nih.gov/visas/v1.1"
+    - "https://ras.nih.gov/visas/v1"
+  value:
+    - "https://sts.nih.gov/passport/dbgap/v1.1"
+    - "https://stsstg.nih.gov/passport/dbgap/v1.1"
+  source:
+    - "https://ncbi.nlm.nih.gov/gap"
+EXPIRED_AUTHZ_REMOVAL_JOB_FREQ_IN_SECONDS: 1
+
+# Global sync visas during login
+# None(Default): Allow per client i.e. a fence client can pick whether or not to sync their visas during login with parse_visas param in /authorization endpoint
+# True: Parse for all clients i.e. a fence client will always sync their visas during login
+# False: Parse for no clients i.e. a fence client will not be able to sync visas during login even with parse_visas param
+GLOBAL_PARSE_VISAS_ON_LOGIN:
+
+# Settings for usersync with visas
+USERSYNC:
+  visa_types:
+    ras: ["https://ras.nih.gov/visas/v1", "https://ras.nih.gov/visas/v1.1"]
+RAS_USERINFO_ENDPOINT: '/openid/connect/v1.1/userinfo'
diff --git a/tests/test_audit_service.py b/tests/test_audit_service.py
index e6b48143d..23a8c85ed 100644
--- a/tests/test_audit_service.py
+++ b/tests/test_audit_service.py
@@ -435,7 +435,7 @@ def test_login_log_login_endpoint(
     get_user_id_value = {"username": username}
     callback_endpoint = "callback"
     # these should be populated by a /login/ call that we're skipping:
-    flask.g.userinfo = {}
+    flask.g.userinfo = {"sub": "testSub123"}
     flask.g.tokens = {
         "refresh_token": jwt_string,
         "id_token": jwt_string,
diff --git a/tests/test_drs.py b/tests/test_drs.py
index 2769784de..753ef0b6b 100644
--- a/tests/test_drs.py
+++ b/tests/test_drs.py
@@ -1,9 +1,20 @@
+import flask
+import httpx
+import hashlib
 import json
 import jwt
 import pytest
 import requests
 import responses
 from tests import utils
+import time
+from unittest.mock import MagicMock, patch
+
+from gen3authz.client.arborist.client import ArboristClient
+
+from fence.config import config
+from fence.models import GA4GHPassportCache
+from tests.utils import add_test_ras_user, TEST_RAS_USERNAME, TEST_RAS_SUB


 def get_doc(has_version=True, urls=list(), drs_list=0):
@@ -230,3 +241,1465 @@ def test_get_presigned_url_with_query_params(
         headers=user,
     )
     assert res.status_code == 200
+
+
+@responses.activate
+@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True)
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group")
+@patch("fence.scripting.fence_create.ArboristClient")
+def test_passport_use_disabled(
+    mock_arborist,
+    mock_google_proxy_group,
+    mock_httpx_get,
+    client,
+    indexd_client,
+    kid,
+    rsa_private_key,
+    rsa_public_key,
+    indexd_client_accepting_record,
+    mock_arborist_requests,
+    google_proxy_group,
+    primary_google_service_account,
+    cloud_manager,
+    google_signed_url,
+):
+    config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = False
+    indexd_record_with_non_public_authz_and_public_acl_populated = {
+        "did": "1",
+        "baseid": "",
+        "rev": "",
+        "size": 10,
+        "file_name": "file1",
+        "urls": ["s3://bucket1/key", "gs://bucket1/key"],
+        "hashes": {},
+        "metadata": {},
+        "authz": ["/orgA/programs/phs000991.c1"],
+        "acl": ["*"],
+        "form": "",
+        "created_date": "",
+        "updated_date": "",
+    }
+    indexd_client_accepting_record(
+        indexd_record_with_non_public_authz_and_public_acl_populated
+    )
+    mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}})
+    mock_arborist.return_value = MagicMock(ArboristClient)
+    mock_google_proxy_group.return_value = google_proxy_group
+
+    # Prepare Passport/Visa
+    headers = {"kid": kid}
+    decoded_visa = {
+        "iss": "https://stsstg.nih.gov",
+        "sub": "abcde12345aspdij",
+        "iat": int(time.time()),
+        "exp": int(time.time()) + 1000,
+        "scope": "openid ga4gh_passport_v1 email profile",
+        "jti": "jtiajoidasndokmasdl",
+        "txn": "sapidjspa.asipidja",
+        "name": "",
+        "ga4gh_visa_v1": {
+            "type": "https://ras.nih.gov/visas/v1.1",
+            "asserted": int(time.time()),
+            "value": "https://stsstg.nih.gov/passport/dbgap/v1.1",
+            "source": "https://ncbi.nlm.nih.gov/gap",
+        },
+        "ras_dbgap_permissions": [
+            {
+                "consent_name": "Health/Medical/Biomedical",
+                "phs_id": "phs000991",
"version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + "participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + "iss": "https://stsstg.nih.gov", + "sub": "abcde12345aspdij", + "iat": int(time.time()), + "scope": "openid ga4gh_passport_v1 email profile", + "exp": int(time.time()) + 1000, + "ga4gh_passport_v1": [encoded_visa], + } + encoded_passport = jwt.encode( + passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" + ).decode("utf-8") + + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + passports = [encoded_passport] + + data = {"passports": passports} + + keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs] + mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys}) + + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + assert res.status_code == 400 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +@patch("httpx.get") +@patch("fence.resources.google.utils._create_proxy_group") +@patch("fence.scripting.fence_create.ArboristClient") +def test_get_presigned_url_for_non_public_data_with_passport( + mock_arborist, + mock_google_proxy_group, + mock_httpx_get, + client, + indexd_client, + kid, + rsa_private_key, + rsa_public_key, + indexd_client_accepting_record, + mock_arborist_requests, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = True + indexd_record_with_non_public_authz_and_public_acl_populated = { + "did": "1", + "baseid": "", + "rev": "", + "size": 10, + "file_name": "file1", + "urls": ["s3://bucket1/key", "gs://bucket1/key"], + "hashes": {}, + "metadata": {}, + "authz": ["/orgA/programs/phs000991.c1"], + "acl": ["*"], + "form": "", + "created_date": "", + "updated_date": "", + } + indexd_client_accepting_record( + indexd_record_with_non_public_authz_and_public_acl_populated + ) + mock_arborist_requests({"arborist/auth/request": {"POST": 
({"auth": True}, 200)}}) + mock_arborist.return_value = MagicMock(ArboristClient) + mock_google_proxy_group.return_value = google_proxy_group + + # Prepare Passport/Visa + headers = {"kid": kid} + decoded_visa = { + "iss": "https://stsstg.nih.gov", + "sub": "abcde12345aspdij", + "iat": int(time.time()), + "exp": int(time.time()) + 1000, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1.1", + "asserted": int(time.time()), + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + "ras_dbgap_permissions": [ + { + "consent_name": "Health/Medical/Biomedical", + "phs_id": "phs000991", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + "participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + "iss": "https://stsstg.nih.gov", + "sub": "abcde12345aspdij", + "iat": int(time.time()), + "scope": "openid ga4gh_passport_v1 email profile", + "exp": int(time.time()) + 1000, + "ga4gh_passport_v1": [encoded_visa], + } + encoded_passport = jwt.encode( + passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" + ).decode("utf-8") + + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + passports = [encoded_passport] + + data = {"passports": passports} + + keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs] + mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys}) + + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + assert res.status_code == 200 + + +@responses.activate +@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True) +@patch("httpx.get") +@patch("fence.resources.google.utils._create_proxy_group") +@patch("fence.scripting.fence_create.ArboristClient") +def test_get_presigned_url_with_passport_with_incorrect_authz( + mock_arborist, + mock_google_proxy_group, + mock_httpx_get, + client, + 
indexd_client, + kid, + rsa_private_key, + rsa_public_key, + indexd_client_accepting_record, + mock_arborist_requests, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, +): + indexd_record_with_non_public_authz_and_public_acl_populated = { + "did": "1", + "baseid": "", + "rev": "", + "size": 10, + "file_name": "file1", + "urls": ["s3://bucket1/key", "gs://bucket1/key"], + "hashes": {}, + "metadata": {}, + "authz": ["/orgA/programs/phs000991.c1"], + "acl": ["*"], + "form": "", + "created_date": "", + "updated_date": "", + } + indexd_client_accepting_record( + indexd_record_with_non_public_authz_and_public_acl_populated + ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": False}, 200)}}) + mock_arborist.return_value = MagicMock(ArboristClient) + mock_google_proxy_group.return_value = google_proxy_group + + # Prepare Passport/Visa + headers = {"kid": kid} + decoded_visa = { + "iss": "https://stsstg.nih.gov", + "sub": "abcde12345aspdij", + "iat": int(time.time()), + "exp": int(time.time()) + 1000, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1.1", + "asserted": int(time.time()), + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + "ras_dbgap_permissions": [ + { + "consent_name": "Health/Medical/Biomedical", + "phs_id": "phs000991", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + "participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": int(time.time()) + 1001, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + "iss": "https://stsstg.nih.gov", + "sub": "abcde12345aspdij", + "iat": int(time.time()), + "scope": "openid ga4gh_passport_v1 email profile", + "exp": int(time.time()) + 1000, + "ga4gh_passport_v1": [encoded_visa], + } + encoded_passport = jwt.encode( + passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" + ).decode("utf-8") + + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + passports = [encoded_passport] + + data = {"passports": passports} 
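+
+    # The two lines below mock the issuer-key (JWKS) lookup that Fence performs
+    # while validating the passport: httpx.get returns the test app's own public
+    # keys, so the visa signed above with the test RSA key passes verification.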
+    keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs]
+    mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys})
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code == 401
+
+
+@responses.activate
+@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True)
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group")
+@patch("fence.scripting.fence_create.ArboristClient")
+def test_get_presigned_url_for_public_data_with_no_passport(
+    mock_arborist,
+    mock_google_proxy_group,
+    mock_httpx_get,
+    client,
+    indexd_client,
+    kid,
+    rsa_private_key,
+    rsa_public_key,
+    indexd_client_accepting_record,
+    mock_arborist_requests,
+    google_proxy_group,
+    primary_google_service_account,
+    cloud_manager,
+    google_signed_url,
+):
+    indexd_record_with_public_authz_and_public_acl_populated = {
+        "did": "1",
+        "baseid": "",
+        "rev": "",
+        "size": 10,
+        "file_name": "file1",
+        "urls": ["s3://bucket1/key", "gs://bucket1/key"],
+        "hashes": {},
+        "metadata": {},
+        "authz": ["/open"],
+        "acl": ["*"],
+        "form": "",
+        "created_date": "",
+        "updated_date": "",
+    }
+    indexd_client_accepting_record(
+        indexd_record_with_public_authz_and_public_acl_populated
+    )
+    mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}})
+    mock_arborist.return_value = MagicMock(ArboristClient)
+    mock_google_proxy_group.return_value = google_proxy_group
+
+    access_id = indexd_client["indexed_file_location"]
+    test_guid = "1"
+
+    passports = []
+
+    data = {"passports": passports}
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code == 200
+
+
+@responses.activate
+@pytest.mark.parametrize("indexd_client", ["s3", "gs"], indirect=True)
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group")
+@patch("fence.scripting.fence_create.ArboristClient")
+def test_get_presigned_url_for_non_public_data_with_no_passport(
+    mock_arborist,
+    mock_google_proxy_group,
+    mock_httpx_get,
+    client,
+    indexd_client,
+    kid,
+    rsa_private_key,
+    rsa_public_key,
+    indexd_client_accepting_record,
+    mock_arborist_requests,
+    google_proxy_group,
+    primary_google_service_account,
+    cloud_manager,
+    google_signed_url,
+):
+    # record is non-public by authz (its acl is public) and no passport is sent,
+    # so the request should be unauthorized
+    indexd_record_with_non_public_authz_and_public_acl_populated = {
+        "did": "1",
+        "baseid": "",
+        "rev": "",
+        "size": 10,
+        "file_name": "file1",
+        "urls": ["s3://bucket1/key", "gs://bucket1/key"],
+        "hashes": {},
+        "metadata": {},
+        "authz": ["/orgA/programs/phs000991.c1"],
+        "acl": ["*"],
+        "form": "",
+        "created_date": "",
+        "updated_date": "",
+    }
+    indexd_client_accepting_record(
+        indexd_record_with_non_public_authz_and_public_acl_populated
+    )
+    mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": False}, 200)}})
+    mock_arborist.return_value = MagicMock(ArboristClient)
+    mock_google_proxy_group.return_value = google_proxy_group
+
+    access_id = indexd_client["indexed_file_location"]
+    test_guid = "1"
+
+    passports = []
+
+    data = {"passports": passports}
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code == 401
+
+
+@responses.activate
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group") +@patch("fence.scripting.fence_create.ArboristClient") +def test_passport_cache_valid_passport( + mock_arborist, + mock_google_proxy_group, + mock_httpx_get, + client, + indexd_client, + kid, + rsa_private_key, + rsa_public_key, + indexd_client_accepting_record, + mock_arborist_requests, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, + db_session, + monkeypatch, +): + """ + Test that when a passport is provided a second time, the in-memory cache gets used + and the database cache is populated. + + NOTE: This is very similar to the test_get_presigned_url_for_non_public_data_with_passport + test with added stuff to check cache functionality + """ + # reset caches + PASSPORT_CACHE = {} + from fence.resources.ga4gh import passports as passports_module + + monkeypatch.setattr(passports_module, "PASSPORT_CACHE", PASSPORT_CACHE) + db_session.query(GA4GHPassportCache).delete() + db_session.commit() + + config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = True + indexd_record_with_non_public_authz_and_public_acl_populated = { + "did": "1", + "baseid": "", + "rev": "", + "size": 10, + "file_name": "file1", + "urls": ["s3://bucket1/key", "gs://bucket1/key"], + "hashes": {}, + "metadata": {}, + "authz": ["/orgA/programs/phs000991.c1"], + "acl": ["*"], + "form": "", + "created_date": "", + "updated_date": "", + } + indexd_client_accepting_record( + indexd_record_with_non_public_authz_and_public_acl_populated + ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) + mock_arborist.return_value = MagicMock(ArboristClient) + mock_google_proxy_group.return_value = google_proxy_group + + # Prepare Passport/Visa + current_time = int(time.time()) + headers = {"kid": kid} + decoded_visa = { + "iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "exp": current_time + 1000, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1.1", + "asserted": current_time, + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + "ras_dbgap_permissions": [ + { + "consent_name": "Health/Medical/Biomedical", + "phs_id": "phs000991", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + 
"participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + "iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "scope": "openid ga4gh_passport_v1 email profile", + "exp": current_time + 1000, + "ga4gh_passport_v1": [encoded_visa], + } + encoded_passport = jwt.encode( + passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" + ).decode("utf-8") + + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + passports = [encoded_passport] + + data = {"passports": passports} + + keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs] + mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys}) + + passport_hash = hashlib.sha256(encoded_passport.encode("utf-8")).hexdigest() + + # check database cache + cached_passports = [ + item.passport_hash for item in db_session.query(GA4GHPassportCache).all() + ] + assert passport_hash not in cached_passports + + # check in-memory cache + assert not PASSPORT_CACHE.get(passport_hash) + + before_cache_start = time.time() + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + before_cache_end = time.time() + before_cache_time = before_cache_end - before_cache_start + assert res.status_code == 200 + + # check that database cache populated + cached_passports = [ + item.passport_hash for item in db_session.query(GA4GHPassportCache).all() + ] + assert passport_hash in cached_passports + + # check that in-memory cache populated + assert PASSPORT_CACHE.get(passport_hash) + + after_cache_start = time.time() + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + after_cache_end = time.time() + after_cache_time = after_cache_end - after_cache_start + assert res.status_code == 200 + # make sure using the cache is faster + assert after_cache_time < before_cache_time + + +@responses.activate +@patch("httpx.get") +@patch("fence.resources.google.utils._create_proxy_group") +@patch("fence.scripting.fence_create.ArboristClient") +def test_passport_cache_invalid_passport( + mock_arborist, + mock_google_proxy_group, + mock_httpx_get, + client, + indexd_client, + kid, + rsa_private_key, + rsa_public_key, + indexd_client_accepting_record, + mock_arborist_requests, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, + db_session, + monkeypatch, +): + """ + Test that when an invalid passport is provided a second time, the in-memory cache + does NOT get used and the database cache is NOT populated. 
+ + NOTE: This is very similar to the test_get_presigned_url_for_non_public_data_with_passport + test with added stuff to check cache functionality + """ + # reset caches + PASSPORT_CACHE = {} + from fence.resources.ga4gh import passports as passports_module + + monkeypatch.setattr(passports_module, "PASSPORT_CACHE", PASSPORT_CACHE) + db_session.query(GA4GHPassportCache).delete() + db_session.commit() + + config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = True + indexd_record_with_non_public_authz_and_public_acl_populated = { + "did": "1", + "baseid": "", + "rev": "", + "size": 10, + "file_name": "file1", + "urls": ["s3://bucket1/key", "gs://bucket1/key"], + "hashes": {}, + "metadata": {}, + "authz": ["/orgA/programs/phs000991.c1"], + "acl": [""], + "form": "", + "created_date": "", + "updated_date": "", + } + indexd_client_accepting_record( + indexd_record_with_non_public_authz_and_public_acl_populated + ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": False}, 200)}}) + mock_arborist.return_value = MagicMock(ArboristClient) + mock_google_proxy_group.return_value = google_proxy_group + + # Prepare Passport/Visa + current_time = int(time.time()) + headers = {"kid": kid} + decoded_visa = { + "iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "exp": current_time + 1000, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1.1", + "asserted": current_time, + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + "ras_dbgap_permissions": [ + { + "consent_name": "Health/Medical/Biomedical", + "phs_id": "phs000991", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": current_time + 1000, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + "participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 1000, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + "iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "scope": "openid ga4gh_passport_v1 email profile", + "exp": current_time + 1000, + "ga4gh_passport_v1": [encoded_visa], + } + invalid_encoded_passport = "invalid" + jwt.encode( + 
passport, key=rsa_private_key, headers=passport_header, algorithm="RS256"
+    ).decode("utf-8")
+
+    access_id = indexd_client["indexed_file_location"]
+    test_guid = "1"
+
+    passports = [invalid_encoded_passport]
+
+    data = {"passports": passports}
+
+    keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs]
+    mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys})
+
+    passport_hash = hashlib.sha256(invalid_encoded_passport.encode("utf-8")).hexdigest()
+
+    # check database cache
+    cached_passports = [
+        item.passport_hash for item in db_session.query(GA4GHPassportCache).all()
+    ]
+    assert passport_hash not in cached_passports
+
+    # check in-memory cache
+    assert not PASSPORT_CACHE.get(passport_hash)
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code != 200
+
+    # check that database cache NOT populated
+    cached_passports = [
+        item.passport_hash for item in db_session.query(GA4GHPassportCache).all()
+    ]
+    assert passport_hash not in cached_passports
+
+    # check that in-memory cache NOT populated
+    assert not PASSPORT_CACHE.get(passport_hash)
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code != 200
+
+
+@responses.activate
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group")
+@patch("fence.scripting.fence_create.ArboristClient")
+def test_passport_cache_expired_in_memory_valid_in_db(
+    mock_arborist,
+    mock_google_proxy_group,
+    mock_httpx_get,
+    client,
+    indexd_client,
+    kid,
+    rsa_private_key,
+    rsa_public_key,
+    indexd_client_accepting_record,
+    mock_arborist_requests,
+    google_proxy_group,
+    primary_google_service_account,
+    cloud_manager,
+    google_signed_url,
+    db_session,
+    monkeypatch,
+):
+    """
+    Test that when a passport is provided a second time when the in-memory cache
+    is expired but the database cache is valid, we still get a successful response.
+
+    Check that the database cache entry is kept and loaded back into the
+    in-memory cache.
+
+    NOTE: This is very similar to the test_get_presigned_url_for_non_public_data_with_passport
+    test, with additions to check cache functionality
+    """
+    # reset the database cache; the in-memory cache is overridden with an
+    # expired entry further below
+    from fence.resources.ga4gh import passports as passports_module
+
+    db_session.query(GA4GHPassportCache).delete()
+    db_session.commit()
+
+    # username that a prior login would have created for this iss/sub pair
+    test_username = "abcd-asdj-sajpiasj12iojd-asnoinstsstg.nih.gov"
+
+    config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = True
+    indexd_record_with_non_public_authz_and_public_acl_populated = {
+        "did": "1",
+        "baseid": "",
+        "rev": "",
+        "size": 10,
+        "file_name": "file1",
+        "urls": ["s3://bucket1/key", "gs://bucket1/key"],
+        "hashes": {},
+        "metadata": {},
+        "authz": ["/orgA/programs/phs000991.c1"],
+        "acl": [""],
+        "form": "",
+        "created_date": "",
+        "updated_date": "",
+    }
+    indexd_client_accepting_record(
+        indexd_record_with_non_public_authz_and_public_acl_populated
+    )
+    mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}})
+    mock_arborist.return_value = MagicMock(ArboristClient)
+    mock_google_proxy_group.return_value = google_proxy_group
+
+    # Prepare Passport/Visa
+    current_time = int(time.time())
+    headers = {"kid": kid}
+    decoded_visa = {
+        "iss": "https://stsstg.nih.gov",
+        "sub": TEST_RAS_SUB,
+        "iat": current_time,
+        "exp": current_time + 1000,
+        "scope": "openid ga4gh_passport_v1 email profile",
+        "jti": "jtiajoidasndokmasdl",
+        "txn": "sapidjspa.asipidja",
+        "name": "",
+        "ga4gh_visa_v1": {
+            "type": "https://ras.nih.gov/visas/v1.1",
+            "asserted": current_time,
+            "value": "https://stsstg.nih.gov/passport/dbgap/v1.1",
+            "source": "https://ncbi.nlm.nih.gov/gap",
+        },
+        "ras_dbgap_permissions": [
+            {
+                "consent_name": "Health/Medical/Biomedical",
+                "phs_id": "phs000991",
+                "version": "v1",
+                "participant_set": "p1",
+                "consent_group": "c1",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+            {
+                "consent_name": "General Research Use (IRB, PUB)",
+                "phs_id": "phs000961",
+                "version": "v1",
+                "participant_set": "p1",
+                "consent_group": "c1",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+            {
+                "consent_name": "Disease-Specific (Cardiovascular Disease)",
+                "phs_id": "phs000279",
+                "version": "v2",
+                "participant_set": "p1",
+                "consent_group": "c1",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+            {
+                "consent_name": "Health/Medical/Biomedical (IRB)",
+                "phs_id": "phs000286",
+                "version": "v6",
+                "participant_set": "p2",
+                "consent_group": "c3",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+            {
+                "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)",
+                "phs_id": "phs000289",
+                "version": "v6",
+                "participant_set": "p2",
+                "consent_group": "c2",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+            {
+                "consent_name": "Disease-Specific (Autism Spectrum Disorder)",
+                "phs_id": "phs000298",
+                "version": "v4",
+                "participant_set": "p3",
+                "consent_group": "c1",
+                "role": "designated user",
+                "expiration": current_time + 1000,
+            },
+        ],
+    }
+    encoded_visa = jwt.encode(
+        decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256"
+    ).decode("utf-8")
+
+    passport_header = {
+        "type": "JWT",
+        "alg": "RS256",
+        "kid": kid,
+    }
+    passport = {
+        "iss": "https://stsstg.nih.gov",
+        "sub": TEST_RAS_SUB,
+        "iat": current_time,
+        "scope": "openid ga4gh_passport_v1 email profile",
+        "exp": current_time + 1000,
+        "ga4gh_passport_v1": [encoded_visa],
+    }
+    encoded_passport = jwt.encode(
+        passport, key=rsa_private_key, headers=passport_header, algorithm="RS256"
+    ).decode("utf-8")
+
+    access_id = indexd_client["indexed_file_location"]
+    test_guid = "1"
+
+    passports = [encoded_passport]
+
+    data = {"passports": passports}
+
+    keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs]
+    mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys})
+
+    passport_hash = hashlib.sha256(encoded_passport.encode("utf-8")).hexdigest()
+
+    # simulate the db cache holding a valid entry for this passport
+    passports_module.put_gen3_usernames_for_passport_into_cache(
+        encoded_passport, [test_username], current_time + 1000, db_session=db_session
+    )
+
+    # double-check database cache
+    cached_passport = (
+        db_session.query(GA4GHPassportCache)
+        .filter(GA4GHPassportCache.passport_hash == passport_hash)
+        .first()
+    )
+    # greater and NOT == b/c of logic to set internal expiration less than real to allow
+    # time for expiration job to run
+    assert cached_passport and cached_passport.expires_at > current_time
+
+    # simulate the in-memory cache holding an expired entry for the same passport
+    PASSPORT_CACHE = {passport_hash: ([test_username], current_time - 1)}
+    monkeypatch.setattr(passports_module, "PASSPORT_CACHE", PASSPORT_CACHE)
+
+    res = client.post(
+        "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id,
+        headers={
+            "Content-Type": "application/json",
+        },
+        data=json.dumps(data),
+    )
+    assert res.status_code == 200
+
+    # check that database cache still populated
+    assert (
+        len([item.passport_hash for item in db_session.query(GA4GHPassportCache).all()])
+        == 1
+    )
+    cached_passport = (
+        db_session.query(GA4GHPassportCache)
+        .filter(GA4GHPassportCache.passport_hash == passport_hash)
+        .first()
+    )
+    # greater and NOT == b/c of logic to set internal expiration less than real to allow
+    # time for expiration job to run
+    assert cached_passport and cached_passport.expires_at > current_time
+
+    # check that the in-memory cache was repopulated with the db expiration
+    # greater and NOT == b/c of logic to set internal expiration less than real to allow
+    # time for expiration job to run
+    assert PASSPORT_CACHE.get(passport_hash, ("", 0))[1] > current_time
+
+
+@responses.activate
+@patch("httpx.get")
+@patch("fence.resources.google.utils._create_proxy_group")
+@patch("fence.scripting.fence_create.ArboristClient")
+def test_passport_cache_expired(
+    mock_arborist,
+    mock_google_proxy_group,
+    mock_httpx_get,
+    client,
+    indexd_client,
+    kid,
+    rsa_private_key,
+    rsa_public_key,
+    indexd_client_accepting_record,
mock_arborist_requests, + google_proxy_group, + primary_google_service_account, + cloud_manager, + google_signed_url, + db_session, + monkeypatch, +): + """ + Test that when a passport is expired, we don't get a successful response, even + if the passport was previously cached. + + NOTE: This is very similar to the test_get_presigned_url_for_non_public_data_with_passport + test with added stuff to check cache functionality + """ + # reset cache + PASSPORT_CACHE = {} + from fence.resources.ga4gh import passports as passports_module + + monkeypatch.setattr(passports_module, "PASSPORT_CACHE", PASSPORT_CACHE) + db_session.query(GA4GHPassportCache).delete() + db_session.commit() + + config["GA4GH_PASSPORTS_TO_DRS_ENABLED"] = True + indexd_record_with_non_public_authz_and_public_acl_populated = { + "did": "1", + "baseid": "", + "rev": "", + "size": 10, + "file_name": "file1", + "urls": ["s3://bucket1/key", "gs://bucket1/key"], + "hashes": {}, + "metadata": {}, + "authz": ["/orgA/programs/phs000991.c1"], + "acl": [""], + "form": "", + "created_date": "", + "updated_date": "", + } + indexd_client_accepting_record( + indexd_record_with_non_public_authz_and_public_acl_populated + ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) + mock_arborist.return_value = MagicMock(ArboristClient) + mock_google_proxy_group.return_value = google_proxy_group + + # Prepare Passport/Visa + current_time = int(time.time()) + headers = {"kid": kid} + decoded_visa = { + "iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "exp": current_time + 2, + "scope": "openid ga4gh_passport_v1 email profile", + "jti": "jtiajoidasndokmasdl", + "txn": "sapidjspa.asipidja", + "name": "", + "ga4gh_visa_v1": { + "type": "https://ras.nih.gov/visas/v1.1", + "asserted": current_time, + "value": "https://stsstg.nih.gov/passport/dbgap/v1.1", + "source": "https://ncbi.nlm.nih.gov/gap", + }, + "ras_dbgap_permissions": [ + { + "consent_name": "Health/Medical/Biomedical", + "phs_id": "phs000991", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 2, + }, + { + "consent_name": "General Research Use (IRB, PUB)", + "phs_id": "phs000961", + "version": "v1", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 2, + }, + { + "consent_name": "Disease-Specific (Cardiovascular Disease)", + "phs_id": "phs000279", + "version": "v2", + "participant_set": "p1", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 2, + }, + { + "consent_name": "Health/Medical/Biomedical (IRB)", + "phs_id": "phs000286", + "version": "v6", + "participant_set": "p2", + "consent_group": "c3", + "role": "designated user", + "expiration": current_time + 2, + }, + { + "consent_name": "Disease-Specific (Focused Disease Only, IRB, NPU)", + "phs_id": "phs000289", + "version": "v6", + "participant_set": "p2", + "consent_group": "c2", + "role": "designated user", + "expiration": current_time + 2, + }, + { + "consent_name": "Disease-Specific (Autism Spectrum Disorder)", + "phs_id": "phs000298", + "version": "v4", + "participant_set": "p3", + "consent_group": "c1", + "role": "designated user", + "expiration": current_time + 2, + }, + ], + } + encoded_visa = jwt.encode( + decoded_visa, key=rsa_private_key, headers=headers, algorithm="RS256" + ).decode("utf-8") + + passport_header = { + "type": "JWT", + "alg": "RS256", + "kid": kid, + } + passport = { + 
"iss": "https://stsstg.nih.gov", + "sub": TEST_RAS_SUB, + "iat": current_time, + "scope": "openid ga4gh_passport_v1 email profile", + "exp": current_time + 2, + "ga4gh_passport_v1": [encoded_visa], + } + encoded_passport = jwt.encode( + passport, key=rsa_private_key, headers=passport_header, algorithm="RS256" + ).decode("utf-8") + + access_id = indexd_client["indexed_file_location"] + test_guid = "1" + + passports = [encoded_passport] + + data = {"passports": passports} + + keys = [keypair.public_key_to_jwk() for keypair in flask.current_app.keypairs] + mock_httpx_get.return_value = httpx.Response(200, json={"keys": keys}) + + passport_hash = hashlib.sha256(encoded_passport.encode("utf-8")).hexdigest() + + # check database cache + cached_passports = [ + item.passport_hash for item in db_session.query(GA4GHPassportCache).all() + ] + assert passport_hash not in cached_passports + + # check in-memory cache + assert not PASSPORT_CACHE.get(passport_hash) + + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + assert res.status_code == 200 + + # ensure passport is expired by sleeping + expire_time = current_time + 2 + current_time = int(time.time()) + if current_time < expire_time: + sleep_time = expire_time - current_time + time.sleep(sleep_time) + + # try again + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": False}, 200)}}) + res = client.post( + "/ga4gh/drs/v1/objects/" + test_guid + "/access/" + access_id, + headers={ + "Content-Type": "application/json", + }, + data=json.dumps(data), + ) + assert res.status_code != 200 diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index c7193a4f3..adebec098 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -6,7 +6,7 @@ from flask import current_app from fence.config import config - +from fence.resources.ga4gh.passports import get_or_create_gen3_user_from_iss_sub from fence.models import ( User, Project, @@ -24,6 +24,23 @@ import tests import tests.utils.oauth2 +TEST_RAS_USERNAME = "admin_user" +TEST_RAS_SUB = "abcd-asdj-sajpiasj12iojd-asnoin" + + +def add_test_ras_user( + db_session, username=TEST_RAS_USERNAME, is_admin=True, subject_id=TEST_RAS_SUB +): + # pre-populate mapping table, as login would do + test_user = get_or_create_gen3_user_from_iss_sub( + issuer="https://stsstg.nih.gov", subject_id=subject_id, db_session=db_session + ) + test_user.username = username + test_user.is_admin = is_admin + db_session.add(test_user) + db_session.commit() + return test_user + def read_file(filename): """Read the contents of a file in the tests directory."""