Skip to content

Commit

Permalink
Merge branch 'master' into fix/gs-cache
Browse files Browse the repository at this point in the history
  • Loading branch information
BinamB committed Jun 14, 2022
2 parents f3ac91e + c523375 commit 954fc89
Show file tree
Hide file tree
Showing 51 changed files with 5,070 additions and 1,380 deletions.
92 changes: 45 additions & 47 deletions .secrets.baseline
Expand Up @@ -22,7 +22,7 @@
},
{
"name": "HexHighEntropyString",
"limit": 3.0
"limit": 3
},
{
"name": "IbmCloudIamDetector"
Expand Down Expand Up @@ -100,16 +100,33 @@
},
{
"path": "detect_secrets.filters.heuristic.is_templated_secret"
},
{
"path": "detect_secrets.filters.regex.should_exclude_file",
"pattern": [
"poetry.lock"
]
}
],
"results": {
"deployment/scripts/postgresql/postgresql_init.sql": [
{
"type": "Secret Keyword",
"filename": "deployment/scripts/postgresql/postgresql_init.sql",
"hashed_secret": "afc848c316af1a89d49826c5ae9d00ed769415f3",
"is_verified": false,
"line_number": 7,
"is_secret": false
}
],
"fence/blueprints/storage_creds/google.py": [
{
"type": "Private Key",
"filename": "fence/blueprints/storage_creds/google.py",
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 139
"line_number": 139,
"is_secret": false
}
],
"fence/blueprints/storage_creds/other.py": [
Expand All @@ -118,14 +135,16 @@
"filename": "fence/blueprints/storage_creds/other.py",
"hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd",
"is_verified": false,
"line_number": 66
"line_number": 66,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "fence/blueprints/storage_creds/other.py",
"hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd",
"is_verified": false,
"line_number": 66
"line_number": 66,
"is_secret": false
}
],
"fence/config-default.yaml": [
Expand All @@ -134,7 +153,8 @@
"filename": "fence/config-default.yaml",
"hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
"is_verified": false,
"line_number": 31
"line_number": 31,
"is_secret": false
}
],
"fence/local_settings.example.py": [
Expand All @@ -143,14 +163,16 @@
"filename": "fence/local_settings.example.py",
"hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
"is_verified": false,
"line_number": 6
"line_number": 6,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "fence/local_settings.example.py",
"hashed_secret": "5d07e1b80e448a213b392049888111e1779a52db",
"is_verified": false,
"line_number": 63
"line_number": 63,
"is_secret": false
}
],
"fence/resources/google/utils.py": [
Expand All @@ -159,7 +181,7 @@
"filename": "fence/resources/google/utils.py",
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 125
"line_number": 129
}
],
"fence/utils.py": [
Expand All @@ -168,44 +190,24 @@
"filename": "fence/utils.py",
"hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114",
"is_verified": false,
"line_number": 105
},
{
"type": "Secret Keyword",
"filename": "fence/utils.py",
"hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8",
"is_verified": false,
"line_number": 249
},
{
"type": "Secret Keyword",
"filename": "fence/utils.py",
"hashed_secret": "8954f53c9dc3f57137230a016d65bfaee24f8bc5",
"is_verified": false,
"line_number": 250
"line_number": 105,
"is_secret": false
}
],
"tests/conftest.py": [
{
"type": "Secret Keyword",
"filename": "tests/conftest.py",
"hashed_secret": "9801ff058ba790388c9efc095cb3e89a819d5ed6",
"is_verified": false,
"line_number": 160
},
{
"type": "Private Key",
"filename": "tests/conftest.py",
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 1358
"line_number": 1482
},
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "227dea087477346785aefd575f91dd13ab86c108",
"is_verified": false,
"line_number": 1381
"line_number": 1505
}
],
"tests/credentials/google/test_credentials.py": [
Expand All @@ -214,21 +216,24 @@
"filename": "tests/credentials/google/test_credentials.py",
"hashed_secret": "a06bdb09c0106ab559bd6acab2f1935e19f7e939",
"is_verified": false,
"line_number": 381
"line_number": 381,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "tests/credentials/google/test_credentials.py",
"hashed_secret": "93aa43c580f5347782e17fba5091f944767b15f0",
"is_verified": false,
"line_number": 474
"line_number": 474,
"is_secret": false
},
{
"type": "Secret Keyword",
"filename": "tests/credentials/google/test_credentials.py",
"hashed_secret": "768b7fe00de4fd233c0c72375d12f87ce9670144",
"is_verified": false,
"line_number": 476
"line_number": 476,
"is_secret": false
}
],
"tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem": [
Expand All @@ -237,7 +242,8 @@
"filename": "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem",
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 1
"line_number": 1,
"is_secret": false
}
],
"tests/login/test_fence_login.py": [
Expand All @@ -246,7 +252,8 @@
"filename": "tests/login/test_fence_login.py",
"hashed_secret": "d300421e208bfd0d432294de15169fd9b8975def",
"is_verified": false,
"line_number": 48
"line_number": 48,
"is_secret": false
}
],
"tests/ras/test_ras.py": [
Expand All @@ -255,16 +262,7 @@
"filename": "tests/ras/test_ras.py",
"hashed_secret": "d9db6fe5c14dc55edd34115cdf3958845ac30882",
"is_verified": false,
"line_number": 95
}
],
"tests/scripting/test_fence-create.py": [
{
"type": "Secret Keyword",
"filename": "tests/scripting/test_fence-create.py",
"hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4",
"is_verified": false,
"line_number": 1122
"line_number": 120
}
],
"tests/test-fence-config.yaml": [
Expand Down
17 changes: 10 additions & 7 deletions bin/fence_create.py
Expand Up @@ -17,6 +17,8 @@
create_sample_data,
delete_client_action,
delete_users,
delete_expired_google_access,
cleanup_expired_ga4gh_information,
google_init,
list_client_action,
link_external_bucket,
Expand All @@ -33,7 +35,7 @@
force_update_google_link,
migrate_database,
google_list_authz_groups,
update_user_visas,
access_token_polling_job,
)
from fence.settings import CONFIG_SEARCH_FOLDERS

Expand Down Expand Up @@ -147,6 +149,8 @@ def parse_arguments():

subparsers.add_parser("expired-service-account-delete")
subparsers.add_parser("bucket-access-group-verify")
subparsers.add_parser("delete-expired-google-access")
subparsers.add_parser("cleanup-expired-ga4gh-information")

hmac_create = subparsers.add_parser("hmac-create")
hmac_create.add_argument("yaml-input")
Expand Down Expand Up @@ -405,9 +409,6 @@ def main():
STORAGE_CREDENTIALS = os.environ.get("STORAGE_CREDENTIALS") or config.get(
"STORAGE_CREDENTIALS"
)
usersync = config.get("USERSYNC", {})
sync_from_visas = usersync.get("sync_from_visas", False)
fallback_to_dbgap_sftp = usersync.get("fallback_to_dbgap_sftp", False)

arborist = None
if args.arborist:
Expand Down Expand Up @@ -459,6 +460,10 @@ def main():
delete_expired_service_accounts(DB)
elif args.action == "bucket-access-group-verify":
verify_bucket_access_group(DB)
elif args.action == "delete-expired-google-access":
delete_expired_google_access(DB)
elif args.action == "cleanup-expired-ga4gh-information":
cleanup_expired_ga4gh_information(DB)
elif args.action == "sync":
sync_users(
dbGaP,
Expand All @@ -470,8 +475,6 @@ def main():
sync_from_local_yaml_file=args.yaml,
folder=args.folder,
arborist=arborist,
sync_from_visas=sync_from_visas,
fallback_to_dbgap_sftp=fallback_to_dbgap_sftp,
)
elif args.action == "dbgap-download-access-files":
download_dbgap_files(
Expand Down Expand Up @@ -572,7 +575,7 @@ def main():
elif args.action == "migrate":
migrate_database(DB)
elif args.action == "update-visas":
update_user_visas(
access_token_polling_job(
DB,
chunk_size=args.chunk_size,
concurrency=args.concurrency,
Expand Down
73 changes: 73 additions & 0 deletions docs/ga4gh_passports.md
@@ -0,0 +1,73 @@
# Passport Support in Gen3 Framework Services (G3FS)

G3FS will support a data access flow accepting Global Alliance for Genomics and Health (GA4GH) Passport(s) as a means of authentication and authorization to access file objects.

For National Institutes of Health (NIH) data, we will no longer rely on dbGaP User Access Telemetry files from the hourly usersync for authorization, but instead on NIH's Researcher Auth Service (RAS) Passports.

The adoption of GA4GH specifications across NIH-funded Platforms is a strategic initiative that is pushed for on numerous fronts.

> Our overall goal is interoperability through accepted standards (like GA4GH).
As we are a GA4GH Driver Project, we have identified numerous gaps and concerns with GA4GH’s specifications throughout the process of implementing passport support. We are at a point now where most have been either addressed, waived, risks accepted, or solutions punted to a future version. There are ongoing discussions about modifications for the future.

Please refer to official documentation about RAS Milestones for all historic and official decisions and designs related to RAS. This document will serve as an **unofficial technical overview** to maintainers of Gen3 and **may not be updated as regularly or represented as clearly as other public facing documents**.

## Passport and Visa JSON Web Token (JWT) Handling

Overview of the standards-based verification and validation flow for JWTs.

References:

* [GA4GH AAI](https://github.com/ga4gh/data-security/blob/master/AAI/AAIConnectProfile.md)
* [GA4GH Passport](https://github.com/ga4gh-duri/ga4gh-duri.github.io/blob/master/researcher_ids/ga4gh_passport_v1.md)
* [OpenID Connect Core](https://openid.net/specs/openid-connect-core-1_0.html)
* [Internet Engineering Task Force (IETF) RFC: JSON Web Token (JWT)](https://datatracker.ietf.org/doc/html/rfc7519)

This shows external DRS Client(s) communicating with Gen3 Framework Services (as a GA4GH DRS Server) and how G3FS interacts with Passport Brokers to validate and verify JWTs.

![Passport and Visa JWT Handling](images/ga4gh/passport_jwt_handling.png)

## G3FS: Configurable Roles for Data Access

Gen3 Framework Services are capable of acting in many different roles: as data repositories (or DRS Servers in GA4GH terminology), as authorization decision makers (GA4GH Claims Clearinghouses), and/or as token issuers (GA4GH Passport Brokers). G3FS is also capable of being a client to other Passport Brokers. G3FS must be a client to an upstream Identity Provider (IdP) as it does not ever store user passwords but relies on authentication from another trusted source.

In order to describe the role of the passport in these various configurations, the following diagrams may help.

![Gen3 as DRS Server](images/ga4gh/gen3_as_drs.png)

![Gen3 as Client](images/ga4gh/gen3_as_client.png)

![Gen3 as Both](images/ga4gh/gen3_as_client_and_drs_server.png)

## Performance Improvements

In some respects, the support for passports required an auth re-architecture to:

1. accept third-party generated token(s) to be a source of truth for authentication and authorization
2. parse that authorization information at the time of data access request (rather than synced before)

Passports can be provided to our data access APIs before we've ever seen that user, whereas previously we used to bulk sync all authorization **before** data access (behind the scenes as a cronjob). Because of this new, dynamic authorization decision making upon data requests, we knew that we'd need to take extra steps to ensure non-degraded performance.

We added a number of things to mitigate the performance impact on researchers' workflows. Most notably, we introduced a cache for valid passports such that when we receive thousands of requests to access data and the _exact same_ passport is sent thousands of times over a few minutes, we are able to validate and parse it once and rely on that for subsequent requests. The cache only lives as long as policy and standards allow (which is usually less than an hour).

To illustrate the need for such a cache, see the images below for before and after.

![Before Caching](images/ga4gh/caching_before.png)

![After Caching](images/ga4gh/caching_after.png)

## User Identities

Different GA4GH Visas may refer to the same subject differently. In order to maintain the known mappings between different representations of the same identity, we are creating an Issuer+Subject to User mapping table. The primary key on this table is the combination of the `iss` and `sub` from JWTs.

![User Identities](images/ga4gh/users.png)

## Backend Updates and Expiration

In order to ensure the removal of access at the right time, the cronjobs we have are updated based on the figure and notes below. We are requiring movement away from the deprecated, legacy, limited Fence authorization support in favor of the new policy engine (which allows expiration of policies out of the box).

There is an argument here for event-based architecture, but Gen3 does not currently support such an architecture. We are instead extending the support of our cronjobs to ensure expirations occur at the right time.

![Cronjobs and Expirations](images/ga4gh/expiration.png)

> _All diagrams are originally from an **internal** CTDS Document. The link to that document is [here](https://lucid.app/lucidchart/5c52b868-5cd2-4c6e-b53b-de2981f7da98/edit?invitationId=inv_9a757cb1-fc81-4189-934d-98c3db06d2fc) for internal people who need to edit the above diagrams._
Binary file added docs/images/ga4gh/caching_after.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/caching_before.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/expiration.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/gen3_as_client.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/gen3_as_drs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/passport_jwt_handling.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/passport_to_drs_flow.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/ga4gh/users.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 954fc89

Please sign in to comment.