Skip to content

Commit

Permalink
Merge pull request #774 from uc-cdis/assume-role-cache
Browse files Browse the repository at this point in the history
add assume_role cache
  • Loading branch information
MichaelLukowski committed May 26, 2021
2 parents 007a38e + 1e48af6 commit f1a571d
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 51 deletions.
3 changes: 2 additions & 1 deletion fence/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@ def app_config(

_setup_oidc_clients(app)

_check_s3_buckets(app)
with app.app_context():
_check_s3_buckets(app)


def _setup_data_endpoint_and_boto(app):
Expand Down
176 changes: 155 additions & 21 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import time
import json
from urllib.parse import urlparse

from cached_property import cached_property
Expand Down Expand Up @@ -37,7 +38,8 @@
)
from fence.utils import get_valid_expiration_from_request
from . import multipart_upload

from ...models import AssumeRoleCacheAWS
from ...models import AssumeRoleCacheGCP

logger = get_logger(__name__)

Expand Down Expand Up @@ -595,8 +597,13 @@ def get_signed_url(
class S3IndexedFileLocation(IndexedFileLocation):
"""
An indexed file that lives in an AWS S3 bucket.
_assume_role_cache is used as an in mem cache for holding role credentials
"""

# expected structure { role_arn: (rv, expires_at) }
_assume_role_cache = {}

@classmethod
def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None):
"""
Expand All @@ -608,16 +615,53 @@ def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None):
outside of application context, to avoid errors when
using `flask.current_app`.
"""
boto = boto or flask.current_app.boto

role_arn = get_value(
bucket_cred, "role-arn", InternalError("role-arn of that bucket is missing")
)
assumed_role = boto.assume_role(role_arn, expires_in, aws_creds_config)
expiry = time.time() + expires_in

# try to retrieve from local in-memory cache
rv, expires_at = cls._assume_role_cache.get(role_arn, (None, 0))
if expires_at > expiry:
return rv

# try to retrieve from database cache
if hasattr(flask.current_app, "db"): # we don't have db in startup
with flask.current_app.db.session as session:
cache = (
session.query(AssumeRoleCacheAWS)
.filter(AssumeRoleCacheAWS.arn == role_arn)
.first()
)
if cache and cache.expires_at and cache.expires_at > expiry:
rv = dict(
aws_access_key_id=cache.aws_access_key_id,
aws_secret_access_key=cache.aws_secret_access_key,
aws_session_token=cache.aws_session_token,
)
cls._assume_role_cache[role_arn] = rv, cache.expires_at
return rv

# retrieve from AWS, with additional ASSUME_ROLE_CACHE_SECONDS buffer for cache
boto = boto or flask.current_app.boto

# checking fence config if aws session can be longer than one hour
role_cache_increase = 0
if flask.current_app.config["MAX_ROLE_SESSION_INCREASE"]:
role_cache_increase = int(
flask.current_app.config["ASSUME_ROLE_CACHE_SECONDS"]
)

assumed_role = boto.assume_role(
role_arn,
expires_in + role_cache_increase,
aws_creds_config,
)

cred = get_value(
assumed_role, "Credentials", InternalError("fail to assume role")
)
return {
rv = {
"aws_access_key_id": get_value(
cred,
"AccessKeyId",
Expand All @@ -631,9 +675,39 @@ def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None):
"aws_session_token": get_value(
cred,
"SessionToken",
InternalError("outdated format. Sesssion token missing"),
InternalError("outdated format. Session token missing"),
),
}
expires_at = get_value(
cred, "Expiration", InternalError("outdated format. Expiration missing")
).timestamp()

# stores back to cache
cls._assume_role_cache[role_arn] = rv, expires_at
if hasattr(flask.current_app, "db"): # we don't have db in startup
with flask.current_app.db.session as session:
session.execute(
"""\
INSERT INTO assume_role_cache (
arn,
expires_at,
aws_access_key_id,
aws_secret_access_key,
aws_session_token
) VALUES (
:arn,
:expires_at,
:aws_access_key_id,
:aws_secret_access_key,
:aws_session_token
) ON CONFLICT (arn) DO UPDATE SET
expires_at = EXCLUDED.expires_at,
aws_access_key_id = EXCLUDED.aws_access_key_id,
aws_secret_access_key = EXCLUDED.aws_secret_access_key,
aws_session_token = EXCLUDED.aws_session_token;""",
dict(arn=role_arn, expires_at=expires_at, **rv),
)
return rv

def bucket_name(self):
"""
Expand Down Expand Up @@ -862,8 +936,13 @@ def delete(self, bucket, file_id):
class GoogleStorageIndexedFileLocation(IndexedFileLocation):
"""
An indexed file that lives in a Google Storage bucket.
_assume_role_cache_gs is used for in mem caching of GCP role credentials
"""

# expected structure { proxy_group_id: (private_key, key_db_entry) }
_assume_role_cache_gs = {}

def get_resource_path(self):
return self.parsed_url.netloc.strip("/") + "/" + self.parsed_url.path.strip("/")

Expand Down Expand Up @@ -947,27 +1026,82 @@ def _generate_google_storage_signed_url(
username,
r_pays_project=None,
):

proxy_group_id = get_or_create_proxy_group_id()

private_key, key_db_entry = get_or_create_primary_service_account_key(
user_id=user_id, username=username, proxy_group_id=proxy_group_id
)
is_cached = False

if proxy_group_id in self._assume_role_cache_gs:
private_key, key_db_entry = self._assume_role_cache_gs.get(proxy_group_id)
is_cached = True
elif hasattr(flask.current_app, "db"):
with flask.current_app.db.session as session:
cache = (
session.query(AssumeRoleCacheGCP)
.filter(AssumeRoleCacheGCP.gcp_proxy_group_id == proxy_group_id)
.first()
)
if cache and cache.expires_at > expiration_time:
rv = (
json.loads(cache.gcp_private_key),
json.loads(cache.gcp_key_db_entry),
)
self._assume_role_cache_gs[proxy_group_id] = rv
private_key, key_db_entry = self._assume_role_cache_gs.get(
proxy_group_id
)
is_cached = True

# Make sure the service account key expiration is later
# than the expiration for the signed url. If it's not, we need to
# provision a new service account key.
#
# NOTE: This should occur very rarely: only when the service account key
# already exists and is very close to expiring.
#
# If our scheduled maintenance script removes the url-signing key
# before the expiration of the url then the url will NOT work
# (even though the url itself isn't expired)
if key_db_entry and key_db_entry.expires < expiration_time:
private_key = create_primary_service_account_key(
# if the credentials were not found in either cache, get or create the key
if is_cached == False:
private_key, key_db_entry = get_or_create_primary_service_account_key(
user_id=user_id, username=username, proxy_group_id=proxy_group_id
)

# Make sure the service account key expiration is later
# than the expiration for the signed url. If it's not, we need to
# provision a new service account key.
#
# NOTE: This should occur very rarely: only when the service account key
# already exists and is very close to expiring.
#
# If our scheduled maintenance script removes the url-signing key
# before the expiration of the url then the url will NOT work
# (even though the url itself isn't expired)
if key_db_entry and key_db_entry.expires < expiration_time:
private_key = create_primary_service_account_key(
user_id=user_id, username=username, proxy_group_id=proxy_group_id
)
self._assume_role_cache_gs[proxy_group_id] = (private_key, key_db_entry)

db_entry = {}
db_entry["gcp_proxy_group_id"] = proxy_group_id
db_entry["gcp_private_key"] = str(private_key)
db_entry["gcp_key_db_entry"] = str(key_db_entry)
db_entry["expires_at"] = expiration_time

if hasattr(flask.current_app, "db"): # we don't have db in startup
with flask.current_app.db.session as session:
session.execute(
"""\
INSERT INTO gcp_assume_role_cache (
expires_at,
gcp_proxy_group_id,
gcp_private_key,
gcp_key_db_entry
) VALUES (
:expires_at,
:gcp_proxy_group_id,
:gcp_private_key,
:gcp_key_db_entry
) ON CONFLICT (gcp_proxy_group_id) DO UPDATE SET
expires_at = EXCLUDED.expires_at,
gcp_proxy_group_id = EXCLUDED.gcp_proxy_group_id,
gcp_private_key = EXCLUDED.gcp_private_key,
gcp_key_db_entry = EXCLUDED.gcp_key_db_entry;""",
db_entry,
)

if config["ENABLE_AUTOMATIC_BILLING_PERMISSION_SIGNED_URLS"]:
give_service_account_billing_access_if_necessary(
private_key,
Expand Down
6 changes: 6 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,12 @@ SYNAPSE_URI: 'https://repo-prod.prod.sagebase.org/auth/v1'
SYNAPSE_JWKS_URI:
SYNAPSE_DISCOVERY_URL:
SYNAPSE_AUTHZ_TTL: 86400

# Role caching for generating presigned URLs. If MAX_ROLE_SESSION_INCREASE is true,
# the assumed-role session is requested for ASSUME_ROLE_CACHE_SECONDS longer than the URL expiry
MAX_ROLE_SESSION_INCREASE: false
ASSUME_ROLE_CACHE_SECONDS: 1800

# RAS refresh_tokens expire in 15 days
RAS_REFRESH_EXPIRATION: 1296000
# Number of projects that can be registered to a Google Service Account
Expand Down
19 changes: 19 additions & 0 deletions fence/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,25 @@ class ServiceAccountToGoogleBucketAccessGroup(Base):
)


# ORM model for the "assume_role_cache" table: one row of cached AWS
# assumed-role credentials per role ARN.
class AssumeRoleCacheAWS(Base):
__tablename__ = "assume_role_cache"

# Role ARN; primary key, so there is at most one cached credential set per role.
arn = Column(String(), primary_key=True)
# Expiry of the cached credentials. The S3 assume_role code in this commit
# compares it against time.time()-based values, i.e. Unix epoch seconds.
# NOTE(review): that code writes a float (`.timestamp()`) into this Integer
# column — confirm that truncation/rounding by the database is acceptable.
expires_at = Column(Integer())
# Temporary credential fields taken from the assume-role response.
aws_access_key_id = Column(String())
aws_secret_access_key = Column(String())
aws_session_token = Column(String())


# ORM model for the "gcp_assume_role_cache" table: one row of cached Google
# service-account key material per proxy group.
class AssumeRoleCacheGCP(Base):
__tablename__ = "gcp_assume_role_cache"

# Proxy group id; primary key, so there is at most one cached row per proxy group.
gcp_proxy_group_id = Column(String(), primary_key=True)
# Expiry used to decide whether the cached row is still usable; the signed-URL
# code in this commit stores the URL's expiration_time here and compares against it.
expires_at = Column(Integer())
# Stringified private key and key DB entry.
# NOTE(review): the writer in this commit uses str(...) while the reader uses
# json.loads(...) — confirm the stored text is actually valid JSON.
gcp_private_key = Column(String())
gcp_key_db_entry = Column(String())


class GA4GHVisaV1(Base):

__tablename__ = "ga4gh_visa_v1"
Expand Down

0 comments on commit f1a571d

Please sign in to comment.