diff --git a/.secrets.baseline b/.secrets.baseline index 696fada09b..b8219ad1aa 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,20 +3,17 @@ "files": "poetry.lock", "lines": null }, - "generated_at": "2021-05-25T21:22:19Z", + "generated_at": "2021-06-01T15:16:48Z", "plugins_used": [ - { - "name": "ArtifactoryDetector" - }, { "name": "AWSKeyDetector" }, { - "name": "AzureStorageKeyDetector" + "name": "ArtifactoryDetector" }, { - "name": "Base64HighEntropyString", - "limit": 4.5 + "base64_limit": 4.5, + "name": "Base64HighEntropyString" }, { "name": "BasicAuthDetector" @@ -25,8 +22,8 @@ "name": "CloudantDetector" }, { - "name": "HexHighEntropyString", - "limit": 3.0 + "hex_limit": 3, + "name": "HexHighEntropyString" }, { "name": "IbmCloudIamDetector" @@ -38,15 +35,12 @@ "name": "JwtTokenDetector" }, { - "name": "KeywordDetector", - "keyword_exclude": "" + "keyword_exclude": null, + "name": "KeywordDetector" }, { "name": "MailchimpDetector" }, - { - "name": "NpmDetector" - }, { "name": "PrivateKeyDetector" }, @@ -56,9 +50,6 @@ { "name": "SoftlayerDetector" }, - { - "name": "SquareOAuthDetector" - }, { "name": "StripeDetector" }, @@ -66,253 +57,157 @@ "name": "TwilioKeyDetector" } ], - "filters_used": [ - { - "path": "detect_secrets.filters.allowlist.is_line_allowlisted" - }, - { - "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", - "min_level": 2 - }, - { - "path": "detect_secrets.filters.heuristic.is_indirect_reference" - }, - { - "path": "detect_secrets.filters.heuristic.is_likely_id_string" - }, - { - "path": "detect_secrets.filters.heuristic.is_potential_uuid" - }, - { - "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" - }, - { - "path": "detect_secrets.filters.heuristic.is_sequential_string" - }, - { - "path": "detect_secrets.filters.heuristic.is_templated_secret" - } - ], "results": { "fence/blueprints/storage_creds/google.py": [ { - "type": "Private Key", - "filename": "fence/blueprints/storage_creds/google.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 139 + "line_number": 139, + "type": "Private Key" } ], "fence/blueprints/storage_creds/other.py": [ { - "type": "Base64 High Entropy String", - "filename": "fence/blueprints/storage_creds/other.py", "hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd", "is_verified": false, - "line_number": 66 + "line_number": 66, + "type": "Base64 High Entropy String" } ], "fence/config-default.yaml": [ { - "type": "Basic Auth Credentials", - "filename": "fence/config-default.yaml", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 31 + "line_number": 31, + "type": "Basic Auth Credentials" }, { - "type": "Secret Keyword", - "filename": "fence/config-default.yaml", - "hashed_secret": "dd29ecf524b030a65261e3059c48ab9e1ecb2585", + "hashed_secret": "5d07e1b80e448a213b392049888111e1779a52db", "is_verified": false, - "line_number": 101 + "line_number": 554, + "type": "Secret Keyword" } ], "fence/local_settings.example.py": [ { - "type": "Basic Auth Credentials", - "filename": "fence/local_settings.example.py", "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", "is_verified": false, - "line_number": 6 + "line_number": 6, + "type": "Basic Auth Credentials" }, { - "type": "Secret Keyword", - "filename": "fence/local_settings.example.py", "hashed_secret": "5d07e1b80e448a213b392049888111e1779a52db", "is_verified": false, - "line_number": 63 + "line_number": 63, + "type": "Secret Keyword" } ], "fence/resources/google/utils.py": [ { - "type": "Private Key", - "filename": "fence/resources/google/utils.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 125 + "line_number": 277, + "type": "Private Key" } ], "fence/utils.py": [ { - "type": "Secret Keyword", - "filename": "fence/utils.py", "hashed_secret": "8318df9ecda039deac9868adf1944a29a95c7114", "is_verified": false, - "line_number": 104 - }, - { - "type": "Secret Keyword", - "filename": "fence/utils.py", - "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", - "is_verified": false, - "line_number": 248 + "line_number": 105, + "type": "Secret Keyword" } ], "openapis/swagger.yaml": [ { - "type": "Private Key", - "filename": "openapis/swagger.yaml", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1927 + "line_number": 1927, + "type": "Private Key" }, { - "type": "Secret Keyword", - "filename": "openapis/swagger.yaml", - "hashed_secret": "8cb81a55dff48721f04fe341f33ee5b623dd9144", + "hashed_secret": "bb8e48bd1e73662027a0f0b876b695d4c18f5ed4", "is_verified": false, - "line_number": 1927 + "line_number": 1927, + "type": "Secret Keyword" }, { - "type": "Secret Keyword", - "filename": "openapis/swagger.yaml", - "hashed_secret": "41c39979fd01095376b3e9456f1058c33483dbbe", + "hashed_secret": "7861ab65194de92776ab9cd06d4d7e7e1ec2c36d", "is_verified": false, - "line_number": 1994 + "line_number": 2007, + "type": "Secret Keyword" }, { - "type": "JSON Web Token", - "filename": "openapis/swagger.yaml", "hashed_secret": "d6b66ddd9ea7dbe760114bfe9a97352a5e139134", "is_verified": false, - "line_number": 1994 + "line_number": 2029, + "type": "JSON Web Token" }, { - "type": "Base64 High Entropy String", - "filename": "openapis/swagger.yaml", "hashed_secret": "98c144f5ecbb4dbe575147a39698b6be1a5649dd", "is_verified": false, - "line_number": 2041 - }, - { - "type": "Base64 High Entropy String", - "filename": "openapis/swagger.yaml", - "hashed_secret": "2f58edc671a89190115ecebddf4c70bdd87e3267", - "is_verified": false, - "line_number": 2084 + "line_number": 2041, + "type": "Base64 High Entropy String" } ], - "poetry.lock": [ + "tests/conftest.py": [ { - "type": "Hex High Entropy String", - "filename": "poetry.lock", - "hashed_secret": "640e60795f08744221f6816fe9dc949c58465256", + "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, "line_number": 1177, "type": "Private Key" }, { - "type": "Hex High Entropy String", - "filename": "poetry.lock", - "hashed_secret": "6642e431aaa417100a91214385af6657acb3fab7", - "is_verified": false, - "line_number": 1368 - }, - { - "type": "Hex High Entropy String", - "filename": "poetry.lock", - "hashed_secret": "205b95ce89ff252c6045d78ca9d007e73b45dc00", + "hashed_secret": "227dea087477346785aefd575f91dd13ab86c108", "is_verified": false, "line_number": 1200, "type": "Base64 High Entropy String" } ], - "tests/conftest.py": [ - { - "type": "Private Key", - "filename": "tests/conftest.py", - "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", - "is_verified": false, - "line_number": 1151 - }, + "tests/credentials/google/test_credentials.py": [ { - "type": "Base64 High Entropy String", - "filename": "tests/conftest.py", - "hashed_secret": "227dea087477346785aefd575f91dd13ab86c108", + "hashed_secret": "22afbfecd4124e2eb0e2a79fafdf62b207a8f8c7", "is_verified": false, - "line_number": 1174 + "line_number": 579, + "type": "Secret Keyword" } ], "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem": [ { - "type": "Private Key", - "filename": "tests/keys/2018-05-01T21:29:02Z/jwt_private_key.pem", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1 + "line_number": 1, + "type": "Private Key" } ], "tests/login/test_fence_login.py": [ { - "type": "Secret Keyword", - "filename": "tests/login/test_fence_login.py", "hashed_secret": "d300421e208bfd0d432294de15169fd9b8975def", "is_verified": false, - "line_number": 41 + "line_number": 41, + "type": "Secret Keyword" } ], "tests/ras/test_ras.py": [ { - "type": "Hex High Entropy String", - "filename": "tests/ras/test_ras.py", "hashed_secret": "d9db6fe5c14dc55edd34115cdf3958845ac30882", "is_verified": false, - "line_number": 90 - } - ], - "tests/scripting/test_fence-create.py": [ - { - "type": "Secret Keyword", - "filename": "tests/scripting/test_fence-create.py", - "hashed_secret": "e5e9fa1ba31ecd1ae84f75caaa474f3a663f05f4", - "is_verified": false, - "line_number": 1117 + "line_number": 327, + "type": "Hex High Entropy String" } ], "tests/test-fence-config.yaml": [ { - "type": "Basic Auth Credentials", - "filename": "tests/test-fence-config.yaml", "hashed_secret": "afc848c316af1a89d49826c5ae9d00ed769415f3", "is_verified": false, - "line_number": 31 - }, - { - "type": "Secret Keyword", - "filename": "tests/test-fence-config.yaml", - "hashed_secret": "dd29ecf524b030a65261e3059c48ab9e1ecb2585", - "is_verified": false, - "line_number": 85 - }, - { - "type": "Secret Keyword", - "filename": "tests/test-fence-config.yaml", - "hashed_secret": "1627df13b5cd8b3521d02bd8eb2ca31334b3aef2", - "is_verified": false, - "line_number": 472 + "line_number": 31, + "type": "Basic Auth Credentials" } ] }, - "generated_at": "2021-05-26T14:24:12Z" + "version": "0.13.1", + "word_list": { + "file": null, + "hash": null + } } diff --git a/fence/__init__.py b/fence/__init__.py index 78bc298adc..22e6e6a13d 100644 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -1,16 +1,17 @@ from collections import OrderedDict -import os -import tempfile - -from authutils.oauth2.client import OAuthClient import flask from flask_cors import CORS from flask_sqlalchemy_session import flask_scoped_session, current_session +import os +import tempfile from urllib.parse import urljoin + +from authutils.oauth2.client import OAuthClient +from cdislogging import get_logger +from gen3authz.client.arborist.client import ArboristClient from userdatamodel.driver import SQLAlchemyDriver from werkzeug.middleware.dispatcher import DispatcherMiddleware - from fence.auth import logout, build_redirect_url from fence.blueprints.data.indexd import S3IndexedFileLocation from fence.blueprints.login.utils import allowed_login_redirects, domain @@ -19,7 +20,7 @@ from fence.models import migrate from fence.oidc.client import query_client from fence.oidc.server import server -from fence.resources.audit_service_client import AuditServiceClient +from fence.resources.audit.client import AuditServiceClient from fence.resources.aws.boto_manager import BotoManager from fence.resources.openid.cilogon_oauth2 import CilogonOauth2Client as CilogonClient from fence.resources.openid.cognito_oauth2 import CognitoOauth2Client as CognitoClient @@ -49,12 +50,6 @@ import fence.blueprints.google import fence.blueprints.privacy -from cdislogging import get_logger - -from cdispyutils.config import get_value - -from gen3authz.client.arborist.client import ArboristClient - # for some reason the temp dir does not get created properly if we move # this statement to `_setup_prometheus()` diff --git a/fence/blueprints/data/blueprint.py b/fence/blueprints/data/blueprint.py index 9114a0441f..f983c68456 100644 --- a/fence/blueprints/data/blueprint.py +++ b/fence/blueprints/data/blueprint.py @@ -3,14 +3,15 @@ from cdislogging import get_logger from fence.auth import login_required, require_auth_header, current_token, get_jwt +from fence.authz.auth import check_arborist_auth from fence.blueprints.data.indexd import ( BlankIndex, IndexedFile, get_signed_url_for_file, ) from fence.errors import Forbidden, InternalError, UserError, Forbidden +from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration -from fence.authz.auth import check_arborist_auth logger = get_logger(__name__) @@ -292,6 +293,7 @@ def upload_file(file_id): @blueprint.route("/download/", methods=["GET"]) +@enable_audit_logging def download_file(file_id): """ Get a presigned url to download a file given by file_id. diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index 08cbad4730..9779b42387 100644 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -65,6 +65,14 @@ def get_signed_url_for_file(action, file_id, file_name=None): if no_force_sign_param and no_force_sign_param.lower() == "true": force_signed_url = False + # add the user details to `flask.g.audit_data` first, so they are + # included in the audit log if `IndexedFile(file_id)` raises a 404 + user_info = _get_user_info(sub_type=int) + flask.g.audit_data = { + "username": user_info["username"], + "sub": user_info["user_id"], + } + indexed_file = IndexedFile(file_id) default_expires_in = config.get("MAX_PRESIGNED_URL_TTL", 3600) expires_in = get_valid_expiration_from_request( @@ -72,6 +80,8 @@ def get_signed_url_for_file(action, file_id, file_name=None): default=default_expires_in, ) + prepare_presigned_url_audit_log(requested_protocol, indexed_file) + signed_url = indexed_file.get_signed_url( requested_protocol, action, @@ -86,30 +96,21 @@ def get_signed_url_for_file(action, file_id, file_name=None): if counter: counter.labels(requested_protocol).inc() - if action == "download": # for now only record download requests - create_presigned_url_audit_log( - protocol=requested_protocol, indexed_file=indexed_file, action=action - ) - return {"url": signed_url} -def create_presigned_url_audit_log(indexed_file, action, protocol): - user_info = _get_user_info(sub_to_string=False) +def prepare_presigned_url_audit_log(protocol, indexed_file): + """ + Store in `flask.g.audit_data` the data needed to record an audit log. + """ resource_paths = indexed_file.index_document.get("authz", []) if not resource_paths: # fall back on ACL resource_paths = indexed_file.index_document.get("acl", []) if not protocol and indexed_file.indexed_file_locations: protocol = indexed_file.indexed_file_locations[0].protocol - flask.current_app.audit_service_client.create_presigned_url_log( - username=user_info["username"], - sub=user_info["user_id"], - guid=indexed_file.file_id, - resource_paths=resource_paths, - action=action, - protocol=protocol, - ) + flask.g.audit_data["resource_paths"] = resource_paths + flask.g.audit_data["protocol"] = protocol class BlankIndex(object): @@ -1142,21 +1143,22 @@ def delete(self, bucket, file_id): return ("Failed to delete data file.", status_code) -def _get_user_info(sub_to_string=True): +def _get_user_info(sub_type=str): """ Attempt to parse the request for token to authenticate the user. fallback to populated information about an anonymous user. + By default, cast `sub` to str. Use `sub_type` to override this behavior. """ try: set_current_token(validate_request(aud={"user"})) user_id = current_token["sub"] - if sub_to_string: - user_id = str(user_id) + if sub_type: + user_id = sub_type(user_id) username = current_token["context"]["user"]["name"] except JWTError: # this is fine b/c it might be public data, sign with anonymous username/id user_id = None - if sub_to_string: + if sub_type == str: user_id = ANONYMOUS_USER_ID username = ANONYMOUS_USERNAME diff --git a/fence/blueprints/login/__init__.py b/fence/blueprints/login/__init__.py index 8b5f56baa0..a8f61a3473 100644 --- a/fence/blueprints/login/__init__.py +++ b/fence/blueprints/login/__init__.py @@ -23,6 +23,7 @@ from fence.blueprints.login.ras import RASLogin, RASCallback from fence.blueprints.login.synapse import SynapseLogin, SynapseCallback from fence.errors import InternalError +from fence.resources.audit.utils import enable_audit_logging from fence.restful import RestfulApi from fence.config import config @@ -56,7 +57,7 @@ def make_login_blueprint(app): """ blueprint = flask.Blueprint("login", __name__) - blueprint_api = RestfulApi(blueprint) + blueprint_api = RestfulApi(blueprint, decorators=[enable_audit_logging]) @blueprint.route("", methods=["GET"]) def default_login(): @@ -70,7 +71,7 @@ def default_login(): # fall back on ENABLED_IDENTITY_PROVIDERS.default default_idp = config["ENABLED_IDENTITY_PROVIDERS"]["default"] else: - logger.warn("DEFAULT_LOGIN_IDP not configured") + logger.warning("DEFAULT_LOGIN_IDP not configured") default_idp = None # other login options @@ -89,7 +90,7 @@ def default_login(): for idp, details in enabled_providers.items() ] else: - logger.warn("LOGIN_OPTIONS not configured or empty") + logger.warning("LOGIN_OPTIONS not configured or empty") login_options = [] def absolute_login_url(provider_id, fence_idp=None, shib_idp=None): @@ -325,7 +326,7 @@ def get_all_shib_idps(): all_shib_idps = [] for shib_idp in res.json(): if "entityID" not in shib_idp: - logger.warn( + logger.warning( f"get_all_shib_idps(): 'entityID' field not in IDP data: {shib_idp}. Skipping this IDP." ) continue @@ -333,7 +334,7 @@ def get_all_shib_idps(): if len(shib_idp.get("DisplayNames", [])) > 0: name = get_shib_idp_en_name(shib_idp["DisplayNames"]) else: - logger.warn( + logger.warning( f"get_all_shib_idps(): 'DisplayNames' field not in IDP data: {shib_idp}. Using IDP ID '{idp}' as IDP name." ) name = idp diff --git a/fence/blueprints/login/base.py b/fence/blueprints/login/base.py index 215164b759..436b771639 100644 --- a/fence/blueprints/login/base.py +++ b/fence/blueprints/login/base.py @@ -49,7 +49,7 @@ def get(self): config.get("DEV_LOGIN_COOKIE_NAME"), mock_default_user ) resp = _login(username, self.idp_name) - create_login_log(self.idp_name) + prepare_login_log(self.idp_name) return resp return flask.redirect(self.client.get_auth_url()) @@ -104,18 +104,18 @@ def get(self): raise UserError(result) def post_login(self, user=None, token_result=None): - create_login_log(self.idp_name) - - -def create_login_log(idp_name): - flask.current_app.audit_service_client.create_login_log( - username=flask.g.user.username, - sub=flask.g.user.id, - idp=idp_name, - fence_idp=flask.session.get("fence_idp"), - shib_idp=flask.session.get("shib_idp"), - client_id=flask.session.get("client_id"), - ) + prepare_login_log(self.idp_name) + + +def prepare_login_log(idp_name): + flask.g.audit_data = { + "username": flask.g.user.username, + "sub": flask.g.user.id, + "idp": idp_name, + "fence_idp": flask.session.get("fence_idp"), + "shib_idp": flask.session.get("shib_idp"), + "client_id": flask.session.get("client_id"), + } def _login(username, idp_name): diff --git a/fence/blueprints/oauth2.py b/fence/blueprints/oauth2.py index 6903d51789..07f56f55f2 100644 --- a/fence/blueprints/oauth2.py +++ b/fence/blueprints/oauth2.py @@ -105,7 +105,7 @@ def authorize(*args, **kwargs): elif idp == "shibboleth" and shib_idp: params["shib_idp"] = shib_idp - # store client_id for later use in login endpoint create_login_log() + # store client_id for later use in login endpoint prepare_login_log() flask.session["client_id"] = flask.request.args.get("client_id") login_url = add_params_to_uri(login_url, params) diff --git a/fence/config-default.yaml b/fence/config-default.yaml index e3f401ef0a..f98ffe4b8b 100644 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -624,6 +624,17 @@ AUDIT_SERVICE: 'http://audit-service' ENABLE_AUDIT_LOGS: presigned_url: false login: false +# `PUSH_AUDIT_LOGS_CONFIG.type` is one of: [api, aws_sqs]. +# - if type == api: logs are created by hitting the log creation endpoint. +# - if type == aws_sqs: logs are pushed to an SQS and fields `sqs_url` and +# `region` are required. Fields `aws_access_key_id` and +# `aws_secret_access_key` are optional. +PUSH_AUDIT_LOGS_CONFIG: + type: aws_sqs + sqs_url: + region: + aws_access_key_id: + aws_secret_access_key: # ////////////////////////////////////////////////////////////////////////////////////// # CLOUD API LIBRARY (CIRRUS) AND GOOGLE CONFIGURATION diff --git a/fence/models.py b/fence/models.py index d8ea5fca40..eafebb0c93 100644 --- a/fence/models.py +++ b/fence/models.py @@ -753,7 +753,7 @@ def migrate(driver): _set_on_delete_cascades(driver, delete_user_session, md) delete_user_session.commit() - except: + except Exception: delete_user_session.rollback() raise finally: diff --git a/fence/resources/admin/admin_users.py b/fence/resources/admin/admin_users.py index a5c9e2e511..f86f45178c 100644 --- a/fence/resources/admin/admin_users.py +++ b/fence/resources/admin/admin_users.py @@ -60,7 +60,7 @@ def connect_user_to_project(current_session, usr, project=None): ) msg = "Success: user access" " created for a bucket in the project {0}" response.append(msg.format(proj.name)) - except: + except Exception: msg = "Error user access not" " created for project {0} and bucket {2}" response.append(msg.format(proj.name, bucket["name"])) return response diff --git a/fence/resources/audit/client.py b/fence/resources/audit/client.py new file mode 100644 index 0000000000..eff1f832dc --- /dev/null +++ b/fence/resources/audit/client.py @@ -0,0 +1,192 @@ +import backoff +import boto3 +import json +import requests +import traceback + +from fence.config import config +from fence.errors import InternalError +from fence.resources.audit.utils import is_audit_enabled +from fence.utils import DEFAULT_BACKOFF_SETTINGS + + +class AuditServiceClient: + def __init__(self, service_url, logger): + self.service_url = service_url.rstrip("/") + self.logger = logger + self.push_type = config["PUSH_AUDIT_LOGS_CONFIG"].get("type", "api") + + # audit logs should not be enabled if the audit-service is unavailable + if is_audit_enabled(): + self.logger.info("Enabling audit logs") + self._validate_config() + try: + self._ping() + except Exception: + if self.push_type == "api": + # the audit-service must be available when fence + # is configured to make API calls to it + raise + else: + traceback.print_exc() + self.logger.warning( + "Audit logs are enabled but audit-service is unreachable. Continuing anyway..." + ) + else: + self.logger.warning("NOT enabling audit logs") + return + + if self.push_type == "aws_sqs": + self.sqs = boto3.client( + "sqs", + region_name=config["PUSH_AUDIT_LOGS_CONFIG"]["region"], + aws_access_key_id=config["PUSH_AUDIT_LOGS_CONFIG"].get( + "aws_access_key_id" + ), + aws_secret_access_key=config["PUSH_AUDIT_LOGS_CONFIG"].get( + "aws_secret_access_key" + ), + ) + + @backoff.on_exception(backoff.expo, Exception, **DEFAULT_BACKOFF_SETTINGS) + def _ping(self): + """ + Hit the audit-service status endpoint. + """ + status_url = f"{self.service_url}/_status" + self.logger.debug(f"Checking audit-service availability at {status_url}") + requests.get(status_url) + + def _validate_config(self): + """ + Validate the audit configuration, making sure required fields + are populated. + """ + allowed_push_types = ["api", "aws_sqs"] + if self.push_type not in allowed_push_types: + raise Exception( + f"Configured PUSH_AUDIT_LOGS_CONFIG.type '{self.push_type}' is not one of known types {allowed_push_types}" + ) + + if self.push_type == "aws_sqs": + assert config["PUSH_AUDIT_LOGS_CONFIG"].get( + "sqs_url" + ), f"PUSH_AUDIT_LOGS_CONFIG.type is 'aws_sqs' but PUSH_AUDIT_LOGS_CONFIG.sqs_url is not configured" + assert config["PUSH_AUDIT_LOGS_CONFIG"].get( + "region" + ), f"PUSH_AUDIT_LOGS_CONFIG.type is 'aws_sqs' but PUSH_AUDIT_LOGS_CONFIG.region is not configured" + + def _check_response(self, resp, body): + """ + Check the status code after an audit log creation call, and in case + of error, log details and raise an exception. + + Args: + resp (requests.Response): response from the audit log creation call + body (dict): audit log body for logging in case of error + """ + # The audit-service returns 201 before inserting the log in the DB. + # The requests should only error if the input is incorrect (status + # code 422) or if the service is unreachable. + if resp.status_code != 201: + try: + err = resp.json() + except Exception: + err = resp.text + self.logger.error( + f"Unable to POST audit log `{body}`. Status code: {resp.status_code} - Details:\n{err}" + ) + raise InternalError("Unable to create audit log") + + def _create_audit_log(self, category, data): + """ + Create an audit log - make an API call or push to a queue depending + on the configuration. + + Args: + category (str): audit log category + data (dict): audit log data + """ + self.logger.debug( + f"Creating {category} audit log (push type: {self.push_type})" + ) + if self.push_type == "api": + url = f"{self.service_url}/log/{category}" + resp = requests.post(url, json=data) + self._check_response(resp, data) + elif self.push_type == "aws_sqs": + data["category"] = category + sqs_url = config["PUSH_AUDIT_LOGS_CONFIG"]["sqs_url"] + try: + self.sqs.send_message(QueueUrl=sqs_url, MessageBody=json.dumps(data)) + except Exception: + self.logger.error(f"Error pushing audit log to SQS '{sqs_url}'") + raise + + def create_presigned_url_log( + self, + request_url, + status_code, + username, + sub, + guid, + action, + resource_paths=None, + protocol=None, + ): + """ + Create a presigned URL audit log, or do nothing if auditing is + disabled. + + Args: presigned URL audit log data fields + """ + if not is_audit_enabled("presigned_url"): + return + + data = { + "request_url": request_url, + "status_code": status_code, + "username": username, + "sub": sub, + "guid": guid, + "resource_paths": resource_paths, + "action": action, + "protocol": protocol, + } + self._create_audit_log("presigned_url", data) + + def create_login_log( + self, + request_url, + status_code, + username, + sub, + idp, + fence_idp=None, + shib_idp=None, + client_id=None, + ): + """ + Create a login audit log, or do nothing if auditing is disabled. + + Args: login audit log data fields + """ + if not is_audit_enabled("login"): + return + + # special case for idp=fence when falling back on + # fence_idp=shibboleth and shib_idp=NIH + if shib_idp == "None": + shib_idp = None + + data = { + "request_url": request_url, + "status_code": status_code, + "username": username, + "sub": sub, + "idp": idp, + "fence_idp": fence_idp, + "shib_idp": shib_idp, + "client_id": client_id, + } + self._create_audit_log("login", data) diff --git a/fence/resources/audit/utils.py b/fence/resources/audit/utils.py new file mode 100644 index 0000000000..ffa0d36cda --- /dev/null +++ b/fence/resources/audit/utils.py @@ -0,0 +1,103 @@ +import flask +from functools import wraps +import traceback +from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse + +from cdislogging import get_logger + +from fence.config import config + + +logger = get_logger(__name__) + + +def is_audit_enabled(category=None): + enable_audit_logs = config["ENABLE_AUDIT_LOGS"] or {} + if category: + return enable_audit_logs and enable_audit_logs.get(category, False) + return enable_audit_logs and any(v for v in enable_audit_logs.values()) + + +def _clean_authorization_request_url(request_url): + """ + Remove sensitive data from a login request URL. + """ + parsed_url = urlparse(request_url) + query_params = dict(parse_qsl(parsed_url.query, keep_blank_values=True)) + for param in ["code", "state"]: + if param in query_params: + query_params[param] = "redacted" + url_parts = list(parsed_url) # cast to list to override query params + url_parts[4] = urlencode(query=query_params) + request_url = urlunparse(url_parts) + return request_url + + +def create_audit_log_for_request(response): + """ + Right before returning the response to the user (see `after_this_request` + in `enable_audit_logging` decorator), record an audit log. The data we + need to record the logs are stored in `flask.g.audit_data` before reaching + this code. + """ + try: + method = flask.request.method + endpoint = flask.request.path + audit_data = getattr(flask.g, "audit_data", {}) + request_url = endpoint + if flask.request.query_string: + # could use `flask.request.url` but we don't want the root URL + request_url += f"?{flask.request.query_string.decode('utf-8')}" + + if method == "GET" and endpoint.startswith("/data/download/"): + flask.current_app.audit_service_client.create_presigned_url_log( + status_code=response.status_code, + request_url=request_url, + guid=endpoint[len("/data/download/") :], + action="download", + **audit_data, + ) + elif method == "GET" and endpoint.startswith("/login/"): + request_url = _clean_authorization_request_url(request_url) + flask.current_app.audit_service_client.create_login_log( + status_code=response.status_code, + request_url=request_url, + **audit_data, + ) + except Exception: + # TODO monitor this somehow + traceback.print_exc() + logger.error(f"!!! Unable to create audit log! Returning response anyway...") + + return response + + +def enable_audit_logging(f): + """ + This decorator should be added to any API endpoint for which we + record audit logs. It should not be added to non-audited endpoints, + so that performance is not impacted. + The `create_audit_log_for_request_decorator` decorator is only added + if auditing is enabled, so that performance is not impacted when auditing + is disabled. + Possible improvement: pass a "category" argument to `is_audit_enabled`. + + /!\ This decorator is not enough to enable audit logging for an endpoint. + Logic must be added to `create_audit_log_for_request()` and the audit + service might need to be updated to handle new types of data. + """ + + @wraps(f) + def wrapper(*args, **kwargs): + def create_audit_log_for_request_decorator(response): + return create_audit_log_for_request(response) + + if is_audit_enabled(): + # we can't add the `after_this_request` and + # `create_audit_log_for_request_decorator` decorators to the + # functions directly, because `is_audit_enabled` depends on + # the config being loaded + flask.after_this_request(create_audit_log_for_request_decorator) + return f(*args, **kwargs) + + return wrapper diff --git a/fence/resources/audit_service_client.py b/fence/resources/audit_service_client.py deleted file mode 100644 index b06217985d..0000000000 --- a/fence/resources/audit_service_client.py +++ /dev/null @@ -1,120 +0,0 @@ -import flask -import requests -import time - -from fence.config import config -from fence.errors import InternalError - - -def get_request_url(): - request_url = flask.request.url - base_url = config.get("BASE_URL", "") - if request_url.startswith(base_url): - request_url = request_url[len(base_url) :] - return request_url - - -def is_audit_enabled(category=None): - enable_audit_logs = config.get("ENABLE_AUDIT_LOGS") or {} - if category: - return enable_audit_logs and enable_audit_logs.get(category, False) - return enable_audit_logs and any(v for v in enable_audit_logs.values()) - - -class AuditServiceClient: - def __init__(self, service_url, logger): - self.service_url = service_url.rstrip("/") - self.logger = logger - - # audit logs should not be enabled if the audit-service is unavailable - if is_audit_enabled(): - logger.info("Enabling audit logs") - self.ping() - else: - logger.warn("NOT enabling audit logs") - - def ping(self): - max_tries = 3 - status_url = f"{self.service_url}/_status" - self.logger.debug(f"Checking audit-service availability at {status_url}") - wait_time = 1 - for t in range(max_tries): - r = requests.get(status_url) - if r.status_code == 200: - return # all good! - if t + 1 < max_tries: - self.logger.debug(f"Retrying... (got status code {r.status_code})") - time.sleep(wait_time) - wait_time *= 2 - raise Exception( - f"Audit logs are enabled but audit-service is unreachable at {status_url}: {r.text}" - ) - - def check_response(self, resp, body): - # The audit-service returns 201 before inserting the log in the DB. - # This request should only error if the input is incorrect (status - # code 422) or if the service is unreachable. - if resp.status_code != 201: - try: - err = resp.json() - except Exception: - err = resp.text - self.logger.error(f"Unable to POST audit log `{body}`. Details:\n{err}") - raise InternalError("Unable to create audit log") - - def create_presigned_url_log( - self, - username, - sub, - guid, - resource_paths, - action, - protocol, - ): - if not is_audit_enabled("presigned_url"): - return - - url = f"{self.service_url}/log/presigned_url" - body = { - "request_url": get_request_url(), - "status_code": 200, # only record successful requests for now - "username": username, - "sub": sub, - "guid": guid, - "resource_paths": resource_paths, - "action": action, - "protocol": protocol, - } - resp = requests.post(url, json=body) - self.check_response(resp, body) - - def create_login_log( - self, - username, - sub, - idp, - fence_idp=None, - shib_idp=None, - client_id=None, - ): - if not is_audit_enabled("login"): - return - - # special case for idp=fence when falling back on - # fence_idp=shibboleth and shib_idp=NIH - if shib_idp == "None": - shib_idp = None - - url = f"{self.service_url}/log/login" - body = { - "request_url": get_request_url(), - "status_code": 200, # only record successful requests for now - "username": username, - "sub": sub, - "idp": idp, - "fence_idp": fence_idp, - "shib_idp": shib_idp, - "client_id": client_id, - } - resp = requests.post(url, json=body) - self.check_response(resp, body) diff --git a/fence/resources/openid/ras_oauth2.py b/fence/resources/openid/ras_oauth2.py index 4f1e06bcc2..cd34a4a3dd 100644 --- a/fence/resources/openid/ras_oauth2.py +++ b/fence/resources/openid/ras_oauth2.py @@ -50,7 +50,7 @@ def get_userinfo(self, token, userinfo_endpoint): msg = res.text try: msg = res.json() - except: + except Exception: pass self.logger.error( "Unable to get visa: status_code: {}, message: {}".format( diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py index 5c72a6f685..9eaa9434c6 100644 --- a/fence/sync/sync_users.py +++ b/fence/sync/sync_users.py @@ -1806,7 +1806,7 @@ def _add_dbgap_study_to_arborist(self, dbgap_study, dbgap_config): def _is_arborist_healthy(self): if not self.arborist_client: - self.logger.warn("no arborist client set; skipping arborist dbgap sync") + self.logger.warning("no arborist client set; skipping arborist dbgap sync") return False if not self.arborist_client.healthy(): # TODO (rudyardrichter, 2019-01-07): add backoff/retry here diff --git a/fence/utils.py b/fence/utils.py index 232736e343..8dc949f140 100644 --- a/fence/utils.py +++ b/fence/utils.py @@ -9,6 +9,7 @@ import requests from urllib.parse import urlencode from urllib.parse import parse_qs, urlsplit, urlunsplit +import sys from cdislogging import get_logger import flask diff --git a/tests/conftest.py b/tests/conftest.py index 5fb46dcc46..cf7f38ceea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -185,7 +185,7 @@ def mock_arborist_requests(request): "GET": ("", 200) }, "arborist/auth/request": { - "POST": ('{"auth": "false"}', 403) + "POST": ({"auth": False}, 403) } } """ diff --git a/tests/data/test_data.py b/tests/data/test_data.py index 8adda89ae1..53725702f3 100644 --- a/tests/data/test_data.py +++ b/tests/data/test_data.py @@ -1463,9 +1463,7 @@ def test_abac( cloud_manager, google_signed_url, ): - mock_arborist_requests( - {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}} - ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) indexd_client = indexd_client_with_arborist("test_abac") indexed_file_location = indexd_client["indexed_file_location"] path = "/data/download/1" @@ -1485,9 +1483,7 @@ def test_abac( assert response.status_code == 200 assert "url" in list(response.json.keys()) - mock_arborist_requests( - {"arborist/auth/request": {"POST": ('{"auth": "false"}', 403)}} - ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": False}, 403)}}) response = client.get(path, headers=headers, query_string=query_string) assert response.status_code == 403 diff --git a/tests/login/test_login_redirect.py b/tests/login/test_login_redirect.py index 0fff7c0796..149995d9ad 100644 --- a/tests/login/test_login_redirect.py +++ b/tests/login/test_login_redirect.py @@ -11,65 +11,85 @@ import mock import pytest +from unittest.mock import MagicMock, patch +from fence.blueprints.login import IDP_URL_MAP -@pytest.mark.parametrize( - "idp", ["google", "shib", "microsoft", "okta", "orcid", "ras", "cilogon"] -) + +@pytest.mark.parametrize("idp", list(IDP_URL_MAP.values())) @mock.patch( "fence.resources.openid.ras_oauth2.RASOauth2Client.get_value_from_discovery_doc" ) -def test_valid_redirect_base(mock_discovery, app, client, idp): +@mock.patch( + "fence.resources.openid.okta_oauth2.OktaOauth2Client.get_value_from_discovery_doc" +) +@mock.patch( + "fence.resources.openid.cognito_oauth2.CognitoOauth2Client.get_value_from_discovery_doc" +) +def test_valid_redirect_base( + mock_cognito_discovery, mock_okta_discovery, mock_ras_discovery, app, client, idp +): """ Check that a valid redirect is allowed, using the base URL for this application as the destination for the redirect. """ - mock_discovery.return_value = "https://ras/token_endpoint" + if idp == "fence": + mocked_generate_authorize_redirect = MagicMock( + return_value=("authorization_url", "state") + ) + patch( + f"flask.current_app.fence_client.generate_authorize_redirect", + mocked_generate_authorize_redirect, + ).start() + elif idp == "ras": + mock_ras_discovery.return_value = "https://ras/token_endpoint" + elif idp == "cognito": + mock_cognito_discovery.return_value = "" + elif idp == "okta": + mock_okta_discovery.return_value = "" redirect = app.config["BASE_URL"] response = client.get("/login/{}?redirect={}".format(idp, redirect)) - - if idp == "okta": - assert response.status_code == 500 - else: - assert response.status_code == 302 - - """ assert response.status_code == 302 - """ -@pytest.mark.parametrize( - "idp", ["google", "shib", "microsoft", "okta", "orcid", "ras", "cilogon"] -) +@pytest.mark.parametrize("idp", list(IDP_URL_MAP.values())) @mock.patch( "fence.resources.openid.ras_oauth2.RASOauth2Client.get_value_from_discovery_doc" ) -def test_valid_redirect_oauth(mock_discovery, client, oauth_client, idp): +@mock.patch( + "fence.resources.openid.okta_oauth2.OktaOauth2Client.get_value_from_discovery_doc" +) +@mock.patch( + "fence.resources.openid.cognito_oauth2.CognitoOauth2Client.get_value_from_discovery_doc" +) +def test_valid_redirect_oauth( + mock_cognito_discovery, + mock_okta_discovery, + mock_ras_discovery, + client, + oauth_client, + idp, +): """ Check that a valid redirect is allowed. Here we use the URL from the test OAuth client. """ - mock_discovery.return_value = "https://ras/token_endpoint" + if idp == "ras": + mock_ras_discovery.return_value = "https://ras/token_endpoint" + elif idp == "cognito": + mock_cognito_discovery.return_value = "" + elif idp == "okta": + mock_okta_discovery.return_value = "" response = client.get("/login/{}?redirect={}".format(idp, oauth_client.url)) - - if idp == "okta": - assert response.status_code == 500 - else: - assert response.status_code == 302 - """ assert response.status_code == 302 - """ -@pytest.mark.parametrize( - "idp", ["google", "shib", "microsoft", "okta", "orcid", "ras", "cilogon"] -) +@pytest.mark.parametrize("idp", list(IDP_URL_MAP.values())) def test_invalid_redirect_fails(client, idp): """ Check that giving a bogus redirect to the login endpoint returns an error. """ response = client.get("/login/{}?redirect=https://evil-site.net".format(idp)) - assert response.status_code == 400 diff --git a/tests/test-fence-config.yaml b/tests/test-fence-config.yaml index 8cf0897683..b11e518559 100644 --- a/tests/test-fence-config.yaml +++ b/tests/test-fence-config.yaml @@ -479,6 +479,8 @@ AUDIT_SERVICE: 'http://audit-service' ENABLE_AUDIT_LOGS: presigned_url: false login: false +PUSH_AUDIT_LOGS_CONFIG: + type: api # ////////////////////////////////////////////////////////////////////////////////////// # CLOUD API LIBRARY (CIRRUS) CONFIGURATION diff --git a/tests/test_audit_service.py b/tests/test_audit_service.py index bae8543e6d..6d5634bddb 100644 --- a/tests/test_audit_service.py +++ b/tests/test_audit_service.py @@ -2,6 +2,7 @@ Tests for the Audit Service integration: - test the creation of presigned URL audit logs - test the creation of login audit logs +- test the SQS flow Note 1: there is no test for the /oauth2 endpoint: the /oauth2 endpoint should redirect the user to the /login endpoint (tested in @@ -16,18 +17,84 @@ """ +import boto3 import flask +import json import jwt import mock import pytest import time from unittest.mock import ANY, MagicMock, patch +import fence from fence.config import config from fence.blueprints.login import IDP_URL_MAP +from fence.resources.audit.utils import _clean_authorization_request_url from tests import utils +def test_clean_authorization_request_url(): + """ + Test that "code" and "state" query parameters in login URLs are redacted. + """ + redacted_url = _clean_authorization_request_url( + "https://my-data-commons.com/login/fence/login?code=my-secret-code&state=my-secret-state&abc=my-other-param" + ) + assert ( + redacted_url + == "https://my-data-commons.com/login/fence/login?code=redacted&state=redacted&abc=my-other-param" + ) + + +@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True) +def test_disabled_audit( + client, + user_client, + mock_arborist_requests, + indexd_client_with_arborist, + kid, + rsa_private_key, + primary_google_service_account, + cloud_manager, + google_signed_url, + monkeypatch, +): + """ + Disable all audit logs, get a presigned URL from Fence and make sure the + logic to create audit logs did not run. + """ + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) + + protocol = "gs" + guid = "dg.hello/abc" + path = f"/data/download/{guid}" + if protocol: + path += f"?protocol={protocol}" + resource_paths = ["/my/resource/path1", "/path2"] + indexd_client_with_arborist(resource_paths) + headers = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, str(user_client.user_id) + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + + audit_decorator_mocker = mock.patch( + "fence.resources.audit.utils.create_audit_log_for_request", + new_callable=mock.Mock, + ) + with audit_decorator_mocker as audit_decorator: + response = client.get(path, headers=headers) + assert response.status_code == 200, response + assert response.json.get("url") + audit_decorator.assert_not_called() + + ############################ # Presigned URL audit logs # ############################ @@ -59,11 +126,9 @@ def test_presigned_url_log( was made to create an audit log. Test with and without a requested protocol. """ - mock_arborist_requests( - {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}} - ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) @@ -72,12 +137,15 @@ def test_presigned_url_log( if protocol: path += f"?protocol={protocol}" resource_paths = ["/my/resource/path1", "/path2"] - indexd_client = indexd_client_with_arborist(resource_paths) + indexd_client_with_arborist(resource_paths) headers = { "Authorization": "Bearer " + jwt.encode( utils.authorized_download_context_claims( - user_client.username, user_client.user_id + # cast user_id to str because that's what we get back + # from the DB, but audit-service expects an int. + user_client.username, + str(user_client.user_id), ), key=rsa_private_key, headers={"kid": kid}, @@ -102,7 +170,7 @@ def test_presigned_url_log( "request_url": path, "status_code": 200, "username": user_client.username, - "sub": user_client.user_id, + "sub": user_client.user_id, # it's an int now "guid": guid, "resource_paths": resource_paths, "action": "download", @@ -130,23 +198,21 @@ def test_presigned_url_log_acl( Same as `test_presigned_url_log`, but the record contains `acl` instead of `authz`. The ACL is ["phs000178", "phs000218"] as defined in conftest. """ - mock_arborist_requests( - {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}} - ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) protocol = "gs" guid = "dg.hello/abc" path = f"/data/download/{guid}?protocol={protocol}" - indexd_client = indexd_client_with_arborist(None) + indexd_client_with_arborist(None) headers = { "Authorization": "Bearer " + jwt.encode( utils.authorized_download_context_claims( - user_client.username, user_client.user_id + user_client.username, str(user_client.user_id) ), key=rsa_private_key, headers={"kid": kid}, @@ -184,7 +250,7 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch): public data. """ audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) @@ -228,13 +294,12 @@ def test_presigned_url_log_disabled( monkeypatch, ): """ - Disable presigned URL logs, enable login logs, get a presigned URL from Fence and make sure no audit log was created. + Disable presigned URL logs, enable login logs, get a presigned URL from + Fence and make sure no audit log was created. """ - mock_arborist_requests( - {"arborist/auth/request": {"POST": ('{"auth": "true"}', 200)}} - ) + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem( config, "ENABLE_AUDIT_LOGS", {"presigned_url": False, "login": True} @@ -246,12 +311,12 @@ def test_presigned_url_log_disabled( if protocol: path += f"?protocol={protocol}" resource_paths = ["/my/resource/path1", "/path2"] - indexd_client = indexd_client_with_arborist(resource_paths) + indexd_client_with_arborist(resource_paths) headers = { "Authorization": "Bearer " + jwt.encode( utils.authorized_download_context_claims( - user_client.username, user_client.user_id + user_client.username, str(user_client.user_id) ), key=rsa_private_key, headers={"kid": kid}, @@ -274,19 +339,37 @@ def test_presigned_url_log_disabled( @pytest.mark.parametrize("indexd_client", ["s3_and_gs"], indirect=True) -def test_presigned_url_log_failure(client, indexd_client, db_session, monkeypatch): +def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monkeypatch): """ If Fence does not return a presigned URL, no audit log should be created. """ audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) - path = "/data/download/1" + + guid = "dg.hello/abc" + path = f"/data/download/{guid}" with audit_service_mocker as audit_service_requests: + audit_service_requests.post.return_value = MockResponse( + data={}, + status_code=201, + ) response = client.get(path) assert response.status_code == 401 - audit_service_requests.post.assert_not_called() + audit_service_requests.post.assert_called_once_with( + "http://audit-service/log/presigned_url", + json={ + "request_url": path, + "status_code": 401, + "username": "anonymous", + "sub": None, + "guid": guid, + "resource_paths": [], + "action": "download", + "protocol": "s3", + }, + ) #################### @@ -299,13 +382,12 @@ def test_presigned_url_log_failure(client, indexd_client, db_session, monkeypatc "fence.resources.openid.ras_oauth2.RASOauth2Client.get_value_from_discovery_doc" ) def test_login_log_login_endpoint( - mock_discovery, app, client, idp, mock_arborist_requests, rsa_private_key, - db_session, + db_session, # do not remove :-) See note at top of file monkeypatch, ): """ @@ -314,7 +396,7 @@ def test_login_log_login_endpoint( """ mock_arborist_requests() audit_service_mocker = mock.patch( - "fence.resources.audit_service_client.requests", new_callable=mock.Mock + "fence.resources.audit.client.requests", new_callable=mock.Mock ) monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"login": True}) @@ -403,3 +485,131 @@ def test_login_log_login_endpoint( if get_user_id_patch: get_user_id_patch.stop() + + +########################## +# Push audit logs to SQS # +########################## + + +def mock_audit_service_sqs(app): + # the `PUSH_AUDIT_LOGS_CONFIG` config has already been loaded during + # the app init, so monkeypatching it is not enough + fence.config["PUSH_AUDIT_LOGS_CONFIG"] = { + "type": "aws_sqs", + "sqs_url": "mocked-sqs-url", + "region": "region", + } + + # mock the ping function so we don't try to reach the audit-service + mock.patch( + "fence.resources.audit.client.AuditServiceClient._ping", + new_callable=mock.Mock, + ).start() + + # mock the SQS + mocked_sqs_client = MagicMock() + patch("fence.resources.audit.client.boto3.client", mocked_sqs_client).start() + mocked_sqs = boto3.client( + "sqs", + region_name=config["PUSH_AUDIT_LOGS_CONFIG"]["region"], + endpoint_url="http://localhost", + ) + mocked_sqs.url = config["PUSH_AUDIT_LOGS_CONFIG"]["sqs_url"] + mocked_sqs_client.return_value = mocked_sqs + + # the audit-service client has already been loaded during the app + # init, so reload it with the new config + fence._setup_audit_service_client(app) + + return mocked_sqs + + +@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True) +def test_presigned_url_log_push_to_sqs( + app, + client, + user_client, + mock_arborist_requests, + indexd_client_with_arborist, + kid, + rsa_private_key, + primary_google_service_account, + cloud_manager, + google_signed_url, + monkeypatch, +): + """ + Get a presigned URL from Fence and make sure an audit log was pushed + to the configured SQS. + """ + mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) + monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True}) + mocked_sqs = mock_audit_service_sqs(app) + + # get a presigned URL + protocol = "gs" + guid = "dg.hello/abc" + path = f"/data/download/{guid}?protocol={protocol}" + resource_paths = ["/my/resource/path1", "/path2"] + indexd_client_with_arborist(resource_paths) + headers = { + "Authorization": "Bearer " + + jwt.encode( + utils.authorized_download_context_claims( + user_client.username, str(user_client.user_id) + ), + key=rsa_private_key, + headers={"kid": kid}, + algorithm="RS256", + ).decode("utf-8") + } + response = client.get(path, headers=headers) + assert response.status_code == 200, response + assert response.json.get("url") + + expected_audit_data = { + "request_url": path, + "status_code": 200, + "username": user_client.username, + "sub": user_client.user_id, + "guid": guid, + "resource_paths": resource_paths, + "action": "download", + "protocol": protocol, + "category": "presigned_url", + } + mocked_sqs.send_message.assert_called_once_with( + MessageBody=json.dumps(expected_audit_data), QueueUrl=mocked_sqs.url + ) + + +def test_login_log_push_to_sqs( + app, + client, + mock_arborist_requests, + rsa_private_key, + db_session, # do not remove :-) See note at top of file + monkeypatch, +): + """ + Log in and make sure an audit log was pushed to the configured SQS. + """ + mock_arborist_requests() + monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"login": True}) + mocked_sqs = mock_audit_service_sqs(app) + + username = "test@test" + mocked_get_user_id = MagicMock(return_value={"email": username}) + get_user_id_patch = patch( + "flask.current_app.google_client.get_user_id", mocked_get_user_id + ) + get_user_id_patch.start() + + path = "/login/google/login" + response = client.get(path) + assert response.status_code == 200, response + # not checking the parameters here because we can't json.dumps "sub: ANY" + mocked_sqs.send_message.assert_called_once() + + get_user_id_patch.stop()