Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Downloads through GA4GH DRS are recorded in audit service #1117

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions fence/blueprints/data/blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
)
from fence.config import config
from fence.errors import Forbidden, InternalError, UserError, Unauthorized
from fence.resources.audit.utils import enable_audit_logging
from fence.utils import get_valid_expiration


Expand Down Expand Up @@ -328,7 +327,6 @@ def upload_file(file_id):


@blueprint.route("/download/<path:file_id>", methods=["GET"])
@enable_audit_logging
def download_file(file_id):
"""
Get a presigned url to download a file given by file_id.
Expand Down
5 changes: 4 additions & 1 deletion fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
AccountSasPermissions,
generate_blob_sas,
)
from fence import auth

from fence import auth
from fence.auth import (
get_jwt,
current_token,
Expand All @@ -47,7 +47,9 @@
give_service_account_billing_access_if_necessary,
)
from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports
from fence.resources.audit.utils import enable_audit_logging
from fence.utils import get_valid_expiration_from_request

from . import multipart_upload
from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id
from ...models import AssumeRoleCacheGCP
Expand All @@ -66,6 +68,7 @@
ANONYMOUS_USERNAME = "anonymous"


@enable_audit_logging
def get_signed_url_for_file(
action,
file_id,
Expand Down
1 change: 1 addition & 0 deletions fence/blueprints/ga4gh.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import flask
from flask import request

from fence.errors import UserError
from fence.config import config

Expand Down
19 changes: 17 additions & 2 deletions fence/resources/audit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def create_audit_log_for_request(response):
in `enable_audit_logging` decorator), record an audit log. The data we
need to record the logs are stored in `flask.g.audit_data` before reaching
this code.

TODO The audit service has the ability to record presigned URL "upload" logs but we are not
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a ticket for this TODO?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can create a ticket but tbh this is more of an explanation for future-me looking for upload audit logs 🙂

currently sending those logs. We would need to:
- add the `@enable_audit_logging` decorator to `init_multipart_upload` (single upload requests
are handled by `get_signed_url_for_file` which is already decorated).
- update this function to send the appropriate data when those endpoints are called.
- add upload unit tests to `test_audit_service.py`.
"""
try:
method = flask.request.method
Expand All @@ -49,11 +56,19 @@ def create_audit_log_for_request(response):
# could use `flask.request.url` but we don't want the root URL
request_url += f"?{flask.request.query_string.decode('utf-8')}"

if method == "GET" and endpoint.startswith("/data/download/"):
if method == "GET" and (
endpoint.startswith("/data/download/")
or endpoint.startswith("/ga4gh/drs/v1/objects/")
):
if endpoint.startswith("/data/download/"):
guid = endpoint[len("/data/download/") :]
else:
guid = endpoint[len("/ga4gh/drs/v1/objects/") :]
guid = guid.split("/access/")[0]
flask.current_app.audit_service_client.create_presigned_url_log(
status_code=response.status_code,
request_url=request_url,
guid=endpoint[len("/data/download/") :],
guid=guid,
action="download",
**audit_data,
)
Expand Down
56 changes: 41 additions & 15 deletions tests/test_audit_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,10 @@ def __init__(self, data, status_code=200):


@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
@pytest.mark.parametrize("protocol", ["gs", None])
def test_presigned_url_log(
endpoint,
protocol,
client,
user_client,
Expand All @@ -133,9 +135,12 @@ def test_presigned_url_log(
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if protocol:
path += f"?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}"
if protocol:
path += f"?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}"
resource_paths = ["/my/resource/path1", "/path2"]
indexd_client_with_arborist(resource_paths)
headers = {
Expand Down Expand Up @@ -182,7 +187,9 @@ def test_presigned_url_log(
@pytest.mark.parametrize(
"indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True
)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_acl(
endpoint,
client,
user_client,
mock_arborist_requests,
Expand All @@ -206,7 +213,10 @@ def test_presigned_url_log_acl(

protocol = "gs"
guid = "dg.hello/abc"
path = f"/data/download/{guid}?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
indexd_client_with_arborist(None)
headers = {
"Authorization": "Bearer "
Expand Down Expand Up @@ -244,7 +254,8 @@ def test_presigned_url_log_acl(


@pytest.mark.parametrize("public_indexd_client", ["s3_and_gs"], indirect=True)
def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_public(endpoint, client, public_indexd_client, monkeypatch):
"""
Same as `test_presigned_url_log`, but with an anonymous user downloading
public data.
Expand All @@ -254,8 +265,12 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
)
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

protocol = "s3"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"

with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
Expand All @@ -275,13 +290,15 @@ def test_presigned_url_log_public(client, public_indexd_client, monkeypatch):
"guid": guid,
"resource_paths": [],
"action": "download",
"protocol": "s3",
"protocol": protocol,
},
)


@pytest.mark.parametrize("indexd_client_with_arborist", ["s3_and_gs"], indirect=True)
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_disabled(
endpoint,
client,
user_client,
mock_arborist_requests,
Expand All @@ -307,7 +324,10 @@ def test_presigned_url_log_disabled(

protocol = "gs"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
if endpoint == "download":
path = f"/data/download/{guid}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
if protocol:
path += f"?protocol={protocol}"
resource_paths = ["/my/resource/path1", "/path2"]
Expand All @@ -324,9 +344,6 @@ def test_presigned_url_log_disabled(
)
}

# protocol=None should fall back to s3 (first indexed location):
expected_protocol = protocol or "s3"

with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
data={},
Expand All @@ -339,17 +356,26 @@ def test_presigned_url_log_disabled(


@pytest.mark.parametrize("indexd_client", ["s3_and_gs"], indirect=True)
def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monkeypatch):
@pytest.mark.parametrize("endpoint", ["download", "ga4gh-drs"])
def test_presigned_url_log_unauthorized(
endpoint, client, indexd_client, db_session, monkeypatch
):
"""
If Fence does not return a presigned URL, no audit log should be created.
If Fence does not return a presigned URL, an audit log with the appropriate status
code should be created.
"""
audit_service_mocker = mock.patch(
"fence.resources.audit.client.requests", new_callable=mock.Mock
)
monkeypatch.setitem(config, "ENABLE_AUDIT_LOGS", {"presigned_url": True})

protocol = "s3"
guid = "dg.hello/abc"
path = f"/data/download/{guid}"
path = f"/data/download/{guid}?protocol={protocol}"
if endpoint == "download":
path = f"/data/download/{guid}?protocol={protocol}"
else:
path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol}"
with audit_service_mocker as audit_service_requests:
audit_service_requests.post.return_value = MockResponse(
data={},
Expand All @@ -367,7 +393,7 @@ def test_presigned_url_log_unauthorized(client, indexd_client, db_session, monke
"guid": guid,
"resource_paths": [],
"action": "download",
"protocol": "s3",
"protocol": protocol,
},
)

Expand Down
Loading