Skip to content

Commit

Permalink
Merge branch 'master' into feat/googlesync
Browse files Browse the repository at this point in the history
  • Loading branch information
Avantol13 committed Jul 15, 2020
2 parents cfbf091 + 85253d1 commit ec71eae
Show file tree
Hide file tree
Showing 6 changed files with 309 additions and 29 deletions.
57 changes: 52 additions & 5 deletions fence/blueprints/data/blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from cdislogging import get_logger

from fence.auth import login_required, require_auth_header, current_token
from fence.auth import login_required, require_auth_header, current_token, get_jwt
from fence.blueprints.data.indexd import (
BlankIndex,
IndexedFile,
get_signed_url_for_file,
)
from fence.errors import Forbidden, InternalError, UserError
from fence.errors import Forbidden, InternalError, UserError, Forbidden
from fence.utils import is_valid_expiration
from fence.authz.auth import check_arborist_auth

Expand Down Expand Up @@ -51,7 +51,6 @@ def delete_data_file(file_id):
@blueprint.route("/upload", methods=["POST"])
@require_auth_header(aud={"data"})
@login_required({"data"})
@check_arborist_auth(resource="/data_file", method="file_upload")
def upload_data_file():
"""
Return a presigned URL for use with uploading a data file.
Expand All @@ -65,17 +64,60 @@ def upload_data_file():
params = flask.request.get_json()
if not params:
raise UserError("wrong Content-Type; expected application/json")

if "file_name" not in params:
raise UserError("missing required argument `file_name`")
blank_index = BlankIndex(file_name=params["file_name"])

authorized = False
authz_err_msg = "Auth error when attempting to get a presigned URL for upload. User must have '{}' access on '{}'."

authz = params.get("authz")
uploader = None

if authz:
# if requesting an authz field, using new authorization method which doesn't
# rely on uploader field, so clear it out
uploader = ""
authorized = flask.current_app.arborist.auth_request(
jwt=get_jwt(),
service="fence",
methods=["create", "write-storage"],
resources=authz,
)
if not authorized:
logger.error(authz_err_msg.format("create' and 'write-storage", authz))
else:
# no 'authz' was provided, so fall back on 'file_upload' logic
authorized = flask.current_app.arborist.auth_request(
jwt=get_jwt(),
service="fence",
methods=["file_upload"],
resources=["/data_file"],
)
if not authorized:
logger.error(authz_err_msg.format("file_upload", "/data_file"))

if not authorized:
raise Forbidden(
"You do not have access to upload data. You either need "
"general file uploader permissions or create & write-storage permissions "
"on the authz resources you specified (if you specified any)."
)

blank_index = BlankIndex(
file_name=params["file_name"], authz=params.get("authz"), uploader=uploader
)
expires_in = flask.current_app.config.get("MAX_PRESIGNED_URL_TTL", 3600)

if "expires_in" in params:
is_valid_expiration(params["expires_in"])
expires_in = min(params["expires_in"], expires_in)

response = {
"guid": blank_index.guid,
"url": blank_index.make_signed_url(params["file_name"], expires_in=expires_in),
}

return flask.jsonify(response), 201


Expand Down Expand Up @@ -172,7 +214,12 @@ def upload_file(file_id):
"""
Get a presigned url to upload a file given by file_id.
"""
result = get_signed_url_for_file("upload", file_id)
file_name = flask.request.args.get("file_name")
if not file_name:
logger.warning(f"file_name not provided, using GUID: {file_id}")
file_name = str(file_id)

result = get_signed_url_for_file("upload", file_id, file_name=file_name)
return flask.jsonify(result)


Expand Down
80 changes: 62 additions & 18 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
ANONYMOUS_USERNAME = "anonymous"


def get_signed_url_for_file(action, file_id):
def get_signed_url_for_file(action, file_id, file_name=None):
requested_protocol = flask.request.args.get("protocol", None)
r_pays_project = flask.request.args.get("userProject", None)

Expand All @@ -74,36 +74,44 @@ def get_signed_url_for_file(action, file_id):
expires_in,
force_signed_url=force_signed_url,
r_pays_project=r_pays_project,
file_name=file_name,
)
return {"url": signed_url}


class BlankIndex(object):
"""
Create a new blank record in indexd, to use for the data upload flow.
A blank record in indexd, to use for the data upload flow.
See docs on data upload flow for further details:
https://github.com/uc-cdis/cdis-wiki/tree/master/dev/gen3/data_upload
"""

def __init__(self, uploader=None, file_name=None, logger_=None):
def __init__(
self, uploader=None, file_name=None, logger_=None, guid=None, authz=None
):
self.logger = logger_ or logger
self.indexd = (
flask.current_app.config.get("INDEXD")
or flask.current_app.config["BASE_URL"] + "/index"
)
self.uploader = uploader or current_token["context"]["user"]["name"]
self.file_name = file_name

@property
def guid(self):
"""
Return the GUID for this record in indexd.
# allow passing "" empty string to signify you do NOT want
# uploader to be populated. If nothing is provided, default
# to parsing from token
if uploader == "":
self.uploader = None
elif uploader:
self.uploader = uploader
else:
self.uploader = current_token["context"]["user"]["name"]

Currently the field in indexd is actually called ``did``.
"""
return self.index_document["did"]
self.file_name = file_name
self.authz = authz

# if a guid is not provided, this will create a blank record for you
self.guid = guid or self.index_document["did"]

@cached_property
def index_document(self):
Expand All @@ -117,8 +125,24 @@ def index_document(self):
"""
index_url = self.indexd.rstrip("/") + "/index/blank/"
params = {"uploader": self.uploader, "file_name": self.file_name}
auth = (config["INDEXD_USERNAME"], config["INDEXD_PASSWORD"])
indexd_response = requests.post(index_url, json=params, auth=auth)

# if attempting to set record's authz field, need to pass token
# through
if self.authz:
params["authz"] = self.authz
token = get_jwt()

auth = None
headers = {"Authorization": f"bearer {token}"}
logger.info("passing users authorization header to create blank record")
else:
logger.info("using indexd basic auth to create blank record")
auth = (config["INDEXD_USERNAME"], config["INDEXD_PASSWORD"])
headers = {}

indexd_response = requests.post(
index_url, json=params, headers=headers, auth=auth
)
if indexd_response.status_code not in [200, 201]:
try:
data = indexd_response.json()
Expand Down Expand Up @@ -303,7 +327,13 @@ def indexed_file_locations(self):
return list(map(IndexedFileLocation.from_url, urls))

def get_signed_url(
self, protocol, action, expires_in, force_signed_url=True, r_pays_project=None
self,
protocol,
action,
expires_in,
force_signed_url=True,
r_pays_project=None,
file_name=None,
):
if self.public and action == "upload":
raise Unauthorized("Cannot upload on public files")
Expand All @@ -316,12 +346,21 @@ def get_signed_url(
if action is not None and action not in SUPPORTED_ACTIONS:
raise NotSupported("action {} is not supported".format(action))
return self._get_signed_url(
protocol, action, expires_in, force_signed_url, r_pays_project
protocol, action, expires_in, force_signed_url, r_pays_project, file_name
)

def _get_signed_url(
self, protocol, action, expires_in, force_signed_url, r_pays_project
self, protocol, action, expires_in, force_signed_url, r_pays_project, file_name
):
if action == "upload":
# NOTE: self.index_document ensures the GUID exists in indexd and raises
# an error if not (which is expected to be caught upstream in the
# app)
blank_record = BlankIndex(uploader="", guid=self.index_document.get("did"))
return blank_record.make_signed_url(
file_name=file_name, expires_in=expires_in
)

if not protocol:
# no protocol specified, return first location as signed url
try:
Expand Down Expand Up @@ -366,6 +405,9 @@ def check_authz(self, action):
if not self.index_document.get("authz"):
raise ValueError("index record missing `authz`")

logger.debug(
f"authz check can user {action} on {self.index_document['authz']} for fence?"
)
return flask.current_app.arborist.auth_request(
jwt=get_jwt(),
service="fence",
Expand All @@ -389,12 +431,14 @@ def check_authorization(self, action):
# have just the `uploader` field and no ACLs. in this case, just check that the
# current user's username matches the uploader field
if self.index_document.get("uploader"):
logger.info("Checking access using `uploader` value")
username = None
if flask.g.token:
username = flask.g.token["context"]["user"]["name"]
else:
username = flask.g.user.username
logger.debug(
f"authz check using uploader field: {self.index_document.get('uploader')} == {username}"
)
return self.index_document.get("uploader") == username

try:
Expand Down
1 change: 1 addition & 0 deletions fence/scripting/fence_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,7 @@ def verify_bucket_access_group(DB):
for access_group in access_groups:
try:
members = manager.get_group_members(access_group.email)
logger.debug(f"google group members response: {members}")
except GoogleAuthError as e:
logger.error("ERROR: Authentication error!!!. Detail {}".format(e))
return
Expand Down
33 changes: 30 additions & 3 deletions openapis/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -443,14 +443,28 @@ paths:
summary: >-
Create a new blank record in indexd to use for uploading a data file
less than 5GB. For files larger than 5GB, please use multipart upload presigned url
description: >-
description: |
This is the first step on the API side for the data upload flow. This
endpoint causes fence to make a request to indexd to create a new, blank
index record, and returns the GUID for this new record and a presigned
URL which a client can then use to upload their data file directly to a
storage bucket. No parameters are accepted because fence retrieves the
username from the current token to send to indexd for the `uploader`
field in the new record.
This API also supports utilizing the newer authorization within the indexing
service by providing an "authz" field.
If an "authz" field is provided, the authorization checks are slightly different:
your user must have *both* "create" and "write-storage" permission on the resources
you are including in the "authz" list.
In addition to a different authorization check, when "authz" is provided the
"uploader" field in indexd will *not* be populated.
Previous authorization check requires a more general, global upload permission:
"file_upload" on "/data_file" resource. When "authz" is *not* provided, this
endpoint will check for that permission for your user.
security:
- OAuth2:
- user
Expand All @@ -461,8 +475,6 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/RequestUploadBlank'


responses:
201:
description: successful operation; created new record in indexd
Expand Down Expand Up @@ -516,6 +528,14 @@ paths:
the configured maximum will be used.
schema:
type: integer
- name: file_name
required: false
in: query
description: >-
the requested file name in the cloud bucket you will upload to.
if not provided, will use the GUID/file_id
schema:
type: string
responses:
'200':
description: successful operation
Expand Down Expand Up @@ -1510,9 +1530,16 @@ components:
expires_in:
type: integer
description: optional integer specifying the presigned URL lifetime
authz:
type: array
items:
type: string
description: requested authorization resources to be set on the
resulting indexed record. You must have proper authorization to set this
example:
file_name: "my_file.bam"
expires_in: 1200
authz: ["/programs/A"]
RequestMultipartUpload:
type: object
required:
Expand Down
Loading

0 comments on commit ec71eae

Please sign in to comment.