Skip to content

Commit

Permalink
Merge pull request #39 from uc-cdis/feat/update-request-create-policy…
Browse files Browse the repository at this point in the history
…-for-role

Feat/update request create policy for role
  • Loading branch information
mfshao committed Jul 21, 2022
2 parents 7655eba + b824fa1 commit 63442fd
Show file tree
Hide file tree
Showing 9 changed files with 660 additions and 284 deletions.
6 changes: 5 additions & 1 deletion docs/functionality_and_flow.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@ A user interacts with the service in the following way:
- From a Gen3 Commons, the implementing organization will expose a "Request Access" option which will be displayed to users if they are logged in and do not currently have access to the resource (Program / Project) or policy.
- The user is directed to an external form to complete an access request which should contain at a minimum:
- User name
- Resource or policy access is being requested for
- One of the following for which access is being requested:
- policy
- resource_paths + existing role_ids
- resource_path[s] without a role_id (to be assigned default reader roles, currently `peregrine_reader`, `guppy_reader`, and `fence_storage_reader`)
- Level of access
- `resource_paths` will take precedence over `resource_path` if both are present in the request.
- The service will generate a unique request ID (per user+resource or user+policy) and store this along with the username and resource or policy ID.
- The service will send this data to the capturing system/process for review by appropriate administrative staff for the Gen3 Commons in question.
- _The User may or may not receive confirmation of their request being received - this would be handled by the capturing system/process._
Expand Down
10 changes: 10 additions & 0 deletions docs/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ components:
resource_path:
title: Resource Path
type: string
resource_paths:
items:
type: string
title: Resource Paths
type: array
role_ids:
items:
type: string
title: Role Ids
type: array
status:
title: Status
type: string
Expand Down
225 changes: 26 additions & 199 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "requestor"
version = "1.6.0"
version = "1.7.0"
description = "Gen3 Access Request Service"
authors = ["CTDS UChicago <cdis@uchicago.edu>"]
license = "Apache-2.0"
Expand All @@ -16,7 +16,7 @@ alembic = "^1.4.2"
authutils = "^6.2.1"
cdislogging = "^1.0.0"
fastapi = "^0.65.0"
gen3authz = "^1.5.1"
gen3authz = "^2.0.0"
gen3config = "^1.0.0"
httpx = ">=0.20.0,<1.0.0"
jsonschema = "^4.6.0"
Expand Down
129 changes: 109 additions & 20 deletions src/requestor/arborist.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,16 +84,17 @@ def get_resource_paths_for_policy(expanded_policies: list, policy_id: str) -> li


def get_auto_policy_id_for_resource_path(resource_path: str) -> str:
"""
For backwards compatibility, when given a `resource_path` instead of a
`policy_id`, we automatically generate a policy with `read` and
`read-storage` access to the provided `resource_path`.
"""
resources = resource_path.split("/")
policy_id = ".".join(resources[1:]) + "_accessor"
return policy_id


def get_auto_policy_id_for_resource_paths(resource_paths: list[str]) -> str:
resources = "_".join([".".join(r.split("/")[1:]) for r in resource_paths])
policy_id = resources + "_accessor"
return policy_id


async def user_has_policy(
arborist_client: ArboristClient, username: str, policy_id: str
) -> bool:
Expand All @@ -107,21 +108,16 @@ async def user_has_policy(
@maybe_sync
async def create_arborist_policy(
arborist_client: ArboristClient,
resource_path: str,
resource_paths: list[str],
resource_description: str = None,
):
# create the resource
logger.debug(f"Attempting to create resource {resource_path} in Arborist")
resources = resource_path.split("/")
resource_name = resources[-1]
parent_path = "/".join(resources[:-1])
resource = {
"name": resource_name,
"description": resource_description,
}
res = arborist_client.create_resource(parent_path, resource, create_parents=True)
if inspect.isawaitable(res):
await res
"""
For backwards compatibility, when given a `resource_path[s]` instead of a
`policy_id` (and without existing `role_ids`), we automatically generate
a policy with `accessor` access to the provided `resource_paths`.
"""
for resource_path in resource_paths:
await create_resource(arborist_client, resource_path, resource_description)

# Create the roles needed to query and download data.
# If they already exist arborist would "Do Nothing"
Expand Down Expand Up @@ -166,13 +162,13 @@ async def create_arborist_policy(
await res

# create the policy
policy_id = get_auto_policy_id_for_resource_path(resource_path)
policy_id = get_auto_policy_id_for_resource_paths(resource_paths)
logger.debug(f"Attempting to create policy {policy_id} in Arborist")
policy = {
"id": policy_id,
"description": "policy created by requestor",
"role_ids": ["peregrine_reader", "guppy_reader", "fence_storage_reader"],
"resource_paths": [resource_path],
"resource_paths": resource_paths,
}
res = arborist_client.create_policy(policy, skip_if_exists=True)
if inspect.isawaitable(res):
Expand All @@ -181,6 +177,99 @@ async def create_arborist_policy(
return policy_id


@maybe_sync
async def create_arborist_policy_for_role_ids(
arborist_client: ArboristClient,
role_ids: list[str],
resource_paths: list[str],
resource_description: str = None,
):
for resource_path in resource_paths:
await create_resource(arborist_client, resource_path, resource_description)

# create the policy
policy_id = get_auto_policy_id_for_resource_paths_and_role_ids(
resource_paths, role_ids
)
logger.debug(f"Attempting to create policy {policy_id} in Arborist")
policy = {
"id": policy_id,
"description": "policy created by requestor",
"role_ids": role_ids,
"resource_paths": resource_paths,
}
res = arborist_client.create_policy(policy, skip_if_exists=True)
if inspect.isawaitable(res):
await res

return policy_id


@maybe_sync
async def create_resource(
arborist_client: ArboristClient,
resource_path: str,
resource_description: str = None,
):
# create the resources
logger.debug(f"Attempting to create resource {resource_path} in Arborist")
resources = resource_path.split("/")
resource_name = resources[-1]
parent_path = "/".join(resources[:-1])
resource = {
"name": resource_name,
"description": resource_description,
}
res = arborist_client.create_resource(parent_path, resource, create_parents=True)
if inspect.isawaitable(res):
await res


def get_auto_policy_id_for_resource_paths_and_role_ids(
resource_paths: list[str], role_ids: list[str]
) -> str:
"""
Create a policy_name given role_ids and resource_paths with format
'[resource_paths]_[role_ids]'
where items have been concatenated with underscore ('_').
The logic for each resource_path matches `get_auto_policy_id_for_resource_path`:
- content up to and including first slash ('/') is removed
- following slashes are replaced with a dot ('.').
The logic for the role_ids is:
- slashes are removed.
As an example, with
resource_paths=['/study/123456','other_path/study/7890', '/another_resource']
and
role_ids = ["study_registrant", "/other-resource-user", "/study-user"]
the expected result is
policy_id = 'study.123456_study.7890_another_resource_study_registrant_other-resource-user_study-user'
See `test_get_auto_policy_id_for_resource_paths_and_role_ids` for more examples.
"""
resources = "_".join([".".join(r.split("/")[1:]) for r in resource_paths])
roles = "_".join(["".join(r.split("/")) for r in role_ids])
policy_id = resources + "_" + roles
return policy_id


@maybe_sync
async def list_roles(arborist_client: ArboristClient) -> dict:
"""
We can cache this data later if needed, but it's tricky - the length
we can cache depends on the source of the information, so this MUST
invalidate the cache whenever Arborist adds a role.
For now, just make a call to Arborist every time we need this information.
"""
res = arborist_client.list_roles()
if inspect.isawaitable(res):
res = await res

return res


async def grant_user_access_to_policy(
arborist_client: ArboristClient,
username: str,
Expand Down
71 changes: 53 additions & 18 deletions src/requestor/routes/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ class CreateRequestInput(BaseModel):

username: str = None
resource_path: str = None
resource_paths: list[str] = None
resource_id: str = None
resource_display_name: str = None
status: str = None
policy_id: str = None
role_ids: list[str] = None


async def grant_or_revoke_arborist_policy(arborist_client, policy_id, username, revoke):
Expand Down Expand Up @@ -89,27 +91,49 @@ async def create_request(
f"Creating request. request_id: {request_id}. Received body: {data}. Revoke: {'revoke' in api_request.query_params}"
)

# error (if we have both resource_path and policy_id) OR (if we have neither)
if bool(data.get("resource_path")) == bool(data.get("policy_id")):
msg = f"The request must have either a resource_path or a policy_id."
logger.error(
msg + f" body: {body}",
exc_info=True,
)
raise HTTPException(
HTTP_400_BAD_REQUEST,
msg,
)
# error (if we have both policy_id and (resource_path or resource_paths))
# OR (if we have neither)
if bool(data.get("policy_id")) == (
bool(data.get("resource_paths")) or bool(data.get("resource_path"))
):
msg = f"The request must have either resource_paths or a policy_id."
raise_error(logger, msg, body)

# error if we have both role_ids and policy_id
if data.get("role_ids") and data.get("policy_id"):
msg = f"The request cannot have both role_ids and policy_id."
raise_error(logger, msg, body)

# cast resource_path as list if resource_paths is not present
if data.get("resource_path") and not data.get("resource_paths"):
data["resource_paths"] = [data["resource_path"]]

resource_paths = None
client = api_request.app.arborist_client

if not data["policy_id"]:
# fallback to body `resource_path` for backwards compatibility
data["policy_id"] = await arborist.create_arborist_policy(
client, data["resource_path"]
)
resource_paths = [data["resource_path"]]
if data.get("role_ids") and data.get("resource_paths"):

existing_roles = await arborist.list_roles(client)
existing_role_ids = [item["id"] for item in existing_roles["roles"]]
# Check if any role_id does not exist in arborist
roles_not_found = list(set(data["role_ids"]) - set(existing_role_ids))
if roles_not_found:
raise HTTPException(
HTTP_400_BAD_REQUEST,
f"Request creation failed. The roles {roles_not_found} do not exist.",
)

data["policy_id"] = await arborist.create_arborist_policy_for_role_ids(
client, data["role_ids"], data["resource_paths"]
)
resource_paths = data["resource_paths"]
else:
# else fallback to body `resource_paths` for backwards compatibility
data["policy_id"] = await arborist.create_arborist_policy(
client, data["resource_paths"]
)
resource_paths = data["resource_paths"]
else:
existing_policies = await arborist.list_policies(client, expand=True)

Expand Down Expand Up @@ -198,8 +222,8 @@ async def create_request(
msg,
)

# we don't store `resource_path` in the database, so get rid of it
del data["resource_path"]
# remove any fields that are not stored in requests table
[data.pop(key) for key in ["resource_path", "resource_paths", "role_ids"]]

if draft_previous_requests:
# reuse the draft request
Expand Down Expand Up @@ -398,5 +422,16 @@ async def delete_request(
return {"request_id": request_id}


def raise_error(logger, msg: str, body: CreateRequestInput):
logger.error(
msg + f" body: {body}",
exc_info=True,
)
raise HTTPException(
HTTP_400_BAD_REQUEST,
msg,
)


def init_app(app: FastAPI):
app.include_router(router, tags=["Manage"])
56 changes: 56 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,62 @@ def list_policies_patcher(test_data):
policy_expand_patch.stop()


@pytest.fixture(scope="function")
def list_roles_patcher():
"""
This fixture patches the list_roles method with a mock implementation based on
the test_data provided which is a dictionary consisting of the
role_id and the associated permissions.
"""

future = asyncio.Future()
future.set_result(
{
"roles": [
{
"id": "study_registrant",
"permissions": [
{
"id": "study_registration",
"action": {
"service": "study_registration",
"method": "access",
},
},
],
},
{
"id": "/mds_user",
"permissions": [
{
"id": "mds_access",
"action": {"service": "mds_gateway", "method": "access"},
},
],
},
{
"id": "/study_user",
"permissions": [
{
"id": "study_access",
"action": {"service": "study_access", "method": "access"},
},
],
},
]
}
)

list_roles_mock = MagicMock()
list_roles_mock.return_value = future
role_patch = patch("requestor.routes.query.arborist.list_roles", list_roles_mock)
role_patch.start()

yield

role_patch.stop()


@pytest.fixture(autouse=True, scope="function")
def access_token_patcher(client, request):
async def get_access_token(*args, **kwargs):
Expand Down

0 comments on commit 63442fd

Please sign in to comment.