Skip to content

Commit

Permalink
Merge 19c8de7 into f5a712b
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre committed Dec 14, 2019
2 parents f5a712b + 19c8de7 commit 005fb1b
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 20 deletions.
12 changes: 6 additions & 6 deletions docs/usersync.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Usersync

Usersync is a script that parses user access information from multiple sources (user.yaml files, dbGaP user authorization telemetry files AKA whitelists) and keeps users' access to Gen3 resources up to date by updating the Fence and Arborist databases.
Usersync is a script that parses user access information from multiple sources (user.yaml files, dbGaP user authorization "telemetry" files AKA whitelists) and keeps users' access to Gen3 resources up to date by updating the Fence and Arborist databases.

## Usersync flow

![Usersync Flow](images/usersync.png)

> Note that at the time of writing, the user.yaml file overrides the access obtained from the telemetry files. In the future, usersync will combine the access instead.
> The access from the user.yaml file and the dbGaP authorization files is combined (see example below), but the user.yaml file overrides the user information (such as email) obtained from the dbGaP authorization files.
## Usersync result example

Expand Down Expand Up @@ -125,7 +125,7 @@ users:
```
</details>

### Example of telemetry file (CSV format):
### Example of dbGaP authorization file (CSV format):

```
user name, login, authority, role, email, phone, status, phsid, permission set, created
Expand All @@ -135,7 +135,7 @@ Mrs. GHI,GHI,eRA,PI,ghi@com,"123-456-789",active,phs3.v2.p3.c4,"General Research

Usersync gives users "read" and "read-storage" permissions to the dbGaP studies.

> Note: The dbGaP telemetry files contain consent codes that can be parsed by usersync: [more details here](dbgap_info.md). This simplified example does not include consent code parsing.
> Note: The dbGaP authorization files contain consent codes that can be parsed by usersync: [more details here](dbgap_info.md). This simplified example does not include consent code parsing.
### Resulting access:

Expand All @@ -148,9 +148,9 @@ Usersync gives users "read" and "read-storage" permissions to the dbGaP studies.
- /open: read + read-storage
- /programs/phs1: read
- /programs/phs2: read
- /programs/phs3: read + read-storage _(from the telemetry file)_
- /programs/phs3: read + read-storage _(from the dbGaP authorization file)_
- user GHI:
- /programs/phs3: create _(user.yaml access overrides telemetry file access)_
- /programs/phs3: read + read-storage + create _(user.yaml access combined with dbGaP authorization file access)_

## Validation

Expand Down
52 changes: 41 additions & 11 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,13 +571,15 @@ def _add_dbgap_project_for_user(
@staticmethod
def sync_two_user_info_dict(user_info1, user_info2):
"""
Merge user_info1 into user_info2, which are both nested dicts like:
{username: {'email': 'abc@email.com'}}
Merge user_info1 into user_info2. Values in user_info2 are overridden
by values in user_info1. user_info2 ends up containing the merged dict.
Args:
user_info1 (dict)
user_info2 (dict)
user_info1 (dict): nested dict
user_info2 (dict): nested dict
Example:
{username: {'email': 'abc@email.com'}}
Returns:
None
Expand All @@ -587,11 +589,13 @@ def sync_two_user_info_dict(user_info1, user_info2):
@staticmethod
def sync_two_phsids_dict(phsids1, phsids2):
"""
Merge phsids1 into phsids2
Merge phsids1 into phsids2. phsids2 ends up containing the merged dict
(see explanation below).
Args:
phsids1, phsids2: nested dicts mapping phsids to sets of permissions
Example:
{
username: {
phsid1: {'read-storage','write-storage'},
Expand Down Expand Up @@ -1033,25 +1037,33 @@ def _sync(self, sess):
key.lower(): value for key, value in user_yaml.projects.items()
}

self.sync_two_phsids_dict(user_projects_csv, user_projects)
# merge all user info dicts into "user_info".
# the user info (such as email) in the user.yaml files
# overrides the user info from the CSV files.
self.sync_two_user_info_dict(user_info_csv, user_info)
self.sync_two_user_info_dict(user_yaml.user_info, user_info)

# privileges in yaml files override ones in csv files
# merge all access info dicts into "user_projects".
# the access info is combined - if the user.yaml access is
# ["read"] and the CSV file access is ["read-storage"], the
# resulting access is ["read", "read-storage"].
self.sync_two_phsids_dict(user_projects_csv, user_projects)
self.sync_two_phsids_dict(user_yaml.projects, user_projects)
self.sync_two_user_info_dict(user_yaml.user_info, user_info)

if self.parse_consent_code:
self._grant_all_consents_to_c999_users(
user_projects, user_yaml.project_to_resource
)

# update the Fence DB
if user_projects:
self.logger.info("Sync to db and storage backend")
self.sync_to_db_and_storage_backend(user_projects, user_info, sess)
self.logger.info("Finish syncing to db and storage backend")
else:
self.logger.info("No users for syncing")

# update the Arborist DB (resources, roles, policies, groups)
if user_yaml.authz:
if not self.arborist_client:
raise EnvironmentError(
Expand All @@ -1066,8 +1078,9 @@ def _sync(self, sess):
self.logger.error("Could not synchronize successfully")
exit(1)
else:
self.logger.info("No resources specified; skipping arborist sync")
self.logger.info("No `authz` section; skipping arborist sync")

# update the Arborist DB (user access)
if self.arborist_client:
self.logger.info("Synchronizing arborist with authorization info...")
success = self._update_authz_in_arborist(sess, user_projects, user_yaml)
Expand Down Expand Up @@ -1178,6 +1191,7 @@ def _update_arborist(self, session, user_yaml):
self.logger.error(e)
# keep going; maybe just some conflicts from things existing already

# update roles
roles = user_yaml.authz.get("roles", [])
for role in roles:
try:
Expand All @@ -1188,6 +1202,7 @@ def _update_arborist(self, session, user_yaml):
self.logger.error(e)
# keep going; maybe just some conflicts from things existing already

# update policies
policies = user_yaml.authz.get("policies", [])
for policy in policies:
policy_id = policy.pop("id")
Expand All @@ -1206,7 +1221,21 @@ def _update_arborist(self, session, user_yaml):
self.logger.debug("Upserted policy with id {}".format(policy_id))
self._created_policies.add(policy_id)

# update groups
groups = user_yaml.authz.get("groups", [])

# delete from arborist the groups that have been deleted
# from the user.yaml
arborist_groups = set(
g["name"] for g in self.arborist_client.list_groups().get("groups", [])
)
useryaml_groups = set(g["name"] for g in groups)
for deleted_group in arborist_groups.difference(useryaml_groups):
# do not try to delete built-in groups
if deleted_group not in ["anonymous", "logged-in"]:
self.arborist_client.delete_group(deleted_group)

# create/update the groups defined in the user.yaml
for group in groups:
missing = {"name", "users", "policies"}.difference(set(group.keys()))
if missing:
Expand All @@ -1218,7 +1247,8 @@ def _update_arborist(self, session, user_yaml):
try:
response = self.arborist_client.put_group(
group["name"],
description=group.get("description", ""),
# Arborist doesn't handle group descriptions yet
# description=group.get("description", ""),
users=group["users"],
policies=group["policies"],
)
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Flask-CORS==3.0.3
Flask_OAuthlib==0.9.4
flask-restful==0.3.6
Flask_SQLAlchemy_Session==1.1
gen3authz==0.2.3
gen3authz==0.3.0
gen3config==0.1.7
gen3cirrus==1.1.2
gen3users
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"Flask-CORS>=3.0.3,<4.0.0",
"Flask_OAuthlib>=0.9.4,<1.0.0",
"Flask_SQLAlchemy_Session>=1.1,<2.0",
"gen3authz>=0.2.0,<0.3.0",
"gen3authz>=0.3.0,<0.4.0",
"gen3cirrus>=1.1.0,<2.0",
"gen3config>=0.1.6,<1.0.0",
"google_api_python_client>=1.6.4,<2.0.0",
Expand Down
2 changes: 1 addition & 1 deletion tests/dbgap_sync/test_user_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ def test_sync_two_phsids_dict(syncer, db_session, storage_client):


@pytest.mark.parametrize("syncer", ["google", "cleversafe"], indirect=True)
def test_sync_two_phsids_dict_override(syncer, db_session, storage_client):
def test_sync_two_phsids_dict_combine(syncer, db_session, storage_client):
phsids1 = {
"userA": {
"phs000178": {"read", "read-storage"},
Expand Down

0 comments on commit 005fb1b

Please sign in to comment.