Skip to content

Commit

Permalink
Merge pull request #745 from uc-cdis/chore/dbgap_backup
Browse files Browse the repository at this point in the history
Chore/dbgap backup
  • Loading branch information
diw2 committed Jun 25, 2020
2 parents 0743817 + 9fc2e99 commit 6c89664
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 23 deletions.
17 changes: 17 additions & 0 deletions bin/fence-create
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from fence.scripting.fence_create import (
remove_expired_google_accounts_from_proxy_groups,
remove_expired_google_service_account_keys,
sync_users,
download_dbgap_files,
delete_expired_service_accounts,
verify_bucket_access_group,
verify_user_registration,
Expand Down Expand Up @@ -158,6 +159,14 @@ def parse_arguments():
help="the base URL for the arborist service to sync to",
default=None,
)
dbgap_sync.add_argument(
"--folder", required=False, help="destination where dbGaP whitelist files are saved", default=None,
)

dbgap_download = subparsers.add_parser("dbgap-download-access-files")
dbgap_download.add_argument(
"--folder", required=False, help="destination where dbGaP whitelist files are saved", default=None,
)

bucket_link_to_project = subparsers.add_parser("link-bucket-to-project")
bucket_link_to_project.add_argument(
Expand Down Expand Up @@ -420,8 +429,16 @@ def main():
is_sync_from_dbgap_server=str2bool(args.sync_from_dbgap),
sync_from_local_csv_dir=args.csv_dir,
sync_from_local_yaml_file=args.yaml,
folder=args.folder,
arborist=arborist,
)
elif args.action == "dbgap-download-access-files":
download_dbgap_files(
dbGaP,
STORAGE_CREDENTIALS,
DB,
folder=args.folder,
)
elif args.action == "google-manage-keys":
remove_expired_google_service_account_keys(DB)
elif args.action == "google-init":
Expand Down
9 changes: 9 additions & 0 deletions docs/usersync.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ dbGaP:

An example can be found in the config used for unit testing [tests/test-fence-config.yaml](https://github.com/uc-cdis/fence/blob/master/tests/test-fence-config.yaml)

### Enable dbGaP backup to s3:
Add the following to the `global` block of manifest.json to copy the dbGaP authorization files to <dbgap_backup_bucket>:
```
+ "dbgap_backup_bucket": "s3://di-planx-test",
+ "aws_sa_rule_arn": "arn:aws:iam::707767160287:role/devplanetv1-dbgap-bakup-role",
```
More deployment details can be found here: https://github.com/uc-cdis/cdis-wiki/blob/master/ops/DBGAP-backup-deployment.md

## Usersync result example

### Example of user.yaml file:
Expand Down Expand Up @@ -187,3 +195,4 @@ The [gen3users CLI](https://github.com/uc-cdis/gen3users) includes a user.yaml v
pip install gen3users
gen3users validate user.yaml
```

60 changes: 58 additions & 2 deletions fence/scripting/fence_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _remove_client_service_accounts(db_session, client):
)


def sync_users(
def init_syncer(
dbGaP,
STORAGE_CREDENTIALS,
DB,
Expand All @@ -187,6 +187,7 @@ def sync_users(
sync_from_local_csv_dir=None,
sync_from_local_yaml_file=None,
arborist=None,
folder=None,
):
"""
sync ACL files from dbGap to auth db and storage backends
Expand Down Expand Up @@ -235,7 +236,7 @@ def sync_users(
except IOError:
pass

syncer = UserSyncer(
return UserSyncer(
dbGaP,
DB,
project_mapping=project_mapping,
Expand All @@ -244,7 +245,62 @@ def sync_users(
sync_from_local_csv_dir=sync_from_local_csv_dir,
sync_from_local_yaml_file=sync_from_local_yaml_file,
arborist=arborist,
folder=folder,
)


def download_dbgap_files(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    Build a syncer via init_syncer() and run only its download() step.

    The parameter list intentionally mirrors sync_users(): per the original
    author's note, all parameters must be kept, otherwise the download fails.
    Every argument is forwarded positionally, unchanged, to init_syncer().

    Args:
        dbGaP: dbGaP configuration handed to init_syncer()
        STORAGE_CREDENTIALS: storage configuration handed to init_syncer()
        DB: database handle/URL handed to init_syncer()
        projects: forwarded to init_syncer()
        is_sync_from_dbgap_server: forwarded to init_syncer()
        sync_from_local_csv_dir: forwarded to init_syncer()
        sync_from_local_yaml_file: forwarded to init_syncer()
        arborist: forwarded to init_syncer()
        folder: destination where the dbGaP files are saved

    Exits the process with status 1 when init_syncer() returns a falsy value.
    """
    # `exit` is a helper injected by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. `python -S`); sys.exit is the
    # programmatic equivalent.
    import sys

    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects,
        is_sync_from_dbgap_server,
        sync_from_local_csv_dir,
        sync_from_local_yaml_file,
        arborist,
        folder,
    )
    if not syncer:
        sys.exit(1)
    syncer.download()


def sync_users(
    dbGaP,
    STORAGE_CREDENTIALS,
    DB,
    projects=None,
    is_sync_from_dbgap_server=False,
    sync_from_local_csv_dir=None,
    sync_from_local_yaml_file=None,
    arborist=None,
    folder=None,
):
    """
    Build a syncer via init_syncer() and run a full sync().

    Every argument is forwarded positionally, unchanged, to init_syncer().

    Args:
        dbGaP: dbGaP configuration handed to init_syncer()
        STORAGE_CREDENTIALS: storage configuration handed to init_syncer()
        DB: database handle/URL handed to init_syncer()
        projects: forwarded to init_syncer()
        is_sync_from_dbgap_server: forwarded to init_syncer()
        sync_from_local_csv_dir: forwarded to init_syncer()
        sync_from_local_yaml_file: forwarded to init_syncer()
        arborist: forwarded to init_syncer()
        folder: destination where the dbGaP files are saved

    Exits the process with status 1 when init_syncer() returns a falsy value.
    """
    # `exit` is a helper injected by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. `python -S`); sys.exit is the
    # programmatic equivalent.
    import sys

    syncer = init_syncer(
        dbGaP,
        STORAGE_CREDENTIALS,
        DB,
        projects,
        is_sync_from_dbgap_server,
        sync_from_local_csv_dir,
        sync_from_local_yaml_file,
        arborist,
        folder,
    )
    if not syncer:
        sys.exit(1)
    syncer.sync()


Expand Down
60 changes: 43 additions & 17 deletions fence/sync/sync_users.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import errno
import glob
import os
import re
import shutil
import subprocess as sp
import tempfile
import yaml
import copy
from contextlib import contextmanager
Expand Down Expand Up @@ -288,6 +285,7 @@ def __init__(
sync_from_local_csv_dir=None,
sync_from_local_yaml_file=None,
arborist=None,
folder=None,
):
"""
Syncs ACL files from dbGap to auth database and storage backends
Expand All @@ -301,6 +299,7 @@ def __init__(
arborist:
ArboristClient instance if the syncer should also create
resources in arborist
folder: a local folder where dbgap telemetry files will sync to
"""
self.sync_from_local_csv_dir = sync_from_local_csv_dir
self.sync_from_local_yaml_file = sync_from_local_yaml_file
Expand All @@ -318,6 +317,7 @@ def __init__(
"user_syncer", log_level="debug" if config["DEBUG"] is True else "info"
)
self.arborist_client = arborist
self.folder = folder

if storage_credentials:
self.storage_manager = StorageManager(
Expand Down Expand Up @@ -456,6 +456,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True):
study_common_exchange_areas = dbgap_config.get(
"study_common_exchange_areas", {}
)

if parse_consent_code and enable_common_exchange_area_access:
self.logger.info(
f"using study to common exchange area mapping: {study_common_exchange_areas}"
Expand Down Expand Up @@ -1008,29 +1009,25 @@ def _process_dbgap_files(self, dbgap_config, sess):
user_info (dict)
"""
dbgap_file_list = []
tmpdir = tempfile.mkdtemp()
server = dbgap_config["info"]
protocol = dbgap_config["protocol"]
self.logger.info("Download from server")
hostname = dbgap_config["info"]["host"]
username = dbgap_config["info"]["username"]
folderdir = os.path.join(str(self.folder), str(hostname), str(username))

try:
if protocol == "sftp":
self._get_from_sftp_with_proxy(server, tmpdir)
if os.path.exists(folderdir):
dbgap_file_list = glob.glob(
os.path.join(folderdir, "*")
) # get lists of file from folder
else:
self._get_from_ftp_with_proxy(server, tmpdir)
dbgap_file_list = glob.glob(os.path.join(tmpdir, "*"))
dbgap_file_list = self._download(dbgap_config)
except Exception as e:
self.logger.error(e)
exit(1)
self.logger.info("dbgap files: {}".format(dbgap_file_list))
user_projects, user_info = self._get_user_permissions_from_csv_list(
dbgap_file_list, encrypted=True, session=sess, dbgap_config=dbgap_config
)
try:
shutil.rmtree(tmpdir)
except OSError as e:
self.logger.info(e)
if e.errno != errno.ENOENT:
raise

user_projects = self.parse_projects(user_projects)
return user_projects, user_info

Expand Down Expand Up @@ -1097,6 +1094,35 @@ def sync(self):
with self.driver.session as s:
self._sync(s)

def download(self):
    """
    Run the download step once for every entry in ``self.dbGaP``.

    Each entry is passed as-is to ``self._download``; the values it
    returns are discarded.
    """
    for server_config in self.dbGaP:
        self._download(server_config)

def _download(self, dbgap_config):
"""
Download files from dbgap server.
"""
server = dbgap_config["info"]
protocol = dbgap_config["protocol"]
hostname = server["host"]
username = server["username"]
folderdir = os.path.join(str(self.folder), str(hostname), str(username))

if not os.path.exists(folderdir):
os.makedirs(folderdir)

self.logger.info("Download from server")
try:
if protocol == "sftp":
self._get_from_sftp_with_proxy(server, folderdir)
else:
self._get_from_ftp_with_proxy(server, folderdir)
dbgap_files = glob.glob(os.path.join(folderdir, "*"))
return dbgap_files
except Exception as e:
self.logger.error(e)
exit(1)

def _sync(self, sess):
"""
Collect files from dbgap server(s), sync csv and yaml files to storage
Expand Down
10 changes: 6 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
Authlib==0.11
addict==2.2.1
authutils>=4.0.0,<5.0.0
boto>=2.36.0,<3.0.0
botocore>=1.7,<1.10.39
boto3>=1.5,<1.6
authutils>=4.0.0,<5.0.0
awscli
boto>=2.36.0,<3.0.0
botocore>=1.7,<1.10.39
boto3>=1.5,<1.6
cached_property==1.5.1
cdislogging>=1.0.0,<2.0.0
cdiserrors==0.1.2
Expand Down Expand Up @@ -40,4 +41,5 @@ userdatamodel==2.3.2
Werkzeug==0.16.0
pyyaml==5.1
retry==0.9.2

git+https://github.com/uc-cdis/storage-client.git@1.0.0#egg=storageclient

0 comments on commit 6c89664

Please sign in to comment.