forked from snakemake/snakemake
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding google life sciences logging save to storage bucket
Signed-off-by: vsoch <vsochat@stanford.edu>
- Loading branch information
Showing 2 changed files with 161 additions and 5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#!/usr/bin/env python | ||
|
||
# This is a helper script for the Google Life Sciences instance to be able to: | ||
# 1. download a blob from storage, which is required at the onset of the Snakemake | ||
# gls.py download <bucket> <source> <destination> | ||
# workflow step to obtain the working directory. | ||
# 2. Upload logs back to storage (or some specified directory of files) | ||
# gls.py save <bucket> <source-dir> <destination-dir> | ||
# gls.py save <bucket> /google/logs/output source/logs | ||
|
||
import argparse | ||
import datetime | ||
|
||
from google.cloud import storage | ||
from glob import glob | ||
import sys | ||
import os | ||
|
||
|
||
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    # NOTE: the one-line docstring above is surfaced at runtime as the
    # argparse sub-command help text, so it is kept verbatim.
    client = storage.Client()
    source_bucket = client.get_bucket(bucket_name)
    source_blob = source_bucket.blob(source_blob_name)

    # Stream the object straight into the local destination file.
    source_blob.download_to_filename(destination_file_name)

    print("Blob {} downloaded to {}.".format(source_blob_name, destination_file_name))
|
||
|
||
def save_files(bucket_name, source_path, destination_path):
    """given a directory path, save all files recursively to storage
    """
    # (Docstring kept verbatim: it is used at runtime as the argparse
    # sub-command help text.)
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)

    # destination path should be stripped of path indicators too; the
    # stripped bucket_name is only used for the human-readable full_path
    # below (get_bucket above already received the caller's original value)
    bucket_name = bucket_name.strip("/")
    destination_path = destination_path.strip("/")

    # These are fullpaths
    filenames = get_source_files(source_path)

    # Bug fix: previously the "will be uploaded" header was printed even
    # for an empty listing, and execution fell through to a no-op loop
    # after the "Did not find" message. Bail out early instead.
    if not filenames:
        print("Did not find any filenames under %s" % source_path)
        return

    print("\nThe following files will be uploaded: %s" % "\n".join(filenames))

    # Do the upload!
    for filename in filenames:

        # The relative path of the filename from the source path
        relative_path = filename.replace(source_path, "", 1).strip("/")

        # The path in storage includes relative path from destination_path
        storage_path = os.path.join(destination_path, relative_path)
        full_path = os.path.join(bucket_name, storage_path)
        # NOTE(review): "(unknown)" looks like it should be the local
        # filename; preserved as-is since the intended text cannot be
        # confirmed from this view — verify against the upstream source.
        print(f"(unknown) -> {full_path}")

        # Upload only when the object does not already exist in the bucket.
        blob = bucket.blob(storage_path)
        if not blob.exists():
            print("Uploading %s to %s" % (filename, full_path))
            blob.upload_from_filename(filename)
|
||
|
||
def get_source_files(source_path):
    """Given a directory, return a listing of files to upload

    Walks source_path recursively and returns the full paths of every
    regular file found. Exits the process if the directory is missing.
    """
    if not os.path.exists(source_path):
        print("%s does not exist!" % source_path)
        # NOTE(review): exits with status 0 even though the path is
        # missing; preserved because calling scripts may depend on the
        # zero exit code — confirm before changing.
        sys.exit(0)

    # Bug fix: the previous glob(os.path.join(root, "*")) pattern silently
    # skipped dotfiles (hidden log files would never be uploaded). os.walk
    # already yields every file name, including hidden ones.
    filenames = []
    for root, _dirs, files in os.walk(source_path):
        for name in files:
            filenames.append(os.path.join(root, name))
    return filenames
|
||
|
||
def add_ending_slash(filename):
    """Return *filename* guaranteed to end with a single trailing slash.

    Used so that prefix replacement can rely on the slash being present.
    """
    return filename if filename.endswith("/") else "%s/" % filename
|
||
|
||
def blob_commands(args):
    """Dispatch the parsed command-line arguments to the matching action.

    Unrecognized (or missing) commands fall through as a no-op.
    """
    command = args.command
    if command == "download":
        download_blob(
            args.bucket_name, args.source_blob_name, args.destination_file_name
        )
    elif command == "save":
        save_files(args.bucket_name, args.source_path, args.destination_path)
|
||
|
||
def main():
    """Build the command-line parser and run the requested sub-command."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(dest="command")

    # "download": fetch a single blob from storage to a local file
    dl = subparsers.add_parser("download", help=download_blob.__doc__)
    dl.add_argument("bucket_name", help="Your cloud storage bucket.")
    dl.add_argument("source_blob_name")
    dl.add_argument("destination_file_name")

    # "save": upload a directory tree (e.g. logs) back to storage
    sv = subparsers.add_parser("save", help=save_files.__doc__)
    sv.add_argument("bucket_name", help="Your cloud storage bucket.")
    sv.add_argument("source_path")
    sv.add_argument("destination_path")

    blob_commands(parser.parse_args())
|
||
|
||
# Script entry point (run directly on the Life Sciences instance).
if __name__ == "__main__":
    main()