-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
2,789 additions
and
4,805 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,5 +11,3 @@ exclude_lines = | |
ignore_errors = True | ||
fail_under = 50 | ||
show_missing = True | ||
|
||
omit = caliban_toolbox/deprecated/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# Copyright 2016-2020 David Van Valen at California Institute of Technology | ||
# (Caltech), with support from the Paul Allen Family Foundation, Google, | ||
# & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
import sys | ||
import boto3 | ||
import os | ||
import threading | ||
import re | ||
|
||
from urllib.parse import urlencode | ||
|
||
import numpy as np | ||
from getpass import getpass | ||
|
||
from caliban_toolbox.utils.utils import get_img_names, list_npzs_folder | ||
|
||
|
||
# Taken from AWS documentation: prints a running upload-progress line.
class ProgressPercentage(object):
    """Progress callback for boto3 uploads.

    An instance is passed as ``Callback=`` to ``s3.upload_file``; boto3
    invokes it repeatedly with the number of bytes transferred since the
    previous call, and the running percentage is written to stdout.
    """

    def __init__(self, filename):
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # boto3 may fire the callback from multiple transfer threads
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        with self._lock:
            self._seen_so_far += bytes_amount
            # guard against ZeroDivisionError for zero-byte files
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                percentage = 100.0
            sys.stdout.write(
                "\r%s %s / %s (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()
|
||
|
||
def connect_aws():
    """Prompt for AWS credentials and return an authenticated S3 client.

    Credentials are read interactively (hidden input) rather than from
    disk or environment, so nothing sensitive is persisted.

    Returns:
        boto3 S3 client created from the supplied key pair
    """
    access_key_id = getpass('What is your AWS access key id? ')
    secret_access_key = getpass('What is your AWS secret access key id? ')

    session = boto3.Session(aws_access_key_id=access_key_id,
                            aws_secret_access_key=secret_access_key)
    print('Connected to AWS')

    return session.client('s3')
|
||
|
||
def aws_upload_files(aws_folder, stage, upload_folder, pixel_only, label_only, rgb_mode):
    """Uploads files to AWS bucket for use in Figure 8.

    Args:
        aws_folder: folder where uploaded files will be stored
        stage: specifies stage in pipeline for jobs requiring multiple rounds of annotation
        upload_folder: path to folder containing files that will be uploaded
        pixel_only: boolean flag to set pixel_only mode
        label_only: boolean flag to set label_only mode
        rgb_mode: boolean flag to set rgb_mode

    Returns:
        tuple: (list of uploaded NPZ filenames,
                list of caliban.deepcell.org URLs, one per uploaded file)
    """
    s3 = connect_aws()

    # load the images from specified folder but not the json log file
    files_to_upload = list_npzs_folder(upload_folder)

    # change slashes separating nested folders to underscores for URL generation
    # (plain str.replace; no regex needed)
    subfolders = aws_folder.replace('/', '__')

    # the display-mode query string is the same for every file, so build it once
    url_dict = {'pixel_only': pixel_only, 'label_only': label_only, 'rgb': rgb_mode}
    url_encoded_dict = urlencode(url_dict)

    filename_list = []

    # upload images
    for img in files_to_upload:

        # full path to image on disk
        img_path = os.path.join(upload_folder, img)

        # destination key within the bucket
        img_key = os.path.join(aws_folder, stage, img)

        # upload with a progress readout; source path is kept in object metadata
        s3.upload_file(img_path, 'caliban-input', img_key,
                       Callback=ProgressPercentage(img_path),
                       ExtraArgs={'ACL': 'public-read',
                                  'Metadata': {'source_path': img_path}})
        print('\n')

        url = 'https://caliban.deepcell.org/{}__{}__{}__' \
              '{}__{}?{}'.format('caliban-input', 'caliban-output', subfolders, stage, img,
                                 url_encoded_dict)

        # add caliban url to list
        filename_list.append(url)

    return files_to_upload, filename_list
|
||
|
||
def aws_transfer_file(s3, input_bucket, output_bucket, key_src, key_dst):
    """Helper function to transfer files from one bucket/key to another. Used
    in conjunction with a soon-to-be-created transfer jobs script for jobs with
    multiple stages.

    Args:
        s3: boto3 S3 client used to perform the copy
        input_bucket: bucket that receives the copied object
        output_bucket: bucket the object is copied from
        key_src: key of the source object in output_bucket
        key_dst: key the copy is stored under in input_bucket
    """
    source = {'Bucket': output_bucket, 'Key': key_src}

    s3.copy(source, input_bucket, key_dst, ExtraArgs={'ACL': 'public-read'})
|
||
|
||
def aws_download_files(upload_log, output_dir):
    """Download files following Figure 8 annotation.

    Args:
        upload_log: pandas file containing information from upload process
        output_dir: directory where files will be saved
    """
    s3 = connect_aws()

    # filenames to fetch, plus the bucket location recorded at upload time
    files_to_download = upload_log['filename']
    aws_folder = upload_log['aws_folder'][0]
    stage = upload_log['stage'][0]

    # download all images
    for filename in files_to_download:
        # local destination for this file
        save_path = os.path.join(output_dir, filename)

        # object key within the aws bucket
        key = os.path.join(aws_folder, stage, filename)

        s3.download_file(Bucket='caliban-output', Key=key, Filename=save_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
# Copyright 2016-2020 David Van Valen at California Institute of Technology | ||
# (Caltech), with support from the Paul Allen Family Foundation, Google, | ||
# & National Institutes of Health (NIH) under Grant U24CA224309-01. | ||
# All rights reserved. | ||
# | ||
# Licensed under a modified Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE | ||
# | ||
# The Work provided may be used for non-commercial academic purposes only. | ||
# For any other use of the Work, including commercial use, please contact: | ||
# vanvalenlab@gmail.com | ||
# | ||
# Neither the name of Caltech nor the names of its contributors may be used | ||
# to endorse or promote products derived from this software without specific | ||
# prior written permission. | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
import requests | ||
import os | ||
import stat | ||
import zipfile | ||
import pandas as pd | ||
import urllib | ||
|
||
from getpass import getpass | ||
from caliban_toolbox.log_file import create_upload_log | ||
from caliban_toolbox.aws_functions import aws_upload_files, aws_download_files | ||
|
||
|
||
def copy_job(job_id, key):
    """Helper function to create a Figure 8 job based on existing job.

    Args:
        job_id: ID number of job to copy instructions and settings from when creating new job
        key: API key to access Figure 8 account

    Returns:
        int: ID number of job created, or -1 if the copy request failed
    """
    url = 'https://api.appen.com/v1/jobs/{}/copy.json?'.format(str(job_id))
    API_key = {"key": key}

    new_job = requests.get(url, params=API_key)
    if new_job.status_code != 200:
        print("copy_job not successful. Status code: ", new_job.status_code)
        # callers test for -1 to detect failure; returning here also avoids
        # raising on .json() for an error response body
        return -1

    return new_job.json()['id']
|
||
|
||
def upload_data(csv_path, job_id, key):
    """Add data to an existing Figure 8 job by uploading a CSV file.

    Args:
        csv_path: full path to csv
        job_id: ID number of job to upload data to
        key: API key to access Figure 8 account
    """
    # format url with appropriate arguments
    url = "https://api.appen.com/v1/jobs/{}/upload.json?{}"
    url_dict = {'key': key, 'force': True}
    url_encoded_dict = urllib.parse.urlencode(url_dict)
    url = url.format(job_id, url_encoded_dict)

    # context manager so the file handle is closed instead of leaked
    with open(csv_path, 'r') as csv_file:
        csv_data = csv_file.read()

    headers = {"Content-Type": "text/csv"}

    add_data = requests.put(url, data=csv_data, headers=headers)
    if add_data.status_code != 200:
        print("Upload_data not successful. Status code: ", add_data.status_code)
    else:
        print("Data successfully uploaded to Figure Eight.")
|
||
|
||
def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage,
                            rgb_mode=False, label_only=False, pixel_only=False):
    """Create a Figure 8 job and upload data to it. New job ID printed out for convenience.

    Args:
        base_dir: full path to directory that contains CSV files
        job_id_to_copy: ID number of Figure 8 job to use as template for new job
        aws_folder: folder in aws bucket where files be stored
        stage: specifies stage in pipeline for jobs requiring multiple rounds of annotation
        pixel_only: flag specifying whether annotators will be restricted to pixel edit mode
        label_only: flag specifying whether annotators will be restricted to label edit mode
        rgb_mode: flag specifying whether annotators will view images in RGB mode
    """
    api_key = str(getpass("Figure eight api key? "))

    # clone the template job (settings only, no data)
    new_job_id = copy_job(job_id_to_copy, api_key)
    if new_job_id == -1:
        return
    print('New job ID is: ' + str(new_job_id))

    # push the NPZ crops up to the AWS bucket
    crop_folder = os.path.join(base_dir, 'crop_dir')
    filenames, filepaths = aws_upload_files(aws_folder=aws_folder, stage=stage,
                                            upload_folder=crop_folder, pixel_only=pixel_only,
                                            rgb_mode=rgb_mode, label_only=label_only)

    # record what was uploaded so later stages can locate the files
    create_upload_log(base_dir=base_dir, stage=stage, aws_folder=aws_folder,
                      filenames=filenames, filepaths=filepaths, job_id=new_job_id,
                      pixel_only=pixel_only, rgb_mode=rgb_mode, label_only=label_only)

    # attach the logged rows to the newly created Figure 8 job
    upload_data(os.path.join(base_dir, 'logs/stage_0_upload_log.csv'), new_job_id, api_key)
|
||
|
||
def download_report(job_id, log_dir):
    """Download job report from Figure 8.

    Args:
        job_id: Figure 8 job id
        log_dir: full path to log_dir where report will be saved
    """
    if not os.path.isdir(log_dir):
        print('Log directory does not exist: have you uploaded this job to Figure 8?')
        os.makedirs(log_dir)

        # add folder modification permissions to deal with files from file explorer
        # NOTE(review): chmod assumed to apply only to freshly created dirs — confirm
        mode = stat.S_IRWXO | stat.S_IRWXU | stat.S_IRWXG
        os.chmod(log_dir, mode)

    save_path = os.path.join(log_dir, 'job_report.zip')

    # password prompt for api info
    key = str(getpass("Please enter your Figure Eight API key:"))

    # construct url
    url = "https://api.appen.com/v1/jobs/{}.csv?".format(job_id)

    params = {"type": 'full', "key": key}

    # make http request: python requests handles redirects
    csv_request = requests.get(url, params=params, allow_redirects=True)
    if csv_request.status_code != 200:
        # warn but still save the payload so a failed response can be inspected
        print("download_report request failed. Status code: ", csv_request.status_code)

    # write via a context manager so the handle is closed rather than leaked
    with open(save_path, 'wb') as save_file:
        save_file.write(csv_request.content)
    print('Report saved to folder')
|
||
|
||
def unzip_report(log_dir):
    """Unzips .csv file and renames it appropriately.

    Args:
        log_dir: full path to log_dir for saving zip
    """
    # Extract zip
    zip_path = os.path.join(log_dir, 'job_report.zip')
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        default_name = zip_ref.namelist()[0]  # get filename so can rename later
        zip_ref.extractall(log_dir)

    # rename from Figure 8 default; os.replace (unlike os.rename) overwrites an
    # existing job_report.csv, so re-running after a fresh download cannot fail
    default_name_path = os.path.join(log_dir, default_name)  # should only be one file in zip
    new_name_path = os.path.join(log_dir, 'job_report.csv')
    os.replace(default_name_path, new_name_path)
|
||
|
||
def download_figure_eight_output(base_dir):
    """Gets annotated files from a Figure 8 job.

    Args:
        base_dir: directory containing relevant job files
    """
    # get information from job creation
    # TODO: check for latest stage job report and use that one
    log_path = os.path.join(base_dir, 'logs/stage_0_upload_log.csv')
    log_file = pd.read_csv(log_path)
    job_id = log_file['job_id'][0]

    # download Figure 8 report
    log_dir = os.path.join(base_dir, 'logs')
    download_report(job_id=job_id, log_dir=log_dir)
    unzip_report(log_dir=log_dir)

    # download annotations from aws
    output_dir = os.path.join(base_dir, 'output')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    upload_log = pd.read_csv(log_path)
    aws_download_files(upload_log, output_dir)
Oops, something went wrong.