Skip to content

Commit

Permalink
Merge 338e240 into 64c120d
Browse files Browse the repository at this point in the history
  • Loading branch information
ngreenwald committed Jul 9, 2020
2 parents 64c120d + 338e240 commit c2f9425
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 20 deletions.
11 changes: 3 additions & 8 deletions caliban_toolbox/aws_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,11 @@
import os
import sys
import threading
import re

import boto3
import botocore

from urllib.parse import urlencode
from getpass import getpass

from caliban_toolbox.utils.misc_utils import list_npzs_folder


# Taken from AWS Documentation
class ProgressPercentage(object):
Expand Down Expand Up @@ -126,12 +121,12 @@ def aws_download_files(upload_log, output_dir):
aws_path = os.path.join(aws_folder, stage, file)

try:
s3.download_file(Bucket='caliban-output', Key=img_path, Filename=save_path)
s3.download_file(Bucket='caliban-output', Key=aws_path, Filename=local_path)
except botocore.exceptions.ClientError as e:
error_code = e.response['Error']['Code']
if error_code == '404':
print('The file {} does not exist'.format(img))
missing.append(img)
print('The file {} does not exist'.format(aws_path))
missing.append(aws_path)
else:
raise e

Expand Down
76 changes: 76 additions & 0 deletions caliban_toolbox/aws_functions_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os

from unittest.mock import patch
from caliban_toolbox import aws_functions
import tempfile
import pathlib


# TODO: What is a better way to mock the s3 = connect_aws() call within this function?
@patch('caliban_toolbox.aws_functions.connect_aws')
def test_aws_upload_files(connect_aws):
    """Smoke-test ``aws_upload_files`` with ``connect_aws`` patched out.

    The patched ``connect_aws`` hands back a stub client whose ``upload_file``
    accepts the call and does nothing, so the test only checks that the
    upload routine runs to completion on a set of real (empty) local files.
    """

    class StubS3Client(object):
        """Stand-in for the S3 client; ``upload_file`` is a no-op."""

        def upload_file(self, x1, x2, x3, Callback, ExtraArgs):
            return None

    connect_aws.return_value = StubS3Client()

    file_names = ['npz_file_' + str(idx) for idx in range(5)]
    aws_paths = ['aws_bucket/folder/npz_file_' + str(idx) for idx in range(5)]

    with tempfile.TemporaryDirectory() as temp_dir:
        # create an empty placeholder on disk for every file to be uploaded
        local_paths = []
        for name in file_names:
            placeholder = os.path.join(temp_dir, name)
            pathlib.Path(placeholder).touch()
            local_paths.append(placeholder)

        aws_functions.aws_upload_files(local_paths=local_paths, aws_paths=aws_paths)


@patch('caliban_toolbox.aws_functions.connect_aws')
def test_aws_download_files(connect_aws):
    """Smoke-test ``aws_download_files`` with ``connect_aws`` patched out.

    The stub client's ``download_file`` accepts the keyword arguments
    ``Bucket``, ``Key`` and ``Filename`` and does nothing, so nothing is
    written to ``output_dir``.
    """

    class StubS3Client(object):
        """Stand-in for the S3 client; ``download_file`` is a no-op."""

        def download_file(self, Bucket, Key, Filename):
            return None

    connect_aws.return_value = StubS3Client()

    # minimal upload log: one stage, one folder, five file names
    upload_log = {
        'stage': ['stage_0'],
        'aws_folder': ['temp_folder'],
        'filename': ['aws_bucket/folder/npz_file_' + str(idx) for idx in range(5)],
    }

    aws_functions.aws_download_files(upload_log=upload_log,
                                     output_dir='example/output/dir')
17 changes: 5 additions & 12 deletions caliban_toolbox/figure_eight_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import urllib
import re

from getpass import getpass
import getpass
from urllib.parse import urlencode

from caliban_toolbox.log_file import create_upload_log
Expand Down Expand Up @@ -160,9 +160,6 @@ def upload_log_file(log_file, job_id, key):
url_encoded_dict = urllib.parse.urlencode(url_dict)
url = url.format(job_id, url_encoded_dict)

csv_file = open(csv_path, 'r')
csv_data = csv_file.read()

headers = {"Content-Type": "text/csv"}
add_data = requests.put(url, data=log_file, headers=headers)

Expand Down Expand Up @@ -203,7 +200,7 @@ def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage, job_nam
if len(list_npzs_folder(upload_folder)) == 0:
raise ValueError('No NPZs found in crop dir')

key = str(getpass("Figure eight api key? "))
key = str(getpass.getpass("Figure eight api key? "))

# copy job without data
new_job_id = copy_job(job_id_to_copy, key)
Expand All @@ -227,15 +224,12 @@ def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage, job_nam
# upload files to AWS bucket
aws_upload_files(local_paths=npz_paths, aws_paths=npz_keys)

log_name = 'stage_0_{}_upload_log.csv'.format(stage)

# Generate log file for current job
create_upload_log(base_dir=base_dir, stage=stage, aws_folder=aws_folder,
filenames=npzs, filepaths=url_paths, job_id=new_job_id,
pixel_only=pixel_only, rgb_mode=rgb_mode, label_only=label_only,
log_name=log_name)
pixel_only=pixel_only, rgb_mode=rgb_mode, label_only=label_only)

log_path = open(os.path.join(base_dir, 'logs', log_name), 'r')
log_path = open(os.path.join(base_dir, 'logs/stage_0_upload_log.csv'), 'r')
log_file = log_path.read()

# upload log file
Expand All @@ -261,7 +255,7 @@ def download_report(job_id, log_dir):
save_path = os.path.join(log_dir, 'job_report.zip')

# password prompt for api info
key = str(getpass("Please enter your Figure Eight API key:"))
key = str(getpass.getpass("Please enter your Figure Eight API key:"))

# construct url
url = "https://api.appen.com/v1/jobs/{}.csv?".format(job_id)
Expand Down Expand Up @@ -310,7 +304,6 @@ def download_figure_eight_output(base_dir):
job_id = log_file['job_id'][0]

# download Figure 8 report
log_dir = os.path.join(base_dir, 'logs')
download_report(job_id=job_id, log_dir=log_dir)
unzip_report(log_dir=log_dir)

Expand Down
112 changes: 112 additions & 0 deletions caliban_toolbox/figure_eight_functions_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,18 @@
import tempfile
import os
import pytest
import json
import requests_mock
import urllib
import pathlib
import zipfile

import numpy as np
import pandas as pd

from pathlib import Path
from unittest.mock import patch


from caliban_toolbox import figure_eight_functions

Expand Down Expand Up @@ -86,3 +96,105 @@ def test_create_job_urls():
# pixel_only=pixel_only,
# label_only=label_only,
# rgb_mode=rgb_mode)


# TODO: Is this test useful?
def test_copy_job():
    """Check that ``copy_job`` returns the ``id`` from the mocked response."""
    source_job_id = 666
    appen_key = 'a1b2c3'
    expected_id = 123

    # build the same copy endpoint as the function under test, for mocking
    copy_url = 'https://api.appen.com/v1/jobs/{}/copy.json?'.format(str(source_job_id))
    response_text = json.dumps({'status_code': 200, 'id': expected_id})

    with requests_mock.Mocker() as mocker:
        mocker.get(copy_url, text=response_text)
        copied_id = figure_eight_functions.copy_job(job_id=source_job_id, key=appen_key)

        assert copied_id == expected_id


def test_upload_log_file():
    """Smoke-test ``upload_log_file`` with the upload endpoint mocked."""
    appen_key = 'a1b2c3'
    job_id = 123

    # one-row log rendered to a string, used as the upload payload
    log_frame = pd.DataFrame(
        data={'project_url': 'https://caliban.deepcell.org/example_job.npz',
              'stage': 'test'},
        index=range(1))
    log_string = log_frame.to_string()

    # build the same upload URL as the function under test, for mocking
    query_string = urllib.parse.urlencode({'key': appen_key, 'force': True})
    upload_url = "https://api.appen.com/v1/jobs/{}/upload.json?{}".format(job_id, query_string)

    with requests_mock.Mocker() as mocker:
        mocker.put(upload_url, text=json.dumps({'status_code': 200}))
        figure_eight_functions.upload_log_file(log_file=log_string, job_id=job_id,
                                               key=appen_key)

# TODO: This mocks almost every call within the function, so we don't get proper integration
# testing of the different parts working together. However, better than not testing at all


@patch('caliban_toolbox.figure_eight_functions.upload_log_file')
@patch('caliban_toolbox.figure_eight_functions.aws_upload_files')
@patch('caliban_toolbox.figure_eight_functions.copy_job')
@patch("getpass.getpass")
def test_create_figure_eight_job(getpass, copy_job, aws_upload_files, upload_log_file):
    """Smoke-test ``create_figure_eight_job`` with its collaborators mocked.

    The API-key prompt, the job copy, the AWS upload and the log upload are all
    replaced by mocks with canned return values; only a crop directory holding
    a single NPZ file is created on disk.
    """
    canned_returns = (
        (getpass, 'test_api_key'),
        (copy_job, 123),
        (aws_upload_files, '200'),
        (upload_log_file, 567),
    )
    for mocked, value in canned_returns:
        mocked.return_value = value

    with tempfile.TemporaryDirectory() as temp_dir:

        # crop directory with one (empty) NPZ archive in it
        crop_dir = os.path.join(temp_dir, 'crop_dir')
        os.makedirs(crop_dir)
        npz_path = os.path.join(crop_dir, 'test_crop.npz')
        np.savez(npz_path)

        figure_eight_functions.create_figure_eight_job(base_dir=temp_dir, job_id_to_copy=123,
                                                       aws_folder='aws', stage='stage')


def test_unzip_report():
    """Check that ``unzip_report`` leaves a ``job_report.csv`` in the log dir.

    A ``job_report.zip`` containing one empty CSV file is created in a
    temporary directory, which is then passed to ``unzip_report``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # create example zip file
        csv_path = os.path.join(temp_dir, 'example_file.csv')
        pathlib.Path(csv_path).touch()
        zip_path = os.path.join(temp_dir, 'job_report.zip')

        # close the archive explicitly: the central directory is only written
        # on close, so it must not be left to garbage collection
        with zipfile.ZipFile(zip_path, mode='w') as report_zip:
            report_zip.write(csv_path)

        figure_eight_functions.unzip_report(temp_dir)

        assert os.path.exists(os.path.join(temp_dir, 'job_report.csv'))


@patch('caliban_toolbox.figure_eight_functions.aws_download_files')
@patch('caliban_toolbox.figure_eight_functions.download_report')
def test_download_figure_eight_output(download_report, aws_download_files):
    """Smoke-test ``download_figure_eight_output`` with downloads mocked.

    ``download_report`` and ``aws_download_files`` are replaced by mocks, so the
    zipped report and the upload log that the function reads are created by
    hand inside a temporary ``logs`` directory.
    """

    # we don't care about this return value, this is just to override existing function
    download_report.return_value = 200
    aws_download_files.return_value = 200

    with tempfile.TemporaryDirectory() as temp_dir:

        # create logs directory with zipped report
        os.makedirs(os.path.join(temp_dir, 'logs'))
        csv_path = os.path.join(temp_dir, 'logs', 'example_file.csv')
        pathlib.Path(csv_path).touch()
        zip_path = os.path.join(temp_dir, 'logs', 'job_report.zip')

        # close the archive explicitly: the central directory is only written
        # on close, so it must not be left to garbage collection
        with zipfile.ZipFile(zip_path, mode='w') as report_zip:
            report_zip.write(csv_path)

        # create log file
        log_file = pd.DataFrame({'job_id': [1234]})
        log_file.to_csv(os.path.join(temp_dir, 'logs', 'stage_0_upload_log.csv'))

        figure_eight_functions.download_figure_eight_output(temp_dir)
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pytest==4.6.5
pytest-cov==2.5.1
pytest-pep8>=1.0.6,<2.0.0
coveralls>=1.8.2,<2.0.0
requests_mock

0 comments on commit c2f9425

Please sign in to comment.