Mock figure8 and aws functions for testing (#104)
* first attempt at main function

* updated requirements-test

* fix namespace error

* more testing for figure8 functions

* testing for aws upload

* pep8

* pep8

* switched to mocker

* patch specific calls rather than entire function

* Update requirements-test.txt

Co-authored-by: willgraf <7930703+willgraf@users.noreply.github.com>

* error handling for aws functions

* assert statements for remaining functions

* pep8

* removed print statements, added TODO for spy

Co-authored-by: willgraf <7930703+willgraf@users.noreply.github.com>
ngreenwald and willgraf committed Jul 11, 2020
1 parent 64c120d commit b22e4ab
Showing 5 changed files with 277 additions and 56 deletions.
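The tests added here use pytest-mock's mocker fixture to patch the specific external calls (getpass.getpass and boto3.Session) rather than the toolbox functions themselves, so the real upload and download code still runs against a fake client. A minimal sketch of that pattern, using a hypothetical read_secret helper rather than anything from this commit:

import getpass


def read_secret():
    return getpass.getpass('secret? ')


def test_read_secret(mocker):
    # Patch the specific call the function makes, not the function under test.
    mocker.patch('getpass.getpass', lambda *args: 'fake-secret')
    assert read_secret() == 'fake-secret'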
20 changes: 8 additions & 12 deletions caliban_toolbox/aws_functions.py
@@ -26,15 +26,10 @@
import os
import sys
import threading
import re

import boto3
import botocore

from urllib.parse import urlencode
from getpass import getpass

from caliban_toolbox.utils.misc_utils import list_npzs_folder
import getpass


# Taken from AWS Documentation
@@ -57,8 +52,8 @@ def __call__(self, bytes_amount):


def connect_aws():
AWS_ACCESS_KEY_ID = getpass('What is your AWS access key id? ')
AWS_SECRET_ACCESS_KEY = getpass('What is your AWS secret access key id? ')
AWS_ACCESS_KEY_ID = getpass.getpass('What is your AWS access key id? ')
AWS_SECRET_ACCESS_KEY = getpass.getpass('What is your AWS secret access key id? ')

session = boto3.Session(aws_access_key_id=AWS_ACCESS_KEY_ID,
aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
@@ -80,7 +75,7 @@ def aws_upload_files(local_paths, aws_paths):

# upload images
for i in range(len(local_paths)):
s3.upload_file(local_paths[i], 'caliban-input', aws_paths[i],
s3.upload_file(Filename=local_paths[i], Bucket='caliban-input', Key=aws_paths[i],
Callback=ProgressPercentage(local_paths[i]),
ExtraArgs={'ACL': 'public-read',
'Metadata': {'source_path': local_paths[i]}})
@@ -126,12 +121,13 @@ def aws_download_files(upload_log, output_dir):
aws_path = os.path.join(aws_folder, stage, file)

try:
s3.download_file(Bucket='caliban-output', Key=img_path, Filename=save_path)
s3.download_file(Bucket='caliban-output', Key=aws_path, Filename=local_path)
except botocore.exceptions.ClientError as e:
error_code = e.response['Error']['Code']

if error_code == '404':
print('The file {} does not exist'.format(img))
missing.append(img)
print('The file {} does not exist'.format(aws_path))
missing.append(aws_path)
else:
raise e

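For reference, a hypothetical usage sketch of the updated helpers (the file paths, bucket keys, and upload_log values below are assumptions, not taken from this commit). Both functions establish their own S3 session, prompting for credentials via getpass.getpass, which is exactly why the tests patch getpass.getpass and boto3.Session:

from caliban_toolbox import aws_functions

# Paired lists: each local file is uploaded to the 'caliban-input' bucket under the matching key.
local_paths = ['/data/crops/npz_file_0.npz', '/data/crops/npz_file_1.npz']
aws_paths = ['aws_bucket/folder/npz_file_0.npz', 'aws_bucket/folder/npz_file_1.npz']
aws_functions.aws_upload_files(local_paths=local_paths, aws_paths=aws_paths)

# Download corrected files from 'caliban-output'. Keys that return a 404 are
# collected and returned as a list of missing files; any other ClientError is re-raised.
upload_log = {'stage': ['stage_0'],
              'aws_folder': ['aws_bucket/folder'],
              'filename': ['npz_file_0.npz', 'npz_file_1.npz']}
missing = aws_functions.aws_download_files(upload_log=upload_log, output_dir='/data/output')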
98 changes: 98 additions & 0 deletions caliban_toolbox/aws_functions_test.py
@@ -0,0 +1,98 @@
# Copyright 2016-2020 The Van Valen Lab at the California Institute of
# Technology (Caltech), with support from the Paul Allen Family Foundation,
# Google, & National Institutes of Health (NIH) under Grant U24CA224309-01.
# All rights reserved.
#
# Licensed under a modified Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.github.com/vanvalenlab/caliban-toolbox/LICENSE
#
# The Work provided may be used for non-commercial academic purposes only.
# For any other use of the Work, including commercial use, please contact:
# vanvalenlab@gmail.com
#
# Neither the name of Caltech nor the names of its contributors may be used
# to endorse or promote products derived from this software without specific
# prior written permission.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import boto3
import botocore
import pytest

from caliban_toolbox import aws_functions
import pathlib


class FakeS3(object):

def __init__(self, aws_access_key_id='key', aws_secret_access_key='secret', raise_error=None):
self.raise_error = raise_error

def client(self, *_, **__):
return self

def upload_file(self, Filename, Bucket, Key, Callback, ExtraArgs):
assert os.path.exists(Filename)

def download_file(self, Bucket, Key, Filename):
if self.raise_error is None:
pathlib.Path(Filename).touch()
        elif self.raise_error == 'missing':
raise botocore.exceptions.ClientError(error_response={'Error': {'Code': '404'}},
operation_name='missing_file')
        elif self.raise_error == 'other':
raise botocore.exceptions.ClientError(error_response={'Error': {'Code': '555'}},
operation_name='some_other_error')


# TODO: Can we spy on this function in order to have some sort of correctness test here?
def test_aws_upload_files(mocker, tmp_path):
mocker.patch('getpass.getpass', lambda *x: None)
mocker.patch('boto3.Session', FakeS3)

local_files = ['npz_file_' + str(num) for num in range(5)]
aws_paths = ['aws_bucket/folder/npz_file_' + str(num) for num in range(5)]

for file in local_files:
pathlib.Path(os.path.join(tmp_path, file)).touch()

local_paths = [os.path.join(tmp_path, file) for file in local_files]

aws_functions.aws_upload_files(local_paths=local_paths, aws_paths=aws_paths)


def test_aws_download_files(mocker, tmp_path):
mocker.patch('getpass.getpass', lambda *x: None)
mocker.patch('boto3.Session', FakeS3)

filenames = ['npz_file_' + str(num) for num in range(5)]

upload_log = {'stage': ['stage_0'],
'aws_folder': ['temp_folder'],
'filename': filenames}

# no missing files
missing = aws_functions.aws_download_files(upload_log=upload_log, output_dir=tmp_path)
assert missing == []

# catch missing file error, return list of missing files
mocker.patch('boto3.Session',
lambda aws_access_key_id, aws_secret_access_key: FakeS3(raise_error='missing'))
missing = aws_functions.aws_download_files(upload_log=upload_log, output_dir=tmp_path)
missing = [os.path.split(file_path)[1] for file_path in missing]
assert missing == filenames

# all other errors not caught
with pytest.raises(botocore.exceptions.ClientError):
mocker.patch('boto3.Session',
lambda aws_access_key_id, aws_secret_access_key: FakeS3(raise_error='other'))
missing = aws_functions.aws_download_files(upload_log=upload_log, output_dir=tmp_path)
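One way to address the spy TODO above, sketched as if appended to this test module (an assumption, not part of the commit): pytest-mock can wrap FakeS3.upload_file so the fake upload still runs while the test asserts how many times it was called.

def test_aws_upload_files_call_count(mocker, tmp_path):
    mocker.patch('getpass.getpass', lambda *x: None)
    mocker.patch('boto3.Session', FakeS3)

    # Wrap FakeS3.upload_file so calls still execute but are recorded by the spy.
    spy = mocker.spy(FakeS3, 'upload_file')

    local_files = ['npz_file_' + str(num) for num in range(5)]
    for file in local_files:
        pathlib.Path(os.path.join(tmp_path, file)).touch()

    local_paths = [os.path.join(tmp_path, file) for file in local_files]
    aws_paths = ['aws_bucket/folder/' + file for file in local_files]

    aws_functions.aws_upload_files(local_paths=local_paths, aws_paths=aws_paths)

    # One upload_file call per input file.
    assert spy.call_count == len(local_paths)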
35 changes: 16 additions & 19 deletions caliban_toolbox/figure_eight_functions.py
@@ -31,7 +31,7 @@
import urllib
import re

from getpass import getpass
import getpass
from urllib.parse import urlencode

from caliban_toolbox.log_file import create_upload_log
@@ -119,11 +119,12 @@ def copy_job(job_id, key):
"""

url = 'https://api.appen.com/v1/jobs/{}/copy.json?'.format(str(job_id))
API_key = {"key": key}
API_key = {'key': key}

new_job = requests.get(url, params=API_key)
if new_job.status_code != 200:
print("copy_job not successful. Status code: ", new_job.status_code)
        raise ValueError('copy_job not successful. Status code: {}'.format(new_job.status_code))

new_job_id = new_job.json()['id']

return new_job_id
@@ -155,21 +156,19 @@ def upload_log_file(log_file, job_id, key):
"""

# format url with appropriate arguments
url = "https://api.appen.com/v1/jobs/{}/upload.json?{}"
url = 'https://api.appen.com/v1/jobs/{}/upload.json?{}'
url_dict = {'key': key, 'force': True}
url_encoded_dict = urllib.parse.urlencode(url_dict)
url = url.format(job_id, url_encoded_dict)

csv_file = open(csv_path, 'r')
csv_data = csv_file.read()

headers = {"Content-Type": "text/csv"}
headers = {'Content-Type': 'text/csv'}
add_data = requests.put(url, data=log_file, headers=headers)

if add_data.status_code != 200:
print("Upload_data not successful. Status code: ", add_data.status_code)
        raise ValueError('Upload_data not successful. Status code: {}'.format(add_data.status_code))
else:
print("Data successfully uploaded to Figure Eight.")
print('Data successfully uploaded to Figure Eight.')
return add_data.status_code


def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage, job_name=None,
@@ -203,7 +202,7 @@ def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage, job_nam
if len(list_npzs_folder(upload_folder)) == 0:
raise ValueError('No NPZs found in crop dir')

key = str(getpass("Figure eight api key? "))
key = str(getpass.getpass("Figure eight api key? "))

# copy job without data
new_job_id = copy_job(job_id_to_copy, key)
@@ -227,19 +226,18 @@ def create_figure_eight_job(base_dir, job_id_to_copy, aws_folder, stage, job_nam
# upload files to AWS bucket
aws_upload_files(local_paths=npz_paths, aws_paths=npz_keys)

log_name = 'stage_0_{}_upload_log.csv'.format(stage)

# Generate log file for current job
create_upload_log(base_dir=base_dir, stage=stage, aws_folder=aws_folder,
filenames=npzs, filepaths=url_paths, job_id=new_job_id,
pixel_only=pixel_only, rgb_mode=rgb_mode, label_only=label_only,
log_name=log_name)
pixel_only=pixel_only, rgb_mode=rgb_mode, label_only=label_only)

log_path = open(os.path.join(base_dir, 'logs', log_name), 'r')
log_path = open(os.path.join(base_dir, 'logs/stage_0_upload_log.csv'), 'r')
log_file = log_path.read()

# upload log file
upload_log_file(log_file, new_job_id, key)
status_code = upload_log_file(log_file, new_job_id, key)

return status_code


def download_report(job_id, log_dir):
Expand All @@ -261,7 +259,7 @@ def download_report(job_id, log_dir):
save_path = os.path.join(log_dir, 'job_report.zip')

# password prompt for api info
key = str(getpass("Please enter your Figure Eight API key:"))
key = str(getpass.getpass("Please enter your Figure Eight API key:"))

# construct url
url = "https://api.appen.com/v1/jobs/{}.csv?".format(job_id)
@@ -310,7 +308,6 @@ def download_figure_eight_output(base_dir):
job_id = log_file['job_id'][0]

# download Figure 8 report
log_dir = os.path.join(base_dir, 'logs')
download_report(job_id=job_id, log_dir=log_dir)
unzip_report(log_dir=log_dir)

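With this change, copy_job and upload_log_file raise ValueError on a non-200 response instead of only printing a message, so callers can handle failures explicitly. A hypothetical caller sketch (the job id is made up, not from this commit):

import getpass

from caliban_toolbox import figure_eight_functions

key = str(getpass.getpass('Figure eight api key? '))

try:
    new_job_id = figure_eight_functions.copy_job(job_id=123456, key=key)
except ValueError as err:
    # e.g. an invalid API key or a job id that does not exist
    print('Could not copy job: {}'.format(err))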
