In [7]:
import os
import threading

import boto3
import numpy
import yaml
from boto3.s3.transfer import TransferConfig

In [4]:
# Read the credentials file that lives one directory above the notebook.
# safe_load is used so the YAML cannot instantiate arbitrary Python objects.
with open("../config.yaml", 'r') as stream:
    key = yaml.safe_load(stream)

# Pull the individual secrets out of the parsed mapping once the file handle
# has been released.
API_KEY = key['key']
AWSKEY = key['awskey']
AWSSECRET = key['awssecret']

In [24]:
class FileUploader:
    """Upload local files to S3 and print integer-percent progress.

    One credentialed S3 client is created at construction time and reused
    for every upload. Progress counters are reset at the start of each
    ``upload`` call, so a single instance can upload several files in
    sequence.

    Attributes:
        total: Size in bytes of the file currently being uploaded.
        uploaded: Bytes reported as transferred so far for that file.
        percent: Highest whole-number percentage printed so far.
        session: ``boto3.Session`` built from ``AWSKEY`` / ``AWSSECRET``.
        s3: S3 client obtained from ``session``.
        transfer_config: ``TransferConfig`` shared by all uploads.
        stream: When true, upload via an open file object
            (``upload_fileobj``); otherwise upload by path (``upload_file``).
    """

    # Multipart threshold handed to TransferConfig (5 GiB): files smaller
    # than this are sent in a single request.
    MULTIPART_THRESHOLD = 5 * (1024 ** 3)

    def __init__(self, stream = False):
        """Create the S3 client and zero the progress counters.

        Args:
            stream: Select the file-object upload path instead of the
                path-based one.
        """
        self.total = 0
        self.uploaded = 0
        self.percent = 0
        self.session = boto3.Session(
            aws_access_key_id=AWSKEY,
            aws_secret_access_key=AWSSECRET,
        )
        # Derive the client from the credentialed session so the explicit
        # keys are actually used, and keep it for reuse across uploads.
        self.s3 = self.session.client('s3')
        self.transfer_config = TransferConfig(
            multipart_threshold=self.MULTIPART_THRESHOLD
        )
        self.stream = stream
        # boto3 may invoke the progress callback from several transfer
        # threads; the lock keeps the counter updates consistent.
        self._lock = threading.Lock()

    def upload_callback(self, size):
        """Record ``size`` newly transferred bytes and print new percentages.

        A line is printed only when the whole-number percentage increases.
        Nothing happens while ``total`` is 0 (no upload started, or an empty
        file), which also avoids a division by zero.

        Args:
            size: Number of bytes transferred since the previous call.
        """
        with self._lock:
            if self.total == 0:
                return
            self.uploaded += size
            # Integer arithmetic avoids float rounding that could truncate
            # the value one point low.
            percent = self.uploaded * 100 // self.total
            if percent > self.percent:
                print("{} %".format(percent))
                self.percent = percent

    def upload(self, bucket, key, file):
        """Upload ``file`` to ``bucket`` under ``key``, printing progress.

        Args:
            bucket: Name of the destination S3 bucket.
            key: Object key to write within the bucket.
            file: Path of the local file to upload.

        Raises:
            OSError: If ``file`` cannot be stat'ed or opened.
        """
        size = os.stat(file).st_size
        with self._lock:
            # Start from a clean slate so progress from a previous upload on
            # this instance does not leak into this one.
            self.total = size
            self.uploaded = 0
            self.percent = 0

        if self.stream:
            with open(file, 'rb') as data:
                self.s3.upload_fileobj(
                    data, bucket, key,
                    Config=self.transfer_config, Callback=self.upload_callback
                )
        else:
            self.s3.upload_file(
                file, bucket, key,
                Config=self.transfer_config, Callback=self.upload_callback
            )

In [None]:
# Push the raw test archive to the project bucket using the path-based
# (non-streaming) upload route.
uploader = FileUploader(stream=False)
uploader.upload(
    bucket='restoration-mapper-archive',
    key='test-raw.zip',
    file='../raw/test-raw.zip',
)

# Convert .npy to .hkl files to save 13% storage space

In [None]:
import os
import numpy as np
import hickle as hkl
# 1.3 before

# Gather every file below the raw training directory, then keep only real
# .npy files whose path mentions neither "output" nor "processed".
# The suffix test (rather than a substring test) guarantees that stripping
# the last four characters below removes exactly the ".npy" extension and
# that unrelated files such as "x.npy.bak" are never loaded or deleted.
files = [
    os.path.join(dp, f)
    for dp, _, fn in os.walk(os.path.expanduser("../raw/train-raw/"))
    for f in fn
]
files = [
    x for x in files
    if x.endswith(".npy") and "output" not in x and "processed" not in x
]

for file in files:
    print(file)
    loaded = np.load(file)
    hkl_path = file[:-len(".npy")] + ".hkl"
    hkl.dump(loaded, hkl_path, mode='w', compression='gzip')
    # The original is removed only after the dump call returns without
    # raising, so a failed conversion leaves the .npy file in place.
    os.remove(file)