# Upload files to S3

## Libraries

In [None]:
import os
import json
import boto3
import zipfile
import tarfile

## Credentials

In [None]:
# Destination bucket, local folder to upload, and key prefix inside the bucket.
AVSLS_BUCKET = 'miba-ma-prj-options'
FOLDER = 'options'
UPFOLDER = 'data'

# Access keys are read from a local JSON file so they are not hard-coded
# in the notebook.
with open('access.json') as file:
    access_data = json.load(file)

# S3 client bound to an explicit, non-default endpoint URL.
session = boto3.session.Session()
s3 = session.client(
    's3',
    endpoint_url='https://hb.bizmrg.com',
    aws_access_key_id=access_data['aws_access_key_id'],
    aws_secret_access_key=access_data['aws_secret_access_key'],
)

## Utils

In [None]:
def get_loaded_objects(s3, bucket, upfolder, verbose=False):
    """Return the keys of all objects in *bucket* whose key starts with *upfolder*.

    Follows ``list_objects_v2`` pagination until the listing is no longer
    truncated, so every matching key is collected.

    Args:
        s3: Client object exposing ``list_objects_v2`` (e.g. a boto3 S3 client).
        bucket: Name of the bucket to list.
        upfolder: Key prefix used to filter the listing.
        verbose: When true, print the running total after each page.

    Returns:
        A list of object keys (strings); empty when nothing matches.
    """
    loaded = []
    # Every page is requested with the caller's bucket and prefix; only the
    # continuation token changes between requests.
    request = {'Bucket': bucket, 'Prefix': upfolder}
    while True:
        s3_result = s3.list_objects_v2(**request)
        # 'Contents' is absent from the response when no object matches.
        loaded.extend(obj['Key'] for obj in s3_result.get('Contents', []))
        if verbose:
            print(f'loaded: {len(loaded)}')
        if not s3_result.get('IsTruncated'):
            return loaded
        request['ContinuationToken'] = s3_result['NextContinuationToken']

def upload_folder_to_s3(s3, bucket, input_dir, s3_path, verbose=False):
    """Recursively upload *input_dir* to *bucket* under the key prefix *s3_path*.

    Each file's key is ``<s3_path>/<path relative to input_dir>`` with ``/``
    as the separator. Files whose key is already listed in the bucket under
    *s3_path* are skipped, so an interrupted upload can be re-run.

    Args:
        s3: Client object exposing ``list_objects_v2`` and ``upload_file``.
        bucket: Name of the destination bucket.
        input_dir: Local directory to walk.
        s3_path: Key prefix inside the bucket; a trailing ``/`` is ignored.
        verbose: When true, print one line per file (uploaded or skipped).

    Raises:
        Exception: Any error raised while walking or uploading is re-raised
            after the directory being processed has been printed.
    """
    import shlex  # local import: only needed for the diagnostic listing below

    print('local:', input_dir)
    # Quote the path so spaces or shell metacharacters cannot break the command.
    os.system('ls -ltR ' + shlex.quote(input_dir))
    print('s3 destination:', s3_path)
    # A set gives O(1) "already uploaded?" checks for large listings.
    loaded = set(get_loaded_objects(s3, bucket, s3_path, verbose))
    print('total loaded:', len(loaded))

    prefix = s3_path.rstrip('/')
    # Pre-bind the loop variables so the error report below is always valid.
    path, subdirs, files = input_dir, [], []
    try:
        for path, subdirs, files in os.walk(input_dir):
            # relpath strips only the leading input_dir, unlike str.replace,
            # which would also remove later occurrences of the same text.
            rel_dir = os.path.relpath(path, input_dir)
            key_parts = [prefix] if prefix else []
            if rel_dir != os.curdir:
                key_parts.extend(rel_dir.split(os.sep))
            for file in files:
                # S3 keys always use '/', whatever the local OS separator is.
                s3file = '/'.join(key_parts + [file])
                if s3file in loaded:
                    if verbose:
                        print(s3file, 'exists')
                    continue
                local_file = os.path.join(path, file)
                if verbose:
                    print(local_file, '->', s3file, end='')
                s3.upload_file(local_file, bucket, s3file)
                if verbose:
                    print(' done')
    except Exception as e:
        print('failed:', path, subdirs, files)
        print(e)
        raise
## Run upload

In [None]:
%%time
# Upload everything under FOLDER to the UPFOLDER prefix of AVSLS_BUCKET;
# files already listed under that prefix are skipped.
upload_folder_to_s3(s3, AVSLS_BUCKET, FOLDER, UPFOLDER, verbose=False)