# Transferring files to AWS S3

This notebook contains code to transfer files/directories to AWS S3.

First, we install the libraries we need to run the notebook.

In [2]:

!pip install -q typing boto3 botocore

Now we define the data types and functions required to perform the transfer.

In [17]:
import os
import pathlib
from typing import Any, NamedTuple

import boto3
import botocore

class S3Env(NamedTuple):
  """S3 connection settings together with the resource object built from them.

  Instances are produced by init(), which fills every field from the
  process environment.
  """
  # Value of the AWS_ACCESS_KEY_ID environment variable.
  access_key_id: str
  # Value of the AWS_SECRET_ACCESS_KEY environment variable.
  secret_access_key: str
  # Value of AWS_S3_ENDPOINT; passed to boto3 as endpoint_url.
  s3_endpoint: str
  # Value of the AWS_S3_BUCKET environment variable.
  bucket_name: str
  # Value of AWS_DEFAULT_REGION; passed to boto3 as region_name.
  default_region: str
  # Object returned by boto3.resource(service_name='s3', ...).
  client: boto3.resource

class BucketMeta(NamedTuple):
  """Parameters describing an upload target and the local data to send.

  One type serves two callers: upload_files() reads ``client``,
  ``bucket_name``, ``model_data_dir`` and ``prefix``; upload_file() reads
  ``bucket``, ``file_name``, ``model_data_dir`` and ``prefix``. Every field
  is optional so each caller can set only what it needs.
  """
  # boto3 S3 resource used to look up or create the bucket.
  client: boto3.resource = None
  # Name of the destination bucket.
  bucket_name: str = None
  # Bucket object the file is uploaded through (its upload_file() and
  # .name are used). Typed as Any: the original annotation was the builtin
  # function `any`, which is not a type.
  bucket: Any = None
  # Text that replaces model_data_dir when building the object key.
  prefix: str = ""
  # Local directory that is walked for files to upload.
  model_data_dir: str = None
  # Local path of a single file to upload.
  file_name: str = None
  # The three fields below are not read by upload_file() or upload_files().
  object_name: str = None
  exclude_dirs_set: set[str] = None
  exclude_files_set: set[str] = None

In [18]:
def getDefaultRegion() -> str:
  """Return the AWS_DEFAULT_REGION environment variable, or None when it is unset."""
  region = os.getenv('AWS_DEFAULT_REGION')
  return region

def init() -> S3Env:
  """Build an S3Env from the AWS_* environment variables.

  Reads AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION,
  AWS_S3_ENDPOINT and AWS_S3_BUCKET, prints the resulting configuration and
  creates a boto3 S3 resource from it.

  Returns:
    An S3Env holding the values that were read plus the boto3 S3 resource.
    Variables that are not set come back as None.
  """
  access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
  secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
  default_region = getDefaultRegion()
  s3_endpoint = os.environ.get('AWS_S3_ENDPOINT')
  bucket_name = os.environ.get('AWS_S3_BUCKET')

  # Notebook output is saved and shared with the notebook, so the secret is
  # never echoed; only whether one was supplied is reported.
  secret_status = '<set>' if secret_access_key else '<not set>'
  print(f'key_id={access_key_id}, secret_key={secret_status}')
  print(f'default_region={default_region}, s3_endpoint={s3_endpoint}, bucket={bucket_name}')

  client = boto3.resource(
    endpoint_url = s3_endpoint,
    aws_access_key_id = access_key_id,
    aws_secret_access_key = secret_access_key,
    region_name = default_region,
    service_name = 's3'
  )

  return S3Env(access_key_id, secret_access_key, s3_endpoint, bucket_name, default_region, client)


def upload_file(file_model: BucketMeta):
  """Upload one local file through the bucket object held in ``file_model``.

  The object key is the local path with the first occurrence of
  ``model_data_dir`` replaced by ``prefix``. Progress and the outcome are
  printed; exceptions raised by the upload are reported and not re-raised,
  so a failed upload does not interrupt the caller.

  Args:
    file_model: Must provide ``bucket``, ``file_name``, ``model_data_dir``
      and ``prefix``.
  """
  bucket = file_model.bucket

  # Replace the model directory name with the prefix (for the bucket object).
  # Only the first occurrence is swapped: a directory name that reappears
  # later in the path (e.g. "data/metadata.json") must stay untouched.
  object_name = file_model.file_name.replace(file_model.model_data_dir, file_model.prefix, 1)

  print(f'   -> Trying to upload file [{file_model.file_name}] with key [{object_name}] to the bucket [{bucket.name}]...', end=" ")

  known_errors = {
    'NoSuchBucket': 'Bucket NOT FOUND',
    'AccessDenied': 'Access denied',
  }

  detailed_err = None
  try:
    bucket.upload_file(file_model.file_name, object_name)
  except botocore.exceptions.ClientError as e:
    error_code = e.response['Error']['Code']
    detailed_err = known_errors.get(error_code, f"Unexpected error: {e}")
  except Exception as e:
    detailed_err = f"Unexpected error: {e}"

  if detailed_err is None:
    print('SUCCESS')
    return

  # Reported after the try/except so that every failure path, including
  # ClientError, ends the progress line and shows its detail.
  print('FAILURE')
  print(f" -->> *** {detailed_err} ")

def get_bucket(client: boto3.resource, bucket_name: str):
  """Return ``client.Bucket(bucket_name)``, or None if that call raises.

  Any exception is printed (botocore ClientError with its own label) and
  swallowed.

  NOTE(review): building a Bucket resource presumably does not contact S3,
  so a non-None result may not prove the bucket exists - confirm.
  """
  try:
    return client.Bucket(bucket_name)
  except botocore.exceptions.ClientError as client_err:
    report = f'ClientError:: {client_err}'
  except Exception as other_err:
    report = f'Exception:: {other_err}'

  print(report)
  return None

def upload_files(dir_model: BucketMeta):
  """Upload every non-hidden file under ``dir_model.model_data_dir``.

  The destination bucket is created first when it is not among the buckets
  listed by the client. The directory tree is then walked and each file
  whose name does not start with '.' is handed to upload_file().

  Args:
    dir_model: Must provide ``client``, ``bucket_name``, ``model_data_dir``
      and ``prefix``.
  """
  print(f'Uploading files from [{dir_model.model_data_dir}] directory...')
  client = dir_model.client
  bucket = get_bucket(client, dir_model.bucket_name)

  if bucket and bucket in client.buckets.all():
    print(f' -->> Bucket already exists [{dir_model.bucket_name}]')
  else:
    create_args = {'Bucket': dir_model.bucket_name}
    region = getDefaultRegion()
    # A LocationConstraint is only sent for a concrete non-default region:
    # None fails parameter validation and S3 rejects an explicit
    # 'us-east-1', which is what a request without a constraint gets anyway.
    if region and region != 'us-east-1':
      create_args['CreateBucketConfiguration'] = {'LocationConstraint': region}
    bucket = client.create_bucket(**create_args)
    print(f' -->> Created bucket [{dir_model.bucket_name}]')

  print(f'Bucket details: {bucket}, type={type(bucket)}')

  for (dir_path, dirs, files) in os.walk(dir_model.model_data_dir):
    print(f'root={dir_path}, dirs={dirs}, files={files}')
    for f in files:
      # Hidden files (leading dot) are skipped; hidden directories are
      # still walked.
      if f.startswith('.'):
        print(f'   -->> *** File (or dir) [{f}] IGNORED... *** <<--')
        continue

      file_name = pathlib.Path(dir_path, f)
      upload_file(BucketMeta(bucket = bucket,
                             file_name=str(file_name),
                             prefix=dir_model.prefix,
                             model_data_dir=dir_model.model_data_dir))