# AWS S3 Learning Notebook with boto3   

### Prerequisites

**Before running this notebook, make sure you're authenticated with AWS SSO:**

```bash
aws sso login --profile cloud-course
```

This command needs to be run in your terminal (not in this notebook) and will:
- Open a browser window for authentication
- Refresh your AWS credentials
- Allow this notebook to interact with AWS services

⚠️ **Note**: If you get a `TokenRetrievalError`, it means your AWS SSO session has expired. Simply run the login command above again.


In [123]:
import boto3
from botocore.exceptions import ClientError, TokenRetrievalError
from uuid import uuid4
from  rich import print
import os
from typing import Optional

try:
    from mypy_boto3_s3 import S3Client
except ImportError:
    print("mypy_boto3_s3 is not installed. Please install it using 'pip install mypy-boto3-s3'")

In [124]:
# Constants

# Select the SSO profile and region before the client is created.
os.environ["AWS_PROFILE"] = "cloud-course"
os.environ["AWS_REGION"] = "us-east-1"
# boto3 documents AWS_DEFAULT_REGION as the environment variable it uses for the
# default region, so mirror the value there to make the region choice take effect.
os.environ["AWS_DEFAULT_REGION"] = os.environ["AWS_REGION"]

# create the S3 client
# The annotation is quoted so this cell still runs when the optional
# mypy_boto3_s3 type stubs could not be imported.
S3_CLIENT: "S3Client" = boto3.client("s3")

# name of the bucket used throughout the notebook; the short random suffix
# reduces the chance of colliding with an existing bucket name
BUCKET_NAME = f"cloud-course-bucket-{str(uuid4())[:4]}"

# a single example object (key and text body)
EXAMPLE_OBJECT_KEY = "folder/file.txt"
EXAMPLE_OBJECT_CONTENT = "This is a test file!"

# several example objects, each given as a one-entry {key: text body} mapping
EXAMPLE_OBJECTS = [
    {"example-a/object/file1.txt": "This is a test object."},
    {"example-b/object/file2.txt": "This is another test object."},
    {"example-c/object/file3.txt": "This is a third test object."},
    {"example-d/object/file4.txt": "This is a fourth test object."},
    {"example-e/object/file5.txt": "This is a fifth test object."},
]

print(f"{BUCKET_NAME=}")
print(f"{EXAMPLE_OBJECT_KEY=}")
print(f"{EXAMPLE_OBJECT_CONTENT=}")
print(f"{EXAMPLE_OBJECTS=}")


### 1. Create a bucket

In [125]:
# Create a bucket

try:
    from mypy_boto3_s3.type_defs import CreateBucketOutputTypeDef
except ImportError:
    print("mypy-boto3-s3 is not installed, skipping type checking")


def create_bucket(bucket_name: str) -> Optional["CreateBucketOutputTypeDef"]:
    """
    Create a bucket in the region the S3 client is configured for.

    Args:
        bucket_name: The name of the bucket to create

    Returns:
        The output of the create_bucket operation, or None when S3 reports that
        the bucket already exists and is owned by you.

    Raises:
        TokenRetrievalError: re-raised after printing login instructions
        ClientError: re-raised for every S3 error other than BucketAlreadyOwnedByYou
    """
    request_kwargs = {"Bucket": bucket_name}

    # S3 only accepts a request without a location constraint in us-east-1;
    # for any other client region the constraint has to be sent explicitly.
    client_region = S3_CLIENT.meta.region_name
    if client_region not in (None, "us-east-1", "aws-global"):
        request_kwargs["CreateBucketConfiguration"] = {"LocationConstraint": client_region}

    try:
        return S3_CLIENT.create_bucket(**request_kwargs)
    except TokenRetrievalError:
        print("❌ AWS SSO token has expired!")
        print("🔧 Please run this command in your terminal:")
        print("   aws sso login --profile <profile_name>")
        print("📝 Then restart this notebook and try again.")
        raise
    except ClientError as e:
        error_code = e.response.get("Error", {}).get("Code")
        if error_code == 'BucketAlreadyOwnedByYou':
            # Not a failure for this notebook: the bucket can simply be reused.
            print(f"✅ Bucket {bucket_name} already exists and is owned by you")
            return None
        elif error_code == 'BucketAlreadyExists':
            print(f"❌ Bucket {bucket_name} already exists and is owned by someone else")
            raise
        else:
            print(f"❌ Error creating bucket: {e}")
            raise



In [126]:
# Create the bucket and report whether it is new or an existing one of ours
response = create_bucket(bucket_name=BUCKET_NAME)
if not response:
    # create_bucket returns None when the bucket already exists and is owned by us
    print(f"ℹ️  Using existing bucket {BUCKET_NAME}")
else:
    print(f"✅ Bucket {BUCKET_NAME} created successfully")
    print(f"{response=}")

### 2. Write an object to the bucket

In [127]:
# write an object to the bucket
try:
    from mypy_boto3_s3.type_defs import PutObjectOutputTypeDef
except ImportError:
    print("mypy-boto3-s3 is not installed, skipping type checking")


def write_text_object_to_bucket(
    bucket_name: str,
    object_key: str,
    object_content: str,
) -> Optional["PutObjectOutputTypeDef"]:
    """
    Store a text value as an object in the bucket via put_object.

    Args:
        bucket_name: The bucket to write into
        object_key: The key to store the object under
        object_content: The text to use as the object body

    Returns:
        The output of the put_object operation

    Raises:
        Exception: if put_object raises a ClientError; the message carries the S3 error code
    """
    put_request = {"Bucket": bucket_name, "Key": object_key, "Body": object_content}
    try:
        put_response = S3_CLIENT.put_object(**put_request)
    except ClientError as client_error:
        failure_code = client_error.response.get("Error", {}).get("Code")
        raise Exception(f"❌ Error writing object to bucket: {failure_code}")
    return put_response

In [128]:
# Upload the single example object, then show its S3 path and the raw response
response = write_text_object_to_bucket(BUCKET_NAME, EXAMPLE_OBJECT_KEY, EXAMPLE_OBJECT_CONTENT)

print(f"✅ Object written to path 's3://{BUCKET_NAME}/{EXAMPLE_OBJECT_KEY}'")
print(f"{response=}")

In [129]:
# Upload every example object: flatten the list of one-entry dicts into
# (key, content) pairs and write them one by one
for object_key, object_content in (
    pair for example_object in EXAMPLE_OBJECTS for pair in example_object.items()
):
    print(f"Writing object to path 's3://{BUCKET_NAME}/{object_key}'")
    write_text_object_to_bucket(BUCKET_NAME, object_key, object_content)

### 3. Read an object from the bucket

In [130]:
# read object from s3
from botocore.response import StreamingBody

def read_text_object_from_bucket(
    bucket_name: str, 
    object_key: str) -> str | None:
    """
    Read a text object from the bucket
    """
    response = S3_CLIENT.get_object(Bucket=bucket_name, Key=object_key)
    content_streaming_body: StreamingBody = response["Body"]
    content:str = content_streaming_body.read().decode("utf-8")
    return content

In [131]:
# Read the example object back and report what was found
content = read_text_object_from_bucket(BUCKET_NAME, EXAMPLE_OBJECT_KEY)

if not content:
    print(f"❌ Object not found in path 's3://{BUCKET_NAME}/{EXAMPLE_OBJECT_KEY}'")
else:
    print(f"✅ Object read from path 's3://{BUCKET_NAME}/{EXAMPLE_OBJECT_KEY}'")
    print(f"{content=}")

### 4. List all objects in the bucket

Resources:
- https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html#creating-paginators
- https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html#resources


S3's list_objects_v2 API returns up to 1,000 objects per request, so the function uses a while loop with a continuation_token to fetch all objects across multiple pages.


In [132]:
# list objects in the bucket
try:
    from mypy_boto3_s3.type_defs import ListObjectsV2OutputTypeDef
except ImportError:
    print("mypy-boto3-s3 is not installed, skipping type checking")

def list_all_object_keys_in_bucket(bucket_name: str) -> list[str]:
    """
    Return the keys of all objects in the bucket, following pagination.

    list_objects_v2 is called repeatedly; each response's
    "NextContinuationToken" (when present) is sent back as "ContinuationToken"
    to fetch the following page.

    Raises:
        Exception: if list_objects_v2 raises a ClientError; the message carries the S3 error code
    """
    collected_keys = []
    # The first request carries only the bucket; later requests add the token.
    request_kwargs = {"Bucket": bucket_name}
    try:
        while True:
            page = S3_CLIENT.list_objects_v2(**request_kwargs)

            # "Contents" is absent when the page has no objects
            for entry in page.get("Contents", []):
                if "Key" in entry:
                    collected_keys.append(entry["Key"])

            next_token = page.get("NextContinuationToken")
            if not next_token:
                # no token means this was the last page
                return collected_keys
            request_kwargs["ContinuationToken"] = next_token
    except ClientError as client_error:
        failure_code = client_error.response.get("Error", {}).get("Code")
        raise Exception(f"❌ Error listing objects in bucket: {failure_code}")

In [133]:
# Fetch every key in the bucket and print one bullet per key
# (nothing is printed when the bucket has no objects)
objects = list_all_object_keys_in_bucket(bucket_name=BUCKET_NAME)
if len(objects) > 0:
    print("Objects in bucket:")
    for obj in objects:
        print(f" - {obj}")

### 5. Update/Overwrite an object

In [134]:
new_content = "This is updated content."

# Writing to an existing key replaces the stored object (put_object acts as an upsert)
response = write_text_object_to_bucket(
    bucket_name=BUCKET_NAME,
    object_key=EXAMPLE_OBJECT_KEY,
    object_content=new_content,
)
print(response)

# Read the object back to confirm the new content is what is stored now
updated_content = read_text_object_from_bucket(
    bucket_name=BUCKET_NAME,
    object_key=EXAMPLE_OBJECT_KEY,
)
if updated_content:
    print(f"Content of updated object '{EXAMPLE_OBJECT_KEY}':\n{updated_content}")

### 6. Delete an object from the bucket

In [135]:
try:
    from mypy_boto3_s3.type_defs import DeleteObjectOutputTypeDef
except ImportError:
    print("boto3-stubs[s3] not installed")


def delete_object_from_s3(
    bucket_name: str,
    object_key: str,
) -> Optional["DeleteObjectOutputTypeDef"]:
    """
    Remove a single object from an S3 bucket via delete_object.

    :param bucket_name: Name of the bucket to delete the object from
    :param object_key: Key of the object to delete
    :return: Response from the delete_object call
    :raises Exception: if delete_object raises a ClientError; the message carries the S3 error code
    """
    try:
        delete_response = S3_CLIENT.delete_object(Bucket=bucket_name, Key=object_key)
    except ClientError as client_error:
        failure_code = client_error.response.get("Error", {}).get("Code")
        raise Exception(f"❌ Error deleting object from bucket: {failure_code}")
    return delete_response

In [136]:
# Remove the example object and show the raw delete_object response
response = delete_object_from_s3(BUCKET_NAME, EXAMPLE_OBJECT_KEY)
print(response)

### 7. Reading a non-existent object

In [137]:
print(f"Trying to read the deleted object at 's3://{BUCKET_NAME}/{EXAMPLE_OBJECT_KEY}' ...")

# Make sure the object really is gone before attempting the read
delete_object_from_s3(bucket_name=BUCKET_NAME, object_key=EXAMPLE_OBJECT_KEY)

try:
    # try to read the deleted object
    read_text_object_from_bucket(BUCKET_NAME, EXAMPLE_OBJECT_KEY)
except ClientError as err:
    # Compare the structured error code rather than searching the message text
    error_code = err.response.get("Error", {}).get("Code")
    assert error_code == "NoSuchKey", f"Unexpected error code: {error_code}"
    print(f"✅ Reading failed as expected with error code '{error_code}'")
else:
    # Without this branch the cell would pass silently if the read succeeded
    raise AssertionError("Expected a NoSuchKey error, but the object could still be read")

### 8. Deleting a non-existent object

Note: calling `S3_CLIENT.delete_object` succeeds whether or not an object exists at the given key.

Note: The HTTP status code 204 means No Content: the request succeeded and the response has no body. S3 returns it for a delete request whether or not the object existed.

In [138]:
non_existant_object_key = f"{EXAMPLE_OBJECT_KEY}_non_existent"

# Deleting a key that was never written: print whatever delete_object returns
response = delete_object_from_s3(BUCKET_NAME, non_existant_object_key)

print(response)

### 9. Error handling when deleting an object

In [139]:
# Error code carried by the ClientError that head_object raises for a missing object
HTTP_FILE_NOT_FOUND_ERROR_CODE = "404"


class S3FileNotFoundError(Exception):
    """Raise this exception when an object at a given path is not found in S3."""


def delete_object_or_error_if_not_exists(bucket_name: str, object_key: str) -> None:
    """
    Delete an object from an S3 bucket with error handling for non-existent objects.

    The object's existence is checked with head_object first; only when that
    succeeds is delete_object called.

    :param bucket_name: Name of the bucket to delete the object from
    :param object_key: Key of the object to delete

    :raises S3FileNotFoundError: if no object exists at the given path
    :raises ClientError: if an unexpected error occurs when using S3 that is not due to file not found
    """
    try:
        # check if the object exists
        S3_CLIENT.head_object(Bucket=bucket_name, Key=object_key)
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code")
        if error_code == HTTP_FILE_NOT_FOUND_ERROR_CODE:
            # keep the original ClientError attached as the cause for debugging
            raise S3FileNotFoundError(f"Object {object_key} not found in bucket {bucket_name}") from err
        raise

    # The object exists. The delete sits outside the try block so that the
    # 404 translation above applies to the existence check only.
    S3_CLIENT.delete_object(Bucket=bucket_name, Key=object_key)

In [140]:
# The example object was deleted earlier, so this call is expected to raise
# S3FileNotFoundError; catch it and show its message
try:
    delete_object_or_error_if_not_exists(bucket_name=BUCKET_NAME, object_key=EXAMPLE_OBJECT_KEY)
except S3FileNotFoundError as not_found_error:
    print(not_found_error)

In [141]:
# Show what is left in the bucket to confirm that the example object is gone
objects = list_all_object_keys_in_bucket(bucket_name=BUCKET_NAME)
if not objects:
    print("Bucket is empty.")
else:
    print("Objects in bucket:")
    for obj in objects:
        print(f" - {obj}")

### 10. List all objects by prefix

In [142]:
def list_all_objects_in_bucket_by_prefix(bucket_name: str, prefix: str) -> list[str]:
    """
    Return the keys of all objects whose key starts with a prefix, following pagination.

    :param bucket_name: Name of the bucket to list objects from
    :param prefix: Prefix to filter objects by
    :return: List of object keys
    :raises Exception: if list_objects_v2 raises a ClientError; the message carries the S3 error code
    """
    try:
        matching_keys = []
        # Every request carries bucket and prefix; later pages add the token.
        request_kwargs = {"Bucket": bucket_name, "Prefix": prefix}
        while True:
            page = S3_CLIENT.list_objects_v2(**request_kwargs)

            # "Contents" is absent when the page has no matching objects
            for entry in page.get("Contents", []):
                if "Key" in entry:
                    matching_keys.append(entry["Key"])

            next_token = page.get("NextContinuationToken")
            if not next_token:
                # no token means this was the last page
                return matching_keys
            request_kwargs["ContinuationToken"] = next_token
    except ClientError as client_error:
        failure_code = client_error.response.get("Error", {}).get("Code")
        raise Exception(f"❌ Error listing objects in bucket: {failure_code}")




In [143]:
# Show only the objects stored under one "folder" prefix
prefix = "example-a/"
objects_by_prefix = list_all_objects_in_bucket_by_prefix(bucket_name=BUCKET_NAME, prefix=prefix)
if not objects_by_prefix:
    print(f"No objects found with prefix '{prefix}'.")
else:
    print(f"Objects with prefix '{prefix}':")
    for obj in objects_by_prefix:
        print(f" - {obj}")

### 11. Delete a bucket

In [144]:
import boto3
from botocore.exceptions import ClientError

try:
    from mypy_boto3_s3.type_defs import EmptyResponseMetadataTypeDef
except ImportError:
    print("boto3-stubs[s3] not installed")

def delete_all_objects_in_bucket(bucket_name: str) -> None:
    """
    Delete all objects in an S3 bucket.

    If listing fails because the bucket does not exist, nothing is deleted and
    no error is raised. Any other listing error is re-raised.

    :param bucket_name: Name of the bucket to empty
    """
    try:
        # list all objects in the bucket
        objects = list_all_object_keys_in_bucket(bucket_name)
    except Exception as err:
        # list_all_object_keys_in_bucket converts ClientError into a plain
        # Exception whose message contains the S3 error code, so the handler
        # has to catch Exception (which also covers ClientError) to see it.
        if "NoSuchBucket" in str(err):
            return
        raise

    # delete each object
    for obj in objects:
        S3_CLIENT.delete_object(Bucket=bucket_name, Key=obj)


def delete_bucket(bucket_name: str) -> Optional["EmptyResponseMetadataTypeDef"]:
    """
    Empty an S3 bucket and then delete the bucket itself.

    A ClientError from the delete_bucket call that mentions "NoSuchBucket" is
    treated as "nothing to delete" and yields None.

    :param bucket_name: Name of the bucket to delete
    :return: Response from the delete_bucket call or None if there is no bucket.
    """
    # the objects are removed before the bucket itself is deleted
    delete_all_objects_in_bucket(bucket_name)

    try:
        return S3_CLIENT.delete_bucket(Bucket=bucket_name)
    except ClientError as client_error:
        bucket_is_missing = "NoSuchBucket" in str(client_error)
        if not bucket_is_missing:
            raise
    return None

In [None]:
# Remove the bucket together with its contents and show the response
response = delete_bucket(bucket_name=BUCKET_NAME)
print(response)

### 12. Recursively upload a local directory to S3

In [146]:
from pathlib import Path
from typing import Generator


def recursively_upload_dir_to_bucket(
    local_dir_fpath: str | Path,
    bucket_name: str,
    target_root_prefix_in_bucket: str = "",
):
    """
    Recurse through a local directory and upload all files to S3 under a target prefix.

    The object keys within the bucket should be the relative paths of the files within the local directory.

    Example:

    path/to/local_dir/
    ├── file1.txt
    ├── file2.txt
    └── subdir
        └── file3.txt

    Would be uploaded to

    s3://bucket-name/target_root_prefix_in_bucket/
    ├── file1.txt
    ├── file2.txt
    └── subdir/
        └── file3.txt
    """
    local_dir_fpath = Path(local_dir_fpath)
    child_fpaths: Generator[Path, None, None] = local_dir_fpath.rglob("*")
    target_root_prefix_in_bucket = target_root_prefix_in_bucket.rstrip("/")

    for child_fpath in child_fpaths:
        if child_fpath.is_file():
            relative_fpath = child_fpath.relative_to(local_dir_fpath)
            object_key = str(Path(target_root_prefix_in_bucket) / relative_fpath)
            upload_file_to_bucket(child_fpath, bucket_name, object_key)


def upload_file_to_bucket(
    local_fpath: str | Path,
    bucket_name: str,
    target_key_in_bucket: str,
):
    """
    Upload a file to an S3 bucket.

    :param local_fpath: Local file path to upload
    :param bucket_name: Name of the bucket to upload the file to
    :param target_key_in_bucket: Key to upload the file to in the bucket
    """
    local_fpath = Path(local_fpath)
    with open(local_fpath, "rb") as file:
        S3_CLIENT.put_object(Bucket=bucket_name, Key=target_key_in_bucket, Body=file)

In [147]:
# upload a directory to s3
# Build a small local directory tree with sample files to upload
test_dir = Path("test_dir")
sample_files = {
    "file1.txt": "This is file 1.",
    "file2.txt": "This is file 2.",
    "subdir/file3.txt": "This is file 3.",
}
for relative_name, sample_text in sample_files.items():
    sample_fpath = test_dir / relative_name
    # creates test_dir (and subdir) on demand; existing directories are fine
    sample_fpath.parent.mkdir(parents=True, exist_ok=True)
    sample_fpath.write_text(sample_text)

# Start from a clean bucket: drop it with all its objects, then create it again
delete_bucket(BUCKET_NAME)
create_bucket(bucket_name=BUCKET_NAME)

recursively_upload_dir_to_bucket(
    local_dir_fpath=test_dir,
    bucket_name=BUCKET_NAME,
    target_root_prefix_in_bucket="test-root-dir/",
)

# Show what ended up under the target root prefix
objects = list_all_objects_in_bucket_by_prefix(bucket_name=BUCKET_NAME, prefix="test-root-dir/")
print(objects)

### 13. Rename a "folder" in an S3 bucket

In [149]:
def rename_folder_in_bucket(
    bucket_name: str,
    old_folder_prefix: str,
    new_folder_prefix: str,
):
    """
    Rename a "folder" in an S3 bucket.

    Every object whose key starts with the old prefix is moved to the same
    relative key under the new prefix.

    Example:

    Given the following structure in S3:

    s3://bucket-name/<old_folder_prefix>/
    ├── file1.txt
    ├── file2.txt
    └── subdir/
        └── file3.txt

    After renaming

    s3://bucket-name/<new_folder_prefix>/
    ├── file1.txt
    ├── file2.txt
    └── subdir/
        └── file3.txt

    :param bucket_name: Name of the S3 bucket
    :param old_folder_prefix: Current "folder" prefix in the bucket (a trailing slash is optional)
    :param new_folder_prefix: "Folder" prefix to move the objects to (a trailing slash is optional)
    """
    # Normalize both prefixes to end in exactly one "/" so that "a" and "a/"
    # are treated alike and "a" does not also match keys under "ab/".
    old_folder_prefix = old_folder_prefix.rstrip("/") + "/"
    new_folder_prefix = new_folder_prefix.rstrip("/") + "/"

    if old_folder_prefix == new_folder_prefix:
        # Nothing to rename; moving an object onto its own key would copy it
        # and then delete that very key.
        return

    # List all objects in the source folder
    objects = list_all_objects_in_bucket_by_prefix(bucket_name=bucket_name, prefix=old_folder_prefix)

    for current_key in objects:
        # Swap only the leading prefix; the rest of the key is kept unchanged
        new_key = new_folder_prefix + current_key.removeprefix(old_folder_prefix)

        # Move the object to the new key
        move_object_in_bucket(bucket_name=bucket_name, source_key=current_key, destination_key=new_key)


def move_object_in_bucket(bucket_name: str, source_key: str, destination_key: str):
    """
    Relocate an object inside one bucket: copy it to the destination key, then delete the source key.

    :param bucket_name: Name of the S3 bucket
    :param source_key: Source key of the object to move
    :param destination_key: Destination key of the object
    """
    # The copy runs first, so the source is only removed once copy_object has returned.
    copy_source = {"Bucket": bucket_name, "Key": source_key}
    S3_CLIENT.copy_object(Bucket=bucket_name, CopySource=copy_source, Key=destination_key)

    S3_CLIENT.delete_object(Bucket=bucket_name, Key=source_key)


"""Test the rename_folder_in_bucket function."""


def upload_file_to_bucket(
    local_fpath: str | Path,
    bucket_name: str,
    target_key_in_bucket: str,
):
    """
    Upload a file to an S3 bucket.

    :param local_fpath: Local file path to upload
    :param bucket_name: Name of the bucket to upload the file to
    :param target_key_in_bucket: Key to upload the file to in the bucket
    """
    local_fpath = Path(local_fpath)
    with open(local_fpath, "rb") as file:
        S3_CLIENT.put_object(Bucket=bucket_name, Key=target_key_in_bucket, Body=file)

In [150]:
# rename folder in bucket

# Source and destination "folders" for the rename demonstration
test_source_folder = "nested/source-folder/"
test_dest_folder = "nested/destination-folder/"

create_bucket(bucket_name=BUCKET_NAME)

# Upload the local sample files into the source folder, keeping their relative paths
for relative_name in ("file1.txt", "file2.txt", "subdir/file3.txt"):
    upload_file_to_bucket(
        local_fpath=f"test_dir/{relative_name}",
        bucket_name=BUCKET_NAME,
        target_key_in_bucket=test_source_folder + relative_name,
    )

# Move everything from the source folder to the destination folder
rename_folder_in_bucket(
    bucket_name=BUCKET_NAME,
    old_folder_prefix=test_source_folder,
    new_folder_prefix=test_dest_folder,
)

# Show what is now stored under the destination folder
objects = list_all_objects_in_bucket_by_prefix(bucket_name=BUCKET_NAME, prefix=test_dest_folder)
print(objects)