This notebook contains scripts for renaming video files stored in an AWS S3 bucket.

In [None]:
# TODO: refactor, merge and extract utilities, and add tests

In [None]:
# @title <font size="5">↓ ឵឵<i>Install and load requirements</i></font> { vertical-output: true }

!pip install boto3
import getpass
import boto3
import pandas as pd
from IPython.display import display, HTML


In [None]:
def aws_credentials():
    """
    Prompt for the AWS credentials that give access to the S3 bucket.

    Both values are read with ``getpass``, key id first and secret second.

    :return: tuple of (access key id, secret access key) as typed by the user.
    """
    key_id_prompt = "Enter the key id for the aws server"
    secret_prompt = "Enter the secret access key for the aws server"

    # Ask for the key id before the secret so the prompts keep their order
    key_id = getpass.getpass(key_id_prompt)
    secret_key = getpass.getpass(secret_prompt)

    return key_id, secret_key


def connect_s3(aws_access_key_id: str, aws_secret_access_key: str):
    """
    Create a boto3 client for the S3 service from explicit credentials.

    :param aws_access_key_id: access key id of the AWS account.
    :param aws_secret_access_key: secret access key paired with the key id.
    :return: the client object returned by ``boto3.client``.
    """
    credentials = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
    }
    return boto3.client("s3", **credentials)


def get_aws_client():
    """
    Ask for the AWS credentials and return an S3 client built from them.

    :return: the client produced by ``connect_s3``.
    """
    # Prompt for the key pair and hand it straight to the S3 connector
    return connect_s3(*aws_credentials())

def get_matching_s3_objects(
    client: boto3.client, bucket: str, prefix: str = "", suffix: str = ""
):
    """
    ## Code modified from alexwlchan (https://alexwlchan.net/2019/07/listing-s3-keys/)
    Generate objects in an S3 bucket.

    Objects are listed page by page with the ``list_objects_v2`` paginator;
    the prefix is applied by the S3 API and the suffix is checked locally.

    :param client: S3 client.
    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch objects whose key starts with
        this prefix (optional). A tuple or list of prefixes is also
        accepted; they are listed one after the other.
    :param suffix: Only fetch objects whose keys end with
        this suffix (optional). A tuple, list or other iterable of
        suffixes is also accepted; a key matches if it ends with any of them.
    :return: generator yielding the object dictionaries of the matching keys.
    """

    paginator = client.get_paginator("list_objects_v2")

    # We can pass the prefix directly to the S3 API.  If the user has passed
    # a tuple or list of prefixes, we go through them one by one.
    prefixes = (prefix,) if isinstance(prefix, str) else prefix

    # str.endswith only accepts a str or a tuple of str; any other iterable
    # (e.g. a list) would raise TypeError, so convert it up front.
    suffixes = suffix if isinstance(suffix, (str, tuple)) else tuple(suffix)

    for key_prefix in prefixes:
        for page in paginator.paginate(Bucket=bucket, Prefix=key_prefix):
            contents = page.get("Contents")
            if contents is None:
                # A page without "Contents" lists no objects: stop paging
                # through this prefix and move on to the next one.
                break

            for obj in contents:
                if obj["Key"].endswith(suffixes):
                    yield obj


def get_matching_s3_keys(
    client: boto3.client, bucket: str, prefix: str = "", suffix: str = ""
):
    """
    ## Code from alexwlchan (https://alexwlchan.net/2019/07/listing-s3-keys/)
    Collect the keys of the matching objects in an S3 bucket.

    :param client: S3 client.
    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :param suffix: Only fetch keys that end with this suffix (optional).
    :return: list with the "Key" of every object yielded by
        ``get_matching_s3_objects`` for the same arguments.
    """

    # Walk the matching objects and keep only their key
    matching_objects = get_matching_s3_objects(client, bucket, prefix, suffix)
    return [s3_object["Key"] for s3_object in matching_objects]


def get_movie_extensions():
    """
    List the file extensions used to select movie files.

    :return: tuple of extension strings without a leading dot: five
        lower-case entries plus upper-case "MOV" and "MP4".
    """
    return ("wmv", "mpg", "mov", "avi", "mp4", "MOV", "MP4")

def get_movies_df(client: boto3.client, bucket: str, prefix: str = "") -> pd.DataFrame:
    """
    Get a DataFrame containing the keys of all movie files in the specified S3 bucket.

    A key counts as a movie when it ends with a dot followed by one of the
    extensions from ``get_movie_extensions``; the comparison ignores case, so
    "clip.MP4", "clip.Mp4" and "clip.mp4" are all selected, while a key such
    as "temp4" (no dot before the extension) is not.

    :param client: S3 client.
    :param bucket: Name of the S3 bucket.
    :param prefix: Only fetch keys that start with this prefix (optional).
    :return: DataFrame with a single column, 'Key', containing paths to movie files.
    """
    # Normalise the configured extensions to unique lower-case ".ext" suffixes
    # (tolerating entries that already carry a leading dot)
    movie_suffixes = tuple(
        sorted({"." + ext.lower().lstrip(".") for ext in get_movie_extensions()})
    )

    # List every key under the prefix and filter locally, so the extension
    # check can be made case-insensitive
    movie_keys = [
        key
        for key in get_matching_s3_keys(client, bucket, prefix)
        if key.lower().endswith(movie_suffixes)
    ]

    # Convert the list of movie keys to a DataFrame
    movies_df = pd.DataFrame(movie_keys, columns=["Key"])

    return movies_df

# Function to preview underwater movies
def preview_movie(
    client,
    Key,
    bucket="marine-buv",
    expires_in=26400,
):
    """
    Build an HTML video player for a movie stored in an S3 bucket.

    A presigned "get_object" URL is generated for the movie and embedded as the
    source of a <video> element.

    :param client: S3 client used to generate the presigned URL
    :param Key: key (path inside the bucket) of the movie you want to preview
    :param bucket: name of the bucket that holds the movie (default "marine-buv")
    :param expires_in: number of seconds the presigned URL remains valid (default 26400)
    :return: HTML object that can be displayed in the notebook
    """
    # Local import: the html module is not among the notebook's top-level imports
    import html

    movie_url = client.generate_presigned_url(
        "get_object",
        Params={"Bucket": bucket, "Key": Key},
        ExpiresIn=expires_in,
    )

    # The presigned URL carries a query string full of "&" and "=", so it is
    # escaped and placed in a quoted attribute to keep the markup well-formed
    source_url = html.escape(movie_url, quote=True)

    # Adjust the width of the video section based on your preference
    video_width = "60%"  # Adjust as needed

    html_code = f"""<html>
            <div style="display: flex; align-items: center; width: 100%;">
                <div style="width: {video_width}; padding-right: 10px;">
                    <video width="100%" controls>
                        <source src="{source_url}">
                    </video>
                </div>
            </div>
            </html>"""

    return HTML(html_code)

In [None]:
# Connect to S3: get_aws_client() prompts for the AWS key id and secret
# access key, then returns the client reused by the cells below
client = get_aws_client()

In [None]:
# List all movies available in the "marine-buv" bucket
# (the empty prefix means keys are not restricted to any folder)
movies_df = get_movies_df(client, "marine-buv", "")

In [None]:
# Preview a single movie, identified by its key inside the bucket
preview_movie(client, "TON_20241125_BUV/TON_056/TON_056.mp4")

End