# Rename files in S3 given an CSV file with old and new file names

This notebook is part of the Spyfish Aotearoa data cleaning effort.


In [None]:
# Last change: 2025.10.07

In [1]:
import pandas as pd
import os

from sftk.common import LOCAL_DATA_FOLDER_PATH, MOVIE_EXTENSIONS
from sftk.s3_handler import S3Handler

# Connect to S3

In [2]:
s3_handler = S3Handler()

2025-10-07 17:10:19,188 - INFO - s3_handler.py:95 - Created a new instance of the S3Handler class.


# Get S3 keys of movies of interest

In [10]:
foo = s3_handler.get_file_paths_set_from_s3(prefix="media/AHE",suffixes=".mp4")

In [25]:
import csv

output_csv_file = 'file_paths_per_row.csv'

with open(output_csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # You might want a header to describe the data
    writer.writerow(["File Path"])

    # Iterate through the set and write each item as a new row
    # Note: Sets are unordered, so the order of rows in the CSV won't be guaranteed.
    # If order matters, convert to a list and sort first: for item in sorted(list(foo)):
    for item in foo:
        writer.writerow([item])

print(f"File paths saved to {output_csv_file}")

File paths saved to file_paths_per_row.csv


# Read the csv with old and new filenames (S3 keys)

Change variables here to match your use case 

In [12]:
# define the path of the csv
file_path = "rename_movies.csv"

# Define the names of the columns containing the old names and the new names
new_name_column = "NEW"
old_name_column = "OLD"

In [13]:
# Read the CSV file containing  into a DataFrame
rename_csv_df = pd.read_csv(file_path)

# Extract the dict with keys with old name and values with the new name
rename_pairs = dict(zip(rename_csv_df[old_name_column], rename_csv_df[new_name_column]))

# review the dictionary
print(rename_pairs)

{'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_100_01/GX010036 - OKU100.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_100_01_01/OKU_20250221_BUV_OKU_100_01.mp4', 'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_101_01/GX010037 - OKU101.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_101_01_01/OKU_20250221_BUV_OKU_101_01.mp4', 'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_102_01/GX010011.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_102_01_01/OKU_20250221_BUV_OKU_102_01.mp4', 'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_103_01/GX010006 OKU103.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_103_01_01/OKU_20250221_BUV_OKU_103_01.mp4', 'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_104_01/GX010007.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_104_01_01/OKU_20250221_BUV_OKU_104_01.mp4', 'media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_105_01/GX010038 - OKU105.MP4': 'media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_105_01_01/OKU_20250221_BUV_OKU_105_01.mp4', 'media/OKU_20250226_BUV/OKU_2

# Rename files in the bucket.

Try_run does not rename, but runs the code to see what will be affected, change to False when ready.

In [None]:
s3_handler = S3Handler()
s3_handler.rename_s3_objects_from_dict(rename_pairs, suffixes=MOVIE_EXTENSIONS, try_run=False)

2025-10-07 17:14:07,442 - INFO - s3_handler.py:406 - Renamed: media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_100_01/GX010036 - OKU100.MP4 ➜ media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_100_01_01/OKU_20250221_BUV_OKU_100_01.mp4
2025-10-07 17:14:28,125 - INFO - s3_handler.py:406 - Renamed: media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_101_01/GX010037 - OKU101.MP4 ➜ media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_101_01_01/OKU_20250221_BUV_OKU_101_01.mp4
2025-10-07 17:14:53,010 - INFO - s3_handler.py:406 - Renamed: media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_102_01/GX010011.MP4 ➜ media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_102_01_01/OKU_20250221_BUV_OKU_102_01.mp4
2025-10-07 17:15:19,958 - INFO - s3_handler.py:406 - Renamed: media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_103_01/GX010006 OKU103.MP4 ➜ media/OKU_20250221_BUV/OKU_20250221_BUV_OKU_103_01_01/OKU_20250221_BUV_OKU_103_01.mp4
2025-10-07 17:15:42,874 - INFO - s3_handler.py:406 - Renamed: media/OKU_20250226_BUV/OKU_20250226_BUV_OKU_104_01/GX0100

In [None]:

# End