# GoPro Video Concatenation
This notebook uses the `sftk` package to find and concatenate multi-part GoPro video files stored in an S3 bucket. It identifies groups of videos belonging to the same `DropID` and combines each group into a single MP4 file.

# Requirements
This section imports the `sftk` classes the notebook needs to run: `S3Handler` for interacting with AWS S3 and `VideoProcessor` for processing the videos.

In [None]:
from sftk.s3_handler import S3Handler
from sftk.video_handler import VideoProcessor

In [None]:
# --- Configuration ---
# Where to look in the bucket and which files count as GoPro parts
S3_PREFIX = "media/HOR_20240408_BUV"  # e.g., "media/SURVEY_ID"
GOPRO_PREFIX = "G"  # Prefix for GoPro video files (e.g., 'GX', 'GH')

# Safety switches
DELETE_ORIGINALS = True  # True: delete the original video parts after concatenation
TEST_MODE = False  # True: do not upload concatenated files or delete originals (TBC, process only the first drop)

# Concurrency settings
PARALLEL_DROPS = 2  # Number of drops to process simultaneously
SEQUENTIAL_DOWNLOAD = False  # True: download the files within each drop one at a time
DOWNLOAD_THREADS = 4  # Max number of parallel downloads for a single drop

# 1. Create the S3 handler, then the video processor driven by the
#    configuration constants defined earlier in this cell
s3_handler = S3Handler()
processor = VideoProcessor(
    s3_handler,
    prefix=S3_PREFIX,
    gopro_prefix=GOPRO_PREFIX,
    delete_originals=DELETE_ORIGINALS,
    test_mode=TEST_MODE,
    download_threads=DOWNLOAD_THREADS,
    parallel_drops=PARALLEL_DROPS,
    sequential_download=SEQUENTIAL_DOWNLOAD,
)

# 2. Show the movies that will be processed before running anything
display(processor.filtered_df)

In [None]:
# Process the GoPro files from each drop using the processor configured above.
# NOTE: with DELETE_ORIGINALS=True and TEST_MODE=False the configuration comments
# say the original parts are deleted after concatenation - check both flags first.
processor.process_gopro_videos()

This section finds individual files that can be removed because a concatenated version already exists.

In [None]:
# TBC: functionality to be added to processor
# Collect the individual files that can be removed because a concatenated
# version already exists
files_to_remove = processor.find_already_concatenated_movies_df(size_tolerance=0.01)

# List every removal candidate together with its size as reported in MB
for _, candidate in files_to_remove.iterrows():
    key = candidate['Key']
    size_mb = candidate['Size'] / 1024 / 1024
    print(f"Safe to remove: {key} ({size_mb:.1f}MB)")

This section removes the redundant files. The `s3_handler.s3.delete_object` line is commented out for safety. Uncomment it to actually delete the files.

In [None]:
# Walk through the redundant files and report each one; the actual S3 deletion
# stays commented out so that running this cell is a dry run by default
for _, candidate in files_to_remove.iterrows():
    key = candidate['Key']
    size_mb = candidate['Size'] / 1024 / 1024
    print(f"Removing: {key} ({size_mb:.1f}MB)")
    # s3_handler.s3.delete_object(Bucket=s3_handler.bucket, Key=row['Key'])

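This section enables the code to be run on NeSI: it loads FFmpeg through the environment module system before initializing the processor and concatenating the videos.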

In [None]:
import subprocess
import os

# --- Configuration ---
# Where to look in the bucket and which files count as GoPro parts
S3_PREFIX = "media/BNP_20210127"  # e.g., "media/SURVEY_ID"
GOPRO_PREFIX = "G"  # Prefix for GoPro video files (e.g., 'GX', 'GH')

# Safety switches
DELETE_ORIGINALS = False  # True: delete the original video parts after concatenation
TEST_MODE = False  # True: do not upload concatenated files or delete originals (TBC, process only the first drop)

# Concurrency settings
PARALLEL_DROPS = 1  # Number of drops to process simultaneously
SEQUENTIAL_DOWNLOAD = False  # True: download the files within each drop one at a time
DOWNLOAD_THREADS = 4  # Max number of parallel downloads for a single drop


def setup_ffmpeg_nesi():
    """Load the NeSI FFmpeg module and expose it to this Python process.

    A bash subprocess sources the module system, loads the FFmpeg module and
    dumps its environment with ``env``. The ``PATH``, ``LD_LIBRARY_PATH`` and
    ``LIBRARY_PATH`` values from that dump are copied into ``os.environ`` so
    that subprocesses started from this process inherit them. The setup is
    then verified by running ``ffmpeg -version``.

    Returns:
        bool: True if ``ffmpeg -version`` succeeds after the module was
        loaded; False if the module could not be loaded (including a timeout
        or a missing ``bash``) or the verification fails.
    """
    # `|| exit 1` is required: without it the script's exit status is the one
    # of the trailing `env`, so a failed `module load` would go unnoticed and
    # the returncode check below could never trigger.
    module_init_script = """
source /etc/profile.d/modules.sh
module purge
module load FFmpeg/4.2.2-GCCcore-9.2.0 || exit 1
env
"""

    # Run the script and capture the resulting environment. A timeout or a
    # missing bash is reported as a failure instead of crashing the caller.
    try:
        result = subprocess.run(
            ["bash", "-c", module_init_script],
            capture_output=True,
            text=True,
            timeout=30
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"❌ Error loading module: {e}")
        return False

    if result.returncode != 0:
        print(f"❌ Error loading module: {result.stderr}")
        return False

    # Copy only the path-related variables; the rest of the dumped
    # environment is deliberately left untouched in this process.
    path_vars = ('PATH', 'LD_LIBRARY_PATH', 'LIBRARY_PATH')
    for line in result.stdout.split('\n'):
        key, sep, value = line.partition('=')
        if sep and key in path_vars:
            os.environ[key] = value

    # Verify ffmpeg works with the updated environment
    try:
        test_result = subprocess.run(
            ["ffmpeg", "-version"],
            capture_output=True,
            text=True,
            timeout=5
        )
    except Exception as e:
        # Best-effort check: any failure to launch ffmpeg means "not ready"
        print(f"❌ Error testing ffmpeg: {e}")
        return False

    if test_result.returncode != 0:
        print(f"❌ FFmpeg test failed: {test_result.stderr}")
        return False

    print("✓ FFmpeg loaded successfully via module system")
    # First line of the output is the version banner
    print(test_result.stdout.split('\n')[0])
    return True

# Load FFmpeg first; the processor is only created when that succeeded
ffmpeg_ready = setup_ffmpeg_nesi()

if not ffmpeg_ready:
    print("\n❌ Failed to setup FFmpeg. Check module availability.")
else:
    print("\n✓ Ready to process videos")

    # Imported here so sftk is only loaded after the FFmpeg setup
    from sftk.s3_handler import S3Handler
    from sftk.video_handler import VideoProcessor

    # Build the processor from the configuration constants of this cell
    s3_handler = S3Handler()
    processor = VideoProcessor(
        s3_handler,
        prefix=S3_PREFIX,
        gopro_prefix=GOPRO_PREFIX,
        delete_originals=DELETE_ORIGINALS,
        test_mode=TEST_MODE,
        download_threads=DOWNLOAD_THREADS,
        parallel_drops=PARALLEL_DROPS,
        sequential_download=SEQUENTIAL_DOWNLOAD,
    )

    # Process the GoPro files from each drop
    processor.process_gopro_videos()