In [None]:
import os
import csv
import re
from collections import defaultdict

def _build_filename_pattern(video_extensions):
    """Compile the regex that recognises ``c<camera>_v<sequence>.<ext>`` names.

    Args:
        video_extensions: Iterable of extensions, with or without a leading dot.

    Returns:
        A compiled, case-insensitive pattern meant to be used with
        ``fullmatch``. Group 1 is the camera number, group 2 the sequence
        number.
    """
    # Escape every extension so characters such as '.' or '+' are matched
    # literally, and drop empty entries so a bare "c1_v1." is never accepted.
    suffixes = [
        re.escape(ext.lstrip('.'))
        for ext in video_extensions
        if ext.lstrip('.')
    ]
    if not suffixes:
        # No usable extension: return a pattern that can never match.
        return re.compile(r'(?!)')
    return re.compile(
        r'c(\d+)_v(\d+)\.(?:' + '|'.join(suffixes) + r')',
        re.IGNORECASE,
    )


def _group_videos_by_camera(folder_path, pattern):
    """Map each camera number to the ``(sequence, file_name)`` pairs found.

    Only regular files directly inside ``folder_path`` are considered;
    sub-directories are skipped and not descended into.
    """
    camera_data = defaultdict(list)
    for file_name in os.listdir(folder_path):
        if not os.path.isfile(os.path.join(folder_path, file_name)):
            continue
        match = pattern.fullmatch(file_name)
        if match:
            camera_id = int(match.group(1))
            video_seq = int(match.group(2))
            camera_data[camera_id].append((video_seq, file_name))
    return camera_data


def _camera_column(videos):
    """Build the ``(video name, missing sequence)`` cell pairs of one camera.

    The file names come first, ordered by sequence number, each paired with
    an empty "missing" cell. They are followed by one ``('', 'v<n>')`` pair
    for every sequence number between 1 and the highest one found that has
    no file.
    """
    # Sort on the sequence number only, so files sharing a sequence number
    # keep the order in which they were discovered.
    ordered = sorted(videos, key=lambda video: video[0])
    present = {seq for seq, _ in ordered}
    highest = ordered[-1][0]
    # Sequences are assumed to start at 1.
    missing = [f"v{n}" for n in range(1, highest + 1) if n not in present]
    return [(name, '') for _, name in ordered] + [('', label) for label in missing]


def analyze_video_sequences(folder_path, output_csv, video_extensions):
    """Inventory the videos of each camera and report gaps in their numbering.

    Files in ``folder_path`` whose whole name has the form
    ``c<camera>_v<sequence>.<ext>`` (case-insensitive) are grouped by camera
    number. For every camera the CSV gets two columns, "C<n> Video Name" and
    "C<n> Missing Sequence": the file names ordered by sequence number come
    first, then the sequence numbers (as ``v<n>``) that are absent between 1
    and the highest sequence found. Shorter columns are padded with empty
    cells.

    Args:
        folder_path: Directory to scan (not recursive).
        output_csv: Path of the CSV file to create or overwrite.
        video_extensions: Accepted file extensions, with or without a
            leading dot.

    Returns:
        None. Progress and errors are reported on standard output. When no
        file matches, nothing is written.

    Raises:
        OSError: If ``folder_path`` cannot be listed. Errors raised while
            writing the CSV are caught and printed instead.
    """
    pattern = _build_filename_pattern(video_extensions)

    # Status icons are spelled as escapes so the source stays pure ASCII and
    # cannot be mangled by a wrong file encoding.
    print("\U0001F50E Starting file analysis...")

    camera_data = _group_videos_by_camera(folder_path, pattern)
    if not camera_data:
        print("\u274C No matching video files found with the 'c#_v#' pattern.")
        return

    sorted_camera_ids = sorted(camera_data)
    columns = [_camera_column(camera_data[cam_id]) for cam_id in sorted_camera_ids]

    header = []
    for cam_id in sorted_camera_ids:
        header.extend([f"C{cam_id} Video Name", f"C{cam_id} Missing Sequence"])
    csv_rows = [header]

    # Every camera contributes two cells per row; cameras with fewer entries
    # than the longest column are padded with blanks.
    blank_pair = ('', '')
    for i in range(max(len(column) for column in columns)):
        row = []
        for column in columns:
            row.extend(column[i] if i < len(column) else blank_pair)
        csv_rows.append(row)

    try:
        with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(csv_rows)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not raise.
        print(f"\u274C An error occurred while writing the CSV: {e}")
    else:
        print(f"\n\u2705 Successfully analyzed data for {len(sorted_camera_ids)} cameras.")
        print(f"File saved to **{output_csv}**")

# --- Settings ---
# Folder handed to the analysis as the directory to scan.
VIDEO_FOLDER = 'C:/Users/LT/Downloads/fp/FP_S2'

# Path of the CSV report that the analysis writes.
OUTPUT_FILE = 'C:/Users/LT/Downloads/video_inventory.csv'

# File extensions that count as videos.
EXTENSIONS = [
    '.mp4',
    '.avi',
    '.mov',
    '.mkv',
]

# --- Entry point: runs only when the file is executed as a script ---
if __name__ == '__main__':
    analyze_video_sequences(
        folder_path=VIDEO_FOLDER,
        output_csv=OUTPUT_FILE,
        video_extensions=EXTENSIONS,
    )

🔎 Starting file analysis...
❌ No matching video files found with the 'c#_v#' pattern.
