In [None]:
import os
import csv
import re
from collections import defaultdict

def analyze_video_sequences(folder_path, output_csv, video_extensions):
    """
    Inventory ``c#_v#`` video files per camera and report sequence gaps.

    Regular files directly inside ``folder_path`` whose names have the form
    ``c<camera>_v<sequence>.<ext>`` (matched case-insensitively, with ``<ext>``
    being one of ``video_extensions``) are grouped by camera number. For each
    camera the files are ordered by sequence number and every number between
    1 and the highest sequence found that has no file is reported as missing.

    The CSV written to ``output_csv`` has two columns per camera,
    ``C<n> Video Name`` and ``C<n> Missing Sequence``. Within a camera the
    file names come first; the missing entries (formatted ``v<n>``) start on
    the row after the last file name. Shorter cameras are padded with empty
    cells.

    Args:
        folder_path: Directory to scan (not recursive).
        output_csv: Path of the CSV file to create or overwrite.
        video_extensions: Iterable of extensions, with or without a leading
            dot (e.g. ``['.mp4', 'avi']``). They are matched literally.

    Returns:
        None. If no file matches, a message is printed and no CSV is written.
    """
    # Escape every extension so characters such as '.' or '+' are matched
    # literally instead of being interpreted as regex syntax.
    extension_alternatives = '|'.join(
        re.escape(ext.lstrip('.')) for ext in video_extensions
    )
    pattern = re.compile(
        r'c(\d+)_v(\d+)\.(?:' + extension_alternatives + r')$',
        re.IGNORECASE,
    )

    print("🔎 Starting file analysis...")

    # 1. Group (sequence, file name) pairs by camera number.
    camera_data = defaultdict(list)
    for file_name in os.listdir(folder_path):
        # Directories that happen to match the naming scheme are ignored.
        if not os.path.isfile(os.path.join(folder_path, file_name)):
            continue
        match = pattern.match(file_name)
        if match:
            camera_id = int(match.group(1))
            video_seq = int(match.group(2))
            camera_data[camera_id].append((video_seq, file_name))

    if not camera_data:
        print("❌ No matching video files found with the 'c#_v#' pattern.")
        return

    # 2. Per camera: order the files and work out which sequences are absent.
    sorted_camera_ids = sorted(camera_data)
    per_camera = []  # one (names, missing) pair per camera, in camera order
    for cam_id in sorted_camera_ids:
        # Sort on the sequence number only so that files sharing a sequence
        # keep the order in which the directory listing returned them.
        entries = sorted(camera_data[cam_id], key=lambda entry: entry[0])
        present = {seq for seq, _ in entries}
        highest = entries[-1][0]
        # Sequences are assumed to start at 1.
        missing = [f"v{n}" for n in range(1, highest + 1) if n not in present]
        per_camera.append(([name for _, name in entries], missing))

    # 3. Lay out the columns, padded to a common height, then transpose.
    header = []
    for cam_id in sorted_camera_ids:
        header.extend([f"C{cam_id} Video Name", f"C{cam_id} Missing Sequence"])

    num_rows = max(len(names) + len(missing) for names, missing in per_camera)
    columns = []
    for names, missing in per_camera:
        tail = num_rows - len(names) - len(missing)
        # Names occupy the top rows of the first column ...
        columns.append(names + [''] * (len(missing) + tail))
        # ... and the missing entries start right below them in the second.
        columns.append([''] * len(names) + missing + [''] * tail)

    csv_rows = [header]
    csv_rows.extend(list(row) for row in zip(*columns))

    # 4. Write the CSV file. Failures are reported, not raised, so a bad
    #    output path does not abort an interactive session.
    try:
        with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerows(csv_rows)
    except (OSError, UnicodeError, csv.Error) as e:
        print(f"❌ An error occurred while writing the CSV: {e}")
    else:
        print(f"\n✅ Successfully analyzed data for {len(sorted_camera_ids)} cameras.")
        print(f"File saved to **{output_csv}**")

# --- Settings ---
# Directory containing the c#_v# video files to inventory.
VIDEO_FOLDER = "C:/Users/LT/Downloads/fp/FP_S2"

# Destination path of the generated inventory CSV.
OUTPUT_FILE = "C:/Users/LT/Downloads/video_inventory.csv"

# File extensions that are treated as videos.
EXTENSIONS = [
    '.mp4',
    '.avi',
    '.mov',
    '.mkv',
]

# --- Entry point ---
if __name__ == "__main__":
    analyze_video_sequences(
        VIDEO_FOLDER,
        OUTPUT_FILE,
        EXTENSIONS,
    )

🔎 Starting file analysis...

✅ Successfully analyzed data for 9 cameras.
File saved to **C:/Users/LT/Downloads/video_inventory.csv**


: 

In [None]:
import pandas as pd
import re

def _sequences_in_column(df, column, pattern):
    """Return the set of v# numbers (as ints) found in ``df[column]``.

    A column that does not exist contributes no sequences; NaN cells and
    cells that do not match ``pattern`` are skipped.
    """
    if column not in df.columns:
        return set()
    found = set()
    for value in df[column].dropna():
        match = pattern.search(str(value))
        if match:
            found.add(int(match.group(1)))
    return found


def find_total_missing_sequences(input_csv, output_csv):
    """
    Add a ``C<n> Total Missing`` column per camera to an inventory CSV.

    Camera groups are discovered from columns whose name contains
    ``C<n> Video Name``. For each camera, the sequence numbers (v#) found in
    the ``C<n> Video Name`` column and in the ``C<n> csvs`` column are
    combined; every number from 1 up to the highest one found that appears in
    neither column is listed (formatted ``v<n>``) in the new column. A camera
    without a ``C<n> csvs`` column is analysed from its video names alone.

    The result, i.e. the original columns followed by the new ones with all
    columns padded with empty cells to a common length, is written to
    ``output_csv``.

    Args:
        input_csv: Path of the inventory CSV to read.
        output_csv: Path of the CSV file to create or overwrite.

    Returns:
        None. If the input file does not exist or contains no camera columns,
        a message is printed and nothing is written.
    """
    try:
        df = pd.read_csv(input_csv)
    except FileNotFoundError:
        print(f"❌ Error: Input file not found at {input_csv}")
        return

    # Find all unique camera IDs from the 'C<n> Video Name' headers.
    header_matches = (re.search(r'C(\d+) Video Name', col) for col in df.columns)
    camera_ids = sorted({int(m.group(1)) for m in header_matches if m})

    if not camera_ids:
        print("❌ Could not identify camera groups (C1, C2, etc.) in the columns.")
        return

    video_pattern = re.compile(r'c\d+_v(\d+)(\..+)?$', re.IGNORECASE)

    print("🔎 Starting total missing sequence analysis...")

    # --- 1. Collect the missing sequences for every camera ---
    missing_by_column = {}
    for cam_id in camera_ids:
        present = (
            _sequences_in_column(df, f'C{cam_id} Video Name', video_pattern)
            | _sequences_in_column(df, f'C{cam_id} csvs', video_pattern)
        )
        max_seq = max(present, default=0)
        missing = [f"v{seq}" for seq in range(1, max_seq + 1) if seq not in present]
        missing_by_column[f'C{cam_id} Total Missing'] = missing

        print(f"  - C{cam_id}: Found {len(missing)} sequences totally missing up to v{max_seq}.")

    # --- 2. Pad everything to one common length ---
    # pandas requires equal-length columns, so each new column is padded with
    # empty strings up to the final row count before the frame is built.
    longest_missing = max(len(values) for values in missing_by_column.values())
    total_rows = max(len(df), longest_missing)

    new_df = pd.DataFrame({
        col: values + [''] * (total_rows - len(values))
        for col, values in missing_by_column.items()
    })

    # The original data is extended with blank rows when a missing list is
    # longer than the input table.
    if len(df) < total_rows:
        blank_rows = pd.DataFrame(
            '',
            index=range(len(df), total_rows),
            columns=df.columns,
        )
        df = pd.concat([df, blank_rows])

    # --- 3. Merge side by side and save ---
    final_df = pd.concat(
        [df.reset_index(drop=True), new_df.reset_index(drop=True)],
        axis=1,
    )
    final_df.to_csv(output_csv, index=False, encoding='utf-8')
    print(f"\n✅ Analysis complete. Results saved to **{output_csv}**")


# --- Settings ---
# Inventory CSV to read, and the path of the augmented CSV to write.
INPUT_FILE = "C:/Users/LT/Downloads/video_inventory (1).csv"
OUTPUT_FILE = "C:/Users/LT/Downloads/video_inventory_over.csv"

# --- Entry point ---
if __name__ == "__main__":
    find_total_missing_sequences(
        INPUT_FILE,
        OUTPUT_FILE,
    )

🔎 Starting total missing sequence analysis...
  - C1: Found 137 sequences totally missing up to v267.
  - C2: Found 152 sequences totally missing up to v279.
  - C3: Found 182 sequences totally missing up to v280.
  - C4: Found 149 sequences totally missing up to v329.
  - C5: Found 140 sequences totally missing up to v304.
  - C6: Found 140 sequences totally missing up to v271.
  - C7: Found 195 sequences totally missing up to v281.
  - C8: Found 168 sequences totally missing up to v260.
  - C9: Found 207 sequences totally missing up to v257.
  - C10: Found 202 sequences totally missing up to v271.


ValueError: All arrays must be of the same length