# Extract Scene Frames from Videos

# required Input

In [27]:
import pandas as pd
%load_ext autoreload
%autoreload 2
import os
input_folder: str = os.path.join("test_video_frames") # folder with (subfolders of) videos
output_folder: str = os.path.join("output_video_frames") # folder with extracted scene images (one subfolder per video), will be created if not existant
project_number: str = "1"
project_name: str = "abc"

file_extensions_video = ['mpeg', 'mp4', 'gif', 'avi', 'mov']

n_scene_frames = 3 #number of frames extracted per scene

image_length_max = 500 # size of extracted frames

from pathlib import Path
project_folder = Path(input_folder).parent.absolute()
project_title = "_".join([project_number, project_name])
print(f"Project: {project_title}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Project: 1_abc


In [28]:
import os
try:
    import scenedetect
except:
    os.system("pip install scenedetect==0.5.5 --quiet")
    pass

try:
    import cv2
except:
    os.system("pip install opencv-python==4.5.3.56 --quiet")
    pass

try:
    import timm
except:
    os.system("pip install timm")
    pass

try:
    import tqdm
except:
    os.system("pip install tqdm")
    pass

try:
    import sklearn
except:
    os.system("pip install scikit-learn --quiet")
    pass

try:
    import pandas_profiling
except:
    os.system("pip install pandas-profiling[notebook]")
    pass

def load_timm_model(model_name="inception_v4"):
    model = timm.create_model(model_name, pretrained=True, num_classes=0)
    model.eval()
    return model

In [29]:
def load_timm_model(model_name="inception_v4"):
    model = timm.create_model(model_name, pretrained=True, num_classes=0)
    model.eval()
    return model

In [30]:

from src.image_utils import image_resize
from src.embedder import Embedder
from src.video_utils import get_video_format, get_frames

scene_frame_col = "scene_image_path"
timm_models = ["convnext_xlarge_in22k", "ig_resnext101_32x48d", "vgg16", "resnet50d"]
embedder = Embedder(load_timm_model(timm_models[0]))

tmp_dir = "tmp" #to store extracted frames, will be deleted once program is done
if not os.path.isdir(tmp_dir):
    os.makedirs(tmp_dir, exist_ok=True)

if not os.path.isdir(output_folder):
    os.makedirs(output_folder, exist_ok=True)


  model = create_fn(


model config {'input_size': (3, 224, 224), 'interpolation': 'bicubic', 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225), 'crop_pct': 0.875, 'crop_mode': 'center'}


In [31]:
import os
video_paths = []
for path, _, files in os.walk(input_folder):
    for f in files:
        f_ext = os.path.splitext(f)[1]
        f_ext = f_ext.replace(".", "")
        if f_ext in file_extensions_video:
            video_paths.append(os.path.join(path, f))

print(f"\ngiven defined file extensions,found {len(video_paths)} videos ")



given defined file extensions,found 1 videos 


In [32]:
from tqdm.notebook import tqdm
import shutil
import os
import pandas as pd
import numpy as np
from pathlib import Path
from src.scene_detection import SceneDetector

# Create scene xlsx folder
scene_xlsx_folder = os.path.join(output_folder, "scene_xlsx")
os.makedirs(scene_xlsx_folder, exist_ok=True)

# Create scene metadata file path
scene_meta_name = "_".join([project_title, "video_scene_meta.xlsx"])
scene_meta_path = os.path.join(project_folder, scene_meta_name)

results = []
for video_path in tqdm(video_paths):
    video_name = os.path.basename(video_path)
    video_name_clean = os.path.splitext(video_name)[0]
    
    # Create output folder for this video FIRST
    output_folder_video = os.path.join(output_folder, video_name_clean)
    os.makedirs(output_folder_video, exist_ok=True)
    
    try:
        print(f"processing video {video_name}")
        video_format, fps = get_video_format(video_path)
        frame_paths = get_frames(video_path, tmp_dir, video_format, max_image_length=image_length_max)
        
        scene_detector = SceneDetector(video_path=video_path, frame_paths=frame_paths, fps=fps, debug_handler=None, verbose=False)
        scene_df = scene_detector.detect_scenes(n_scene_frames=n_scene_frames, embedder=embedder)
        
        # Map frame_path -> frame_id in scene detector
        scene_df["scene_frame_id"] = scene_df["scene_image_path"].apply(lambda p: int(os.path.splitext(os.path.basename(p))[0]))
        unique_scenes = scene_df["scene_number"].unique().tolist()
        print(f"found {len(unique_scenes)} unique scenes with {len(scene_df)} total scene frames")

        # Add video metadata to scene dataframe
        scene_df["video_path"] = video_path
        scene_df["video_name"] = video_name_clean
        
        # COPY SCENE FRAMES TO OUTPUT FOLDER BEFORE CLEANUP
        scene_frames = scene_df["scene_image_path"].tolist()
        copied_frame_paths = []
        
        for i, scene_frame in enumerate(scene_frames):
            if os.path.exists(scene_frame):  # Check if source file exists
                # Create meaningful filename with scene number and frame number
                scene_num = scene_df.iloc[i]["scene_number"]
                scene_frame_name = f"{video_name_clean}_scene_{scene_num:03d}_frame_{i+1:03d}.jpg"
                scene_frame_output_path = os.path.join(output_folder_video, scene_frame_name)
                
                try:
                    shutil.copy2(scene_frame, scene_frame_output_path)
                    copied_frame_paths.append(scene_frame_output_path)
                    print(f"Copied: {scene_frame_name}")
                except Exception as copy_error:
                    print(f"Error copying {scene_frame}: {copy_error}")
            else:
                print(f"Warning: Scene frame not found: {scene_frame}")
        
        print(f"Successfully copied {len(copied_frame_paths)} scene frames to {output_folder_video}")
        
        # Update scene_df with new paths for Excel file
        scene_df_for_excel = scene_df.copy()
        scene_df_for_excel["output_image_path"] = copied_frame_paths[:len(scene_df_for_excel)]
        
        # Save scene dataframe to Excel (keep original paths for reference)
        scene_xlsx_name = os.path.join(scene_xlsx_folder, video_name_clean + ".xlsx")
        scene_df_for_excel.to_excel(scene_xlsx_name, index=False)
        
        # Clean up temporary frame files AFTER copying
        for f in frame_paths:
            if os.path.exists(f):
                os.remove(f)
        
        # Remove the temporary column from dataframe for results
        if "scene_image_path" in scene_df.columns:
            scene_df.drop("scene_image_path", axis=1, inplace=True)
        
        num_unique_scenes = len(unique_scenes)
        num_scene_frames = len(scene_df)
        message = f"ok - copied {len(copied_frame_paths)} images"
        
    except Exception as e:
        scene_xlsx_name = None
        num_unique_scenes = np.nan
        num_scene_frames = np.nan
        message = str(e)
        print(f"!!!!!    Error processing {video_name}: {message}")
        
        # Clean up temporary files even on error
        try:
            if 'frame_paths' in locals():
                for f in frame_paths:
                    if os.path.exists(f):
                        os.remove(f)
        except:
            pass

    results.append([video_path, video_name, scene_xlsx_name, num_unique_scenes, num_scene_frames, message])

# Create and save results dataframe
result_df = pd.DataFrame(results, columns=["video_path", "video_name", "scene_xlsx_path", "num_scenes", "num_scene_frames", "status"])
result_df.to_excel(scene_meta_path, index=False)

print(f"\nProcessing complete! Check the following locations:")
print(f"- Scene images: {output_folder}")
print(f"- Scene Excel files: {scene_xlsx_folder}")
print(f"- Summary report: {scene_meta_path}")

  0%|          | 0/1 [00:00<?, ?it/s]

processing video 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1.mp4


VideoManager is deprecated and will be removed.


found 32 unique scenes with 93 total scene frames
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_000_frame_001.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_000_frame_002.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_000_frame_003.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_001_frame_004.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_002_frame_005.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_002_frame_006.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_002_frame_007.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_003_frame_008.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_003_frame_009.jpg
Copied: 100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1_scene_003_frame_010.jpg
Copied: 10

# Add Scene Statistics to result file

In [None]:
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from PIL.Image import Image as PIL_Image
from typing import List
from tqdm.notebook import tqdm

def get_video_stats(capture):
    """Get video statistics from cv2.VideoCapture object"""
    fps = int(capture.get(cv2.CAP_PROP_FPS))
    num_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = num_frames / fps
    return duration, num_frames, fps

def get_frames_by_idx(capture, idx_lst: List[int]) -> List[np.ndarray]:
    """Extract frames by index from video capture"""
    frames = []
    for idx in idx_lst:
        capture.set(cv2.CAP_PROP_POS_FRAMES, idx - 1)
        ret, frame = capture.read()
        if ret:  # Check if frame was successfully read
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        else:
            print(f"Warning: Could not read frame at index {idx}")
    return frames

def get_video_format(img: PIL_Image):
    """Determine video format based on aspect ratio"""
    if img.width > img.height:
        format_type = "horizontal"
    elif img.height > img.width:
        format_type = "vertical"
    else:
        format_type = "square"
    return format_type

# Process each video in the results dataframe
for idx, row in tqdm(result_df.iterrows(), total=len(result_df)):
    try:
        # Read scene data for this video
        scene_df = pd.read_excel(row["scene_xlsx_path"], index_col=0, engine="openpyxl")
        scene_df.drop_duplicates(subset="scene_number", keep="first", inplace=True)
        
        # Calculate scene statistics
        result_df.loc[idx, "scene_duration_mean"] = scene_df["duration"].mean()
        result_df.loc[idx, "num_scenes"] = len(scene_df)

        # Get video statistics
        capture = cv2.VideoCapture(row["video_path"])
        if not capture.isOpened():
            print(f"Error: Could not open video {row['video_path']}")
            continue
            
        duration, num_frames, fps = get_video_stats(capture)
        result_df.loc[idx, "video_duration"] = duration
        result_df.loc[idx, "num_scenes_per_sec"] = len(scene_df) / duration if duration > 0 else 0
        
        # Calculate scenes in first 5 seconds
        if duration > 5:
            first_5_sec_df = scene_df[scene_df["end_time"] <= 5]  # Use <= instead of <
            result_df.loc[idx, "num_scenes_first_5_sec"] = len(first_5_sec_df)
        elif duration >= 5:  # Handle case where duration is exactly 5
            result_df.loc[idx, "num_scenes_first_5_sec"] = len(scene_df)
        else:
            result_df.loc[idx, "num_scenes_first_5_sec"] = len(scene_df)  # For videos shorter than 5 sec
        
        capture.release()

        # Get frame dimensions and format
        capture = cv2.VideoCapture(row["video_path"])
        if not capture.isOpened():
            print(f"Error: Could not reopen video {row['video_path']}")
            continue
            
        # Get scene frame indices, ensuring they're valid
        scene_frame_ids = scene_df["scene_frame_id"].tolist()
        scene_frame_ids = [int(fid) for fid in scene_frame_ids if not pd.isna(fid)]
        
        if len(scene_frame_ids) == 0:
            print(f"Warning: No valid scene frame IDs for video {row['video_path']}")
            capture.release()
            continue
            
        frames = get_frames_by_idx(capture, scene_frame_ids)
        capture.release()
        
        if len(frames) == 0:
            print(f"Warning: No frames extracted for video {row['video_path']}")
            continue
            
        # Get video dimensions from first frame
        img = Image.fromarray(frames[0])
        result_df.loc[idx, "video_width"] = img.width
        result_df.loc[idx, "video_height"] = img.height
        result_df.loc[idx, "video_width_height_ratio"] = img.width / img.height
        result_df.loc[idx, "video_format"] = get_video_format(img)
        
    except Exception as e:
        print(f"Error processing video {row['video_path']}: {str(e)}")
        # Fill with NaN values for failed processing
        result_df.loc[idx, "scene_duration_mean"] = np.nan
        result_df.loc[idx, "num_scenes"] = np.nan
        result_df.loc[idx, "video_duration"] = np.nan
        result_df.loc[idx, "num_scenes_per_sec"] = np.nan
        result_df.loc[idx, "num_scenes_first_5_sec"] = np.nan
        result_df.loc[idx, "video_width"] = np.nan
        result_df.loc[idx, "video_height"] = np.nan
        result_df.loc[idx, "video_width_height_ratio"] = np.nan
        result_df.loc[idx, "video_format"] = np.nan

# Save updated results
result_df.to_excel(scene_meta_path, index=False)
print("Processing complete!")
result_df.head()

  0%|          | 0/1 [00:00<?, ?it/s]

Error processing video test_video_frames\100007-22_PL_Bądźmy mistrzami w burzeniu barier. Dołączasz_-1.mp4: Index(['scene_number'], dtype='object')
Processing complete!


Unnamed: 0,video_path,video_name,scene_xlsx_path,num_scenes,num_scene_frames,status,scene_duration_mean,video_duration,num_scenes_per_sec,num_scenes_first_5_sec,video_width,video_height,video_width_height_ratio,video_format
0,test_video_frames\100007-22_PL_Bądźmy mistrzam...,100007-22_PL_Bądźmy mistrzami w burzeniu barie...,output_video_frames\scene_xlsx\100007-22_PL_Bą...,,93,ok - copied 93 images,,,,,,,,


In [39]:
result_df.head()

Unnamed: 0,video_path,video_name,scene_xlsx_path,num_scenes,num_scene_frames,status,scene_duration_mean,video_duration,num_scenes_per_sec,num_scenes_first_5_sec,video_width,video_height,video_width_height_ratio,video_format
0,test_video_frames\100007-22_PL_Bądźmy mistrzam...,100007-22_PL_Bądźmy mistrzami w burzeniu barie...,output_video_frames\scene_xlsx\100007-22_PL_Bą...,,93,ok - copied 93 images,,,,,,,,


# Create Profile HTML

In [40]:
from pandas_profiling import ProfileReport #https://github.com/ydataai/pandas-profiling?s=09

profile = ProfileReport(result_df, title=project_title + " Video Statistics")
profile.to_file(os.path.join(project_folder,"_".join([project_title, "ebay_video_scenes.html"])))


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 14/14 [00:00<00:00, 14014.38it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]