# Shot Boundary Detection

In [3]:
import os

import cv2
import re
import numpy as np
from scenedetect import VideoManager, SceneManager
from scenedetect.detectors import ContentDetector, ThresholdDetector
from tqdm import tqdm


# Shot Boundary Detection System

This code performs shot boundary detection in video files using a combination of techniques, including content and threshold detection, histogram comparison, and optical flow analysis. Detected shot boundaries and keyframes are extracted and saved for further processing. 

### Techniques Used:
- **ContentDetector**: Detects abrupt changes in frame content.
- **ThresholdDetector**: Detects significant changes in frame properties.
- **Histogram Comparison**: Identifies gradual transitions based on histogram differences.
- **Optical Flow Analysis**: Measures motion magnitude to detect shot boundaries.

The results are stored in specified output directories, with detected scenes saved to files and keyframes extracted from the start of each scene.



In [None]:
# Configuration
input_dir = 'preprocessed_videos/'
output_dir = 'shot_boundaries/'
keyframe_dir = 'keyframes/'

# Configuration Google Colab
# input_dir = '/content/drive/MyDrive/preprocessed_videos'
# output_dir = '/content/drive/MyDrive/shot_boundaries'
# keyframe_dir = '/content/drive/MyDrive/keyframes'

min_scene_length = 15  # Minimum length of a scene in frames
threshold = 30.0  # Threshold for the ThresholdDetector
min_scene_len = 2  # Minimum number of frames a scene should last
hist_threshold = 0.4  # Threshold for histogram comparison

# Ensure the output and keyframe directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(keyframe_dir, exist_ok=True)

def calculate_histogram_difference(frame1, frame2):
    hist1 = cv2.calcHist([frame1], [0], None, [256], [0, 256])
    hist2 = cv2.calcHist([frame2], [0], None, [256], [0, 256])
    cv2.normalize(hist1, hist1)
    cv2.normalize(hist2, hist2)
    return cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)

def detect_shot_boundaries(video_path, output_path, keyframe_path):
    if os.path.exists(output_path):
        print(f"Output file {output_path} already exists. Skipping shot boundary detection.")
        return
    
    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()

    # Add ContentDetector and ThresholdDetector
    scene_manager.add_detector(ContentDetector(threshold=30.0, min_scene_len=min_scene_length))
    scene_manager.add_detector(ThresholdDetector(threshold=threshold, min_scene_len=min_scene_len))

    video_manager.set_downscale_factor()
    video_manager.start()
    scene_manager.detect_scenes(frame_source=video_manager)
    scenes = scene_manager.get_scene_list()
    print(f"Detected {len(scenes)} scenes in video {video_path}")

    # Additional processing for gradual transitions
    cap = cv2.VideoCapture(video_path)
    prev_frame = None
    prev_gray = None
    frame_num = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_frame is not None:
            hist_diff = calculate_histogram_difference(prev_frame, frame)
            if hist_diff < hist_threshold:
                # Gradual transition detected
                scenes.append((frame_num, frame_num + min_scene_len))
            # Motion analysis using optical flow
            if prev_gray is not None:
                flow = cv2.calcOpticalFlowFarneback(prev_gray, gray_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                motion_magnitude = np.mean(mag)
                if motion_magnitude > threshold:
                    scenes.append((frame_num, frame_num + min_scene_len))
        prev_frame = frame
        prev_gray = gray_frame
        frame_num += 1
    cap.release()

    # Remove duplicate and sort scenes
    scenes = sorted(list(set(scenes)))
    print(f"Total scenes after processing: {len(scenes)}")

    # Save shot boundaries to a file
    with open(output_path, 'w') as f:
        for start_time, end_time in scenes:
            f.write(f"{start_time}, {end_time}\n")
            # f.write(f"{start_time.get_seconds()}, {end_time.get_seconds()}\n")
            # f.write(f"{start_time.get_frames()}, {end_time.get_frames()}\n")
        print(f"Shot boundaries saved to {output_path}")

    # Extract keyframes for each detected scene
    cap = cv2.VideoCapture(video_path)
    for start, end in scenes:
        keyframe_filename = os.path.join(keyframe_path, f"{os.path.basename(video_path)}_start_{start}.jpg")
        if not os.path.exists(keyframe_filename):
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(start))
            ret, frame = cap.read()
            if ret:
                cv2.imwrite(keyframe_filename, frame)
    cap.release()
    print(f"Keyframes saved to {keyframe_path}")


def process_videos(video_files, output_directory, keyframe_directory):
    for video_file in tqdm(video_files, desc="Detecting Shot Boundaries", unit="video"):
        output_file = os.path.join(output_directory, os.path.splitext(os.path.basename(video_file))[0] + '_shots.txt')
        keyframe_path = os.path.join(keyframe_directory, os.path.splitext(os.path.basename(video_file))[0])
        os.makedirs(keyframe_path, exist_ok=True)
        try:
            detect_shot_boundaries(video_file, output_file, keyframe_path)
        except Exception as e:
            print(f"Error processing {video_file}: {e}")

def get_video_files(input_directory):
    video_files = []
    for root, _, files in os.walk(input_directory):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                match = re.search(r'\d+', file)
                if match:
                    number = int(match.group())
                    if 100 <= number <= 149:
                        video_files.append(os.path.join(root, file))
    return video_files


print("Starting shot boundary detection...")
video_files = get_video_files(input_dir)
print(f"Found {len(video_files)} video files to process.")
print("Video files:", video_files)
process_videos(video_files, output_dir, keyframe_dir)
print("Shot boundary detection completed successfully.") 

Starting shot boundary detection...
Found 50 video files to process.
Video files: ['preprocessed_videos/00120/00120.mp4', 'preprocessed_videos/00118/00118.mp4', 'preprocessed_videos/00127/00127.mp4', 'preprocessed_videos/00111/00111.mp4', 'preprocessed_videos/00129/00129.mp4', 'preprocessed_videos/00116/00116.mp4', 'preprocessed_videos/00142/00142.mp4', 'preprocessed_videos/00145/00145.mp4', 'preprocessed_videos/00128/00128.mp4', 'preprocessed_videos/00117/00117.mp4', 'preprocessed_videos/00110/00110.mp4', 'preprocessed_videos/00119/00119.mp4', 'preprocessed_videos/00126/00126.mp4', 'preprocessed_videos/00121/00121.mp4', 'preprocessed_videos/00144/00144.mp4', 'preprocessed_videos/00143/00143.mp4', 'preprocessed_videos/00103/00103.mp4', 'preprocessed_videos/00104/00104.mp4', 'preprocessed_videos/00132/00132.mp4', 'preprocessed_videos/00135/00135.mp4', 'preprocessed_videos/00134/00134.mp4', 'preprocessed_videos/00133/00133.mp4', 'preprocessed_videos/00105/00105.mp4', 'preprocessed_videos

Detecting Shot Boundaries:   0%|          | 0/50 [00:00<?, ?video/s]VideoManager is deprecated and will be removed.


Output file shot_boundaries/00120_shots.txt already exists. Skipping shot boundary detection.
Detected 185 scenes in video preprocessed_videos/00118/00118.mp4
Total scenes after processing: 321
Shot boundaries saved to shot_boundaries/00118_shots.txt


Detecting Shot Boundaries:   4%|▍         | 2/50 [39:55<15:58:18, 1197.88s/video]VideoManager is deprecated and will be removed.


Keyframes saved to keyframes/00118
Detected 162 scenes in video preprocessed_videos/00127/00127.mp4
Total scenes after processing: 300
Shot boundaries saved to shot_boundaries/00127_shots.txt


Detecting Shot Boundaries:   6%|▌         | 3/50 [52:16<13:09:07, 1007.39s/video]VideoManager is deprecated and will be removed.


Keyframes saved to keyframes/00127
Output file shot_boundaries/00111_shots.txt already exists. Skipping shot boundary detection.
Output file shot_boundaries/00129_shots.txt already exists. Skipping shot boundary detection.
Output file shot_boundaries/00116_shots.txt already exists. Skipping shot boundary detection.
Output file shot_boundaries/00142_shots.txt already exists. Skipping shot boundary detection.
Detected 4 scenes in video preprocessed_videos/00145/00145.mp4


Detecting Shot Boundaries:  16%|█▌        | 8/50 [1:03:37<4:08:43, 355.33s/video]VideoManager is deprecated and will be removed.


Total scenes after processing: 4
Shot boundaries saved to shot_boundaries/00145_shots.txt
Keyframes saved to keyframes/00145
Detected 168 scenes in video preprocessed_videos/00128/00128.mp4


# Post Processing

# PySceneDetect (same results as above)

In [None]:
import os
import re
import cv2
import numpy as np
from tqdm import tqdm
from scenedetect import VideoManager
from scenedetect import SceneManager
from scenedetect.detectors import ContentDetector, ThresholdDetector

# Configuration
input_dir = 'preprocessed_videos/'
output_dir = 'shot_boundaries_test/'
keyframe_dir = 'keyframes_test/'

# Ensure the output and keyframe directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(keyframe_dir, exist_ok=True)

def detect_shot_boundaries(video_path, output_path, keyframe_path, content_threshold=30.0, min_scene_len=15, pixel_threshold=12):
    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()
    
    # Add ContentDetector and ThresholdDetector with customizable parameters
    scene_manager.add_detector(ContentDetector(threshold=content_threshold, min_scene_len=min_scene_len))
    scene_manager.add_detector(ThresholdDetector(threshold=pixel_threshold, min_scene_len=min_scene_len))

    video_manager.set_downscale_factor()
    video_manager.start()
    scene_manager.detect_scenes(frame_source=video_manager)
    scenes = scene_manager.get_scene_list()

    print(f"Detected {len(scenes)} scenes in video {video_path}")

    # Save shot boundaries to a file
    with open(output_path, 'w') as f:
        for start_time, end_time in scenes:
            f.write(f"{start_time.get_frames()}, {end_time.get_frames()}\n")
    print(f"Shot boundaries saved to {output_path}")

    # Extract keyframes for each detected scene
    if not os.path.exists(keyframe_path):
        os.makedirs(keyframe_path)
    
    cap = cv2.VideoCapture(video_path)
    for i, (start, end) in enumerate(scenes):
        cap.set(cv2.CAP_PROP_POS_FRAMES, start.get_frames())
        ret, frame = cap.read()
        if ret:
            keyframe_filename = os.path.join(keyframe_path, f"{os.path.splitext(os.path.basename(video_path))[0]}_Scene-{i + 1}.jpg")
            cv2.imwrite(keyframe_filename, frame)
    cap.release()
    print(f"Keyframes saved to {keyframe_path}")

def process_videos(video_files, output_directory, keyframe_directory, content_threshold=30.0, min_scene_len=15, pixel_threshold=12):
    for video_file in tqdm(video_files, desc="Detecting Shot Boundaries", unit="video"):
        output_file = os.path.join(output_directory, os.path.splitext(os.path.basename(video_file))[0] + '_shots.txt')
        keyframe_path = os.path.join(keyframe_directory, os.path.splitext(os.path.basename(video_file))[0])
        os.makedirs(keyframe_path, exist_ok=True)
        try:
            detect_shot_boundaries(video_file, output_file, keyframe_path, content_threshold, min_scene_len, pixel_threshold)
        except Exception as e:
            print(f"Error processing {video_file}: {e}")

def get_video_files(input_directory):
    video_files = []
    for root, _, files in os.walk(input_directory):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                match = re.search(r'\d+', file)
                if match:
                    number = int(match.group())
                    if 126 <= number <= 149:
                        video_files.append(os.path.join(root, file))
    return video_files

print("Starting shot boundary detection...")
video_files = get_video_files(input_dir)
print(f"Found {len(video_files)} video files to process.")
print("Video files:", video_files)
process_videos(video_files, output_dir, keyframe_dir, content_threshold=30.0, min_scene_len=15, pixel_threshold=12)
print("Shot boundary detection completed successfully.")


# Frame Differencing (poor results but could maybe be used in combination with other techniques)

### Description and Parameters for Shot Boundary Detection using Frame Differencing

This script performs shot boundary detection on a set of videos using the frame differencing technique. Frame differencing involves calculating the difference between consecutive frames to detect abrupt changes and gradual transitions, indicating scene changes. Keyframes for each detected scene are extracted and saved.

#### Parameters:

- **frame_diff_threshold**: Threshold for detecting abrupt changes between consecutive frames. A lower value makes the detection more sensitive.
- **accumulated_diff_threshold**: Threshold for detecting gradual transitions by accumulating frame differences over several frames.
- **min_scene_len**: Minimum length of a scene in frames to filter out very short scenes.

#### Workflow:

1. **Frame Differencing**:
   - The `calculate_frame_difference` function computes the difference between two frames and converts it to grayscale.
   - The `detect_shot_boundaries` function processes each frame to detect scene changes using both abrupt and gradual transitions.
   
2. **Scene Detection**:
   - Abrupt scene changes are detected when the difference between consecutive frames exceeds `frame_diff_threshold`.
   - Gradual transitions are detected when the accumulated difference over several frames exceeds `accumulated_diff_threshold`.
   - Detected scenes are saved to a file.

3. **Keyframe Extraction**:
   - A keyframe is extracted for each detected scene and saved as an image.


In [None]:
import os
import re
import cv2
import numpy as np
from tqdm import tqdm

# Configuration
input_dir = 'preprocessed_videos/'
output_dir = 'shot_boundaries_test/'
keyframe_dir = 'keyframes_test/'

# Ensure the output and keyframe directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(keyframe_dir, exist_ok=True)

# Parameters for frame differencing
frame_diff_threshold = 50.0  # Threshold for frame difference
accumulated_diff_threshold = 500.0  # Threshold for accumulated frame difference
min_scene_len = 15  # Minimum length of a scene in frames

def calculate_frame_difference(frame1, frame2):
    diff = cv2.absdiff(frame1, frame2)
    gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
    return np.sum(gray_diff)

def detect_shot_boundaries(video_path, output_path, keyframe_path, frame_diff_threshold=50.0, accumulated_diff_threshold=500.0, min_scene_len=15):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error opening video file {video_path}")
        return

    frame_num = 0
    prev_frame = None
    scenes = []
    current_scene_start = 0
    accumulated_diff = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if prev_frame is not None:
            frame_diff = calculate_frame_difference(prev_frame, frame)
            accumulated_diff += frame_diff

            if frame_diff > frame_diff_threshold and (frame_num - current_scene_start) > min_scene_len:
                scenes.append((current_scene_start, frame_num))
                current_scene_start = frame_num
                accumulated_diff = 0
            elif accumulated_diff > accumulated_diff_threshold and (frame_num - current_scene_start) > min_scene_len:
                scenes.append((current_scene_start, frame_num))
                current_scene_start = frame_num
                accumulated_diff = 0

        prev_frame = frame
        frame_num += 1

    # Add the last scene
    if current_scene_start < frame_num:
        scenes.append((current_scene_start, frame_num))

    cap.release()

    print(f"Detected {len(scenes)} scenes in video {video_path}")

    # Save shot boundaries to a file
    with open(output_path, 'w') as f:
        for start_frame, end_frame in scenes:
            f.write(f"{start_frame}, {end_frame}\n")
    print(f"Shot boundaries saved to {output_path}")

    # Extract keyframes for each detected scene
    if not os.path.exists(keyframe_path):
        os.makedirs(keyframe_path)
    
    cap = cv2.VideoCapture(video_path)
    for i, (start, end) in enumerate(scenes):
        cap.set(cv2.CAP_PROP_POS_FRAMES, start)
        ret, frame = cap.read()
        if ret:
            keyframe_filename = os.path.join(keyframe_path, f"{os.path.splitext(os.path.basename(video_path))[0]}_Scene-{i + 1}.jpg")
            cv2.imwrite(keyframe_filename, frame)
    cap.release()
    print(f"Keyframes saved to {keyframe_path}")

def process_videos(video_files, output_directory, keyframe_directory, frame_diff_threshold=50.0, accumulated_diff_threshold=500.0, min_scene_len=15):
    for video_file in tqdm(video_files, desc="Detecting Shot Boundaries", unit="video"):
        output_file = os.path.join(output_directory, os.path.splitext(os.path.basename(video_file))[0] + '_shots.txt')
        keyframe_path = os.path.join(keyframe_directory, os.path.splitext(os.path.basename(video_file))[0])
        os.makedirs(keyframe_path, exist_ok=True)
        try:
            detect_shot_boundaries(video_file, output_file, keyframe_path, frame_diff_threshold, accumulated_diff_threshold, min_scene_len)
        except Exception as e:
            print(f"Error processing {video_file}: {e}")

def get_video_files(input_directory):
    video_files = []
    for root, _, files in os.walk(input_directory):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                match = re.search(r'\d+', file)
                if match:
                    number = int(match.group())
                    if 126 <= number <= 149:
                        video_files.append(os.path.join(root, file))
    return video_files

print("Starting shot boundary detection...")
video_files = get_video_files(input_dir)
print(f"Found {len(video_files)} video files to process.")
print("Video files:", video_files)
process_videos(video_files, output_dir, keyframe_dir, frame_diff_threshold=50.0, accumulated_diff_threshold=500.0, min_scene_len=15)
print("Shot boundary detection completed successfully.")


# TransNetV2

Not working

In [None]:
import os
import re
import cv2
import numpy as np
from tqdm import tqdm
from transnetv2 import TransNetV2

# Configuration
input_dir = 'preprocessed_videos/'
output_dir = 'shot_boundaries_test/'
keyframe_dir = 'keyframes_test/'

# Ensure the output and keyframe directories exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(keyframe_dir, exist_ok=True)

def detect_shot_boundaries(video_path, output_path, keyframe_path):
    model = TransNetV2()
    
    # Get video frames and predictions
    video_frames, single_frame_predictions, all_frame_predictions = model.predict_video(video_path)
    
    # Get scenes from predictions
    scenes = model.predictions_to_scenes(single_frame_predictions)
    
    print(f"Detected {len(scenes)} scenes in video {video_path}")

    # Save shot boundaries to a file
    with open(output_path, 'w') as f:
        for start_time, end_time in scenes:
            f.write(f"{start_time}, {end_time}\n")
    print(f"Shot boundaries saved to {output_path}")

    # Extract keyframes for each detected scene
    if not os.path.exists(keyframe_path):
        os.makedirs(keyframe_path)
    
    for i, (start, end) in enumerate(scenes):
        keyframe_filename = os.path.join(keyframe_path, f"{os.path.splitext(os.path.basename(video_path))[0]}_Scene-{i + 1}.jpg")
        cv2.imwrite(keyframe_filename, video_frames[start])
    print(f"Keyframes saved to {keyframe_path}")

def process_videos(video_files, output_directory, keyframe_directory):
    for video_file in tqdm(video_files, desc="Detecting Shot Boundaries", unit="video"):
        output_file = os.path.join(output_directory, os.path.splitext(os.path.basename(video_file))[0] + '_shots.txt')
        keyframe_path = os.path.join(keyframe_directory, os.path.splitext(os.path.basename(video_file))[0])
        os.makedirs(keyframe_path, exist_ok=True)
        try:
            detect_shot_boundaries(video_file, output_file, keyframe_path)
        except Exception as e:
            print(f"Error processing {video_file}: {e}")

def get_video_files(input_directory):
    video_files = []
    for root, _, files in os.walk(input_directory):
        for file in files:
            if file.endswith(('.mp4', '.avi', '.mov', '.mkv')):
                match = re.search(r'\d+', file)
                if match:
                    number = int(match.group())
                    if 126 <= number <= 149:
                        video_files.append(os.path.join(root, file))
    return video_files

print("Starting shot boundary detection...")
video_files = get_video_files(input_dir)
print(f"Found {len(video_files)} video files to process.")
print("Video files:", video_files)
process_videos(video_files, output_dir, keyframe_dir)
print("Shot boundary detection completed successfully.")


In [None]:
import torch
from transnetv2 import TransNetV2

model = TransNetV2()
model_dir="TransNetV2/inference/transnetv2-weights/"
state_dict = torch.load("transnetv2-pytorch-weights.pth")
model.load_state_dict(state_dict)
model.eval().cuda()

with torch.no_grad():
    # shape: batch dim x video frames x frame height x frame width x RGB (not BGR) channels
    input_video = torch.zeros(1, 100, 27, 48, 3, dtype=torch.uint8)
    single_frame_pred, all_frame_pred = model(input_video.cuda())
    
    single_frame_pred = torch.sigmoid(single_frame_pred).cpu().numpy()
    all_frame_pred = torch.sigmoid(all_frame_pred["many_hot"]).cpu().numpy()