# Demo Notebook - Modular Video Summarization

This demo shows how to run the modular scripts on a single video.

In [None]:
# Install the third-party packages this demo depends on (run once per
# environment). `--quiet` keeps pip's output to a minimum.
!pip install opencv-python-headless librosa torch torchvision pytorch-grad-cam ultralytics facenet-pytorch scenedetect scikit-learn tqdm seaborn datasets --quiet

In [None]:
# Demo pipeline for a single video: sample frames, embed them, select
# keyframes with three methods (A/B/C), convert the selections to keyshots,
# print a textual rationale for method A's frames, and save a few of them.
import importlib.util
import sys
from pathlib import Path

import cv2
import torch

PROJECT_DIR = Path('/mnt/data/video_sum_project')
sys.path.append(str(PROJECT_DIR))

from preprocess import sample_frames, extract_audio_rms
from embed import MobileNetEmbed, batch_embed, simclr_refine
from explain import yolo_objects_on_frames, textual_rationale_for_shot
from eval import fscore_vs_gt


def _load_project_module(alias, filename):
    """Load ``PROJECT_DIR / filename`` as a module registered under ``alias``.

    Used for project files whose name collides with a standard-library
    module, where a plain ``import`` would resolve to the stdlib one.

    Raises:
        ImportError: if no import spec can be built for the file.
        FileNotFoundError: if the file does not exist (raised by the loader).
    """
    spec = importlib.util.spec_from_file_location(alias, PROJECT_DIR / filename)
    if spec is None or spec.loader is None:
        raise ImportError(f'Cannot load {filename} from {PROJECT_DIR}')
    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks itself up in sys.modules
    # during import (e.g. dataclasses) can find the module.
    sys.modules[alias] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(alias, None)
        raise
    return module


# `select` is also a standard-library module and is normally already cached in
# sys.modules inside a notebook kernel, so `from select import ...` would not
# reach the project's file. Load it by path under a non-clashing alias.
# NOTE(review): assumes the project module is the single file `select.py`
# inside PROJECT_DIR - confirm.
_select = _load_project_module('video_sum_select', 'select.py')
method_A_kmeans = _select.method_A_kmeans
method_B_visual_audio = _select.method_B_visual_audio
method_C_mmr = _select.method_C_mmr
frames_to_keyshots = _select.frames_to_keyshots

VIDEO = '/kaggle/input/demo-video/demo.mp4'  # change to your uploaded path
OUTDIR = '/mnt/data/video_sum_project/outputs'
Path(OUTDIR).mkdir(parents=True, exist_ok=True)

# Shared settings, defined once so every stage below uses the same values.
STRIDE_SEC = 1.0     # sampling stride; also passed to frames_to_keyshots
NUM_KEYFRAMES = 12   # k passed to every selection method
BUDGET_SEC = 20.0    # summary budget passed to method C and to keyshots
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sample
frames, ts, idxs, fps, duration = sample_frames(VIDEO, stride_sec=STRIDE_SEC, resize=224)
print('Frames', len(frames), 'duration', duration)
# len() rather than truthiness: `frames` may be an array, whose truth value
# is ambiguous. Fail early with a clear message instead of deep inside a
# later stage.
if len(frames) == 0:
    raise RuntimeError(f'No frames could be sampled from {VIDEO!r}; check the path.')

# Embeds
model = MobileNetEmbed(device=DEVICE).to(DEVICE).eval()
embs = batch_embed(model, frames, batch=64, device=DEVICE)

# (optional) SimCLR-lite refine
embs_refined = simclr_refine(embs, epochs=2, batch=64, device=DEVICE)

# Audio: one score per sampled timestamp in `ts`.
audio_scores = extract_audio_rms(VIDEO, ts)

# Methods
selA, _ = method_A_kmeans(embs_refined, k=NUM_KEYFRAMES)
selB, _ = method_B_visual_audio(embs_refined, audio_scores, k=NUM_KEYFRAMES)
# Method C scores each frame as its audio score plus the mean of its embedding.
# NOTE(review): assumes audio_scores and embs_refined are arrays with one
# entry/row per frame so the sum is elementwise - confirm.
selC = method_C_mmr(
    embs_refined,
    scores=(audio_scores + embs_refined.mean(axis=1)),
    k=NUM_KEYFRAMES,
    lengths=[2.0] * len(embs_refined),
    budget_sec=BUDGET_SEC,
)

# Keyshots
shotsA = frames_to_keyshots(selA, ts, stride_sec=STRIDE_SEC, budget_sec=BUDGET_SEC)
shotsB = frames_to_keyshots(selB, ts, stride_sec=STRIDE_SEC, budget_sec=BUDGET_SEC)
shotsC = frames_to_keyshots(selC, ts, stride_sec=STRIDE_SEC, budget_sec=BUDGET_SEC)

print('Shots A', shotsA)
print('Shots B', shotsB)
print('Shots C', shotsC)

# Explain: detect objects on method A's frames and print a rationale for each.
# audio_peak is passed as False for every frame in this demo.
labelsA = yolo_objects_on_frames([frames[i] for i in selA])
for i, lab in zip(selA, labelsA):
    print(f'Frame {i} rationale:', textual_rationale_for_shot(lab, audio_peak=False))

# Save a few keyframes (first six of method A's selection).
for j, i in enumerate(selA[:6]):
    p = f'{OUTDIR}/A_key_{j}.jpg'
    # cv2.imwrite expects BGR channel order and returns False on failure
    # rather than raising, so check the result explicitly.
    # NOTE(review): the RGB->BGR conversion assumes sample_frames yields RGB.
    if not cv2.imwrite(p, cv2.cvtColor(frames[i], cv2.COLOR_RGB2BGR)):
        print('Failed to write', p)
print('Saved keyframes to', OUTDIR)
