# 🎬 PrismCuts en Google Colab con VideoMAE
Este notebook segmenta un video en escenas comparando la similitud coseno entre los embeddings visuales de clips consecutivos, calculados con el modelo `MCG-NJU/videomae-base` de Hugging Face.

In [None]:
# 🔧 Instalación de dependencias
!pip install transformers accelerate decord ffmpeg-python

In [None]:
# 📦 Importar librerías necesarias
import os
import torch
import ffmpeg
import numpy as np
from transformers import VideoMAEFeatureExtractor, VideoMAEModel
from decord import VideoReader, cpu

In [None]:
# 🎞️ Funciones auxiliares
def get_video_clips(video_path, clip_duration):
    """Decode a video and split its frames into consecutive fixed-duration clips.

    Clip ``k`` starts at frame ``int(k * clip_duration * fps)``, so its start
    corresponds to ``k * clip_duration`` seconds even when the average frame
    rate is not an integer (e.g. 29.97 fps). Truncating the per-clip frame
    count once and reusing it as the stride would drift away from that
    timeline as the video gets longer.

    Args:
        video_path: Path of the video file, passed to ``VideoReader``.
        clip_duration: Length of each clip in seconds. Must span at least one
            frame at the video's average frame rate.

    Returns:
        A list with one frame batch per clip, in playback order. Each batch is
        whatever slicing the ``VideoReader`` returns. The last clip may hold
        fewer frames than the others. A video without frames yields ``[]``.

    Raises:
        ValueError: If ``clip_duration`` spans less than one frame.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    num_frames = len(vr)

    # Loop-invariant, so compute it once. Kept fractional on purpose: the
    # truncation happens per boundary below, not on the stride itself.
    fps = vr.get_avg_fps()
    frames_per_clip = clip_duration * fps
    # `not >= 1` also rejects NaN, which a plain `< 1` would let through.
    if not frames_per_clip >= 1:
        raise ValueError(
            f"clip_duration={clip_duration!r} spans less than one frame "
            f"at {fps!r} fps"
        )

    # NOTE: every decoded batch stays referenced by `clips`, so the whole
    # video is held in memory until the returned list is released.
    clips = []
    start = 0
    clip_idx = 0
    while start < num_frames:
        clip_idx += 1
        # frames_per_clip >= 1 guarantees `end > start`, so no clip is empty.
        end = min(int(clip_idx * frames_per_clip), num_frames)
        clips.append(vr[start:end])
        start = end
    return clips

def extract_embeddings(clips, model, processor):
    """Compute one mean-pooled embedding vector per clip.

    Each clip is reduced to ``model.config.num_frames`` frames, sampled
    uniformly across the clip, before it is handed to ``processor``. Clips
    produced from real videos have arbitrary lengths (frame rate times clip
    duration, and a shorter tail clip), while the model is expected to accept
    only the frame count it was configured with. If the model exposes no
    ``config.num_frames``, all frames are passed through unchanged.

    Args:
        clips: Iterable of frame batches with frames along the first axis.
            Objects exposing ``asnumpy()`` are converted with it; anything
            else goes through ``np.asarray``.
        model: Callable invoked as ``model(**inputs)`` whose result has a
            ``last_hidden_state`` tensor with the batch on axis 0 and tokens
            on axis 1.
        processor: Callable invoked as
            ``processor(list_of_frames, return_tensors="pt")``, returning a
            mapping of keyword arguments for ``model``.

    Returns:
        A list of NumPy arrays, one per clip, in input order. Each array is
        the token-wise mean of ``last_hidden_state`` for that clip.

    Raises:
        ValueError: If a clip contains no frames.
    """
    target_frames = getattr(getattr(model, "config", None), "num_frames", None)
    # Hugging Face models expose `.device`; plain callables may not.
    device = getattr(model, "device", None)

    embeddings = []
    for clip in clips:
        frames = clip.asnumpy() if hasattr(clip, "asnumpy") else np.asarray(clip)
        if len(frames) == 0:
            # Skipping would silently shift every later embedding index.
            raise ValueError("cannot embed a clip with no frames")

        if target_frames:
            # Evenly spaced indices covering the whole clip; frames repeat
            # when the clip is shorter than the model's expected length.
            picks = np.linspace(0, len(frames) - 1, num=target_frames)
            frames = frames[picks.round().astype(int)]

        inputs = processor(list(frames), return_tensors="pt")
        if device is not None and hasattr(inputs, "to"):
            inputs = inputs.to(device)

        with torch.no_grad():
            output = model(**inputs)

        # Average over the token axis, then drop the batch axis of size 1.
        pooled = output.last_hidden_state.mean(dim=1).squeeze(0)
        # `.cpu()` is a no-op on CPU and required before `.numpy()` elsewhere.
        embeddings.append(pooled.cpu().numpy())
    return embeddings

def detect_scene_changes(embeddings, threshold):
    """Return the indices of clips that start a new scene.

    Index 0 is always reported. Any later index ``i`` is reported when the
    cosine similarity between ``embeddings[i - 1]`` and ``embeddings[i]`` is
    strictly below ``threshold``.

    Args:
        embeddings: Sequence of 1-D vectors, one per clip, in playback order.
        threshold: Similarity below which two neighbouring clips are treated
            as belonging to different scenes.

    Returns:
        A list of clip indices in ascending order, starting with 0.
    """
    def _cosine(prev, curr):
        # Dot product normalised by both vector lengths.
        return np.dot(prev, curr) / (np.linalg.norm(prev) * np.linalg.norm(curr))

    boundaries = [0]
    # Walk neighbouring pairs; `start=1` makes idx the position of `curr`.
    neighbours = zip(embeddings, embeddings[1:])
    boundaries.extend(
        idx
        for idx, (prev, curr) in enumerate(neighbours, start=1)
        if _cosine(prev, curr) < threshold
    )
    return boundaries

def split_video(video_path, scene_changes, clip_duration, output_dir):
    """Write one video file per detected scene using ffmpeg stream copy.

    Scene ``i`` starts at ``scene_changes[i] * clip_duration`` seconds and
    runs until the start of scene ``i + 1``. The last scene runs to the end
    of the input. Cutting every output to a single ``clip_duration`` would
    keep only the first clip of each scene and drop the rest of it.

    Args:
        video_path: Path of the source video.
        scene_changes: Ascending clip indices at which a scene starts.
        clip_duration: Length in seconds of the clips the indices refer to.
        output_dir: Directory for the outputs; created if missing. Files are
            named ``scene_<i>.mp4`` and existing ones are overwritten.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Pair every scene start with the following one; None marks "no next
    # scene", i.e. the final scene extends to the end of the video.
    next_starts = list(scene_changes[1:]) + [None]
    for i, (scene_idx, next_idx) in enumerate(zip(scene_changes, next_starts)):
        input_kwargs = {"ss": scene_idx * clip_duration}
        if next_idx is not None:
            input_kwargs["t"] = (next_idx - scene_idx) * clip_duration

        output_path = os.path.join(output_dir, f"scene_{i}.mp4")
        # NOTE: streams are copied, not re-encoded, so the actual cut points
        # may snap to nearby keyframes rather than the exact timestamps.
        (
            ffmpeg
            .input(video_path, **input_kwargs)
            .output(output_path, codec='copy')
            .run(overwrite_output=True)
        )

In [None]:
# 🚀 Run the full pipeline: load model -> cut clips -> embed -> detect -> export
# Length in seconds of each clip; every clip gets exactly one embedding.
clip_duration = 1.0
# A clip starts a new scene when its cosine similarity to the previous clip
# falls below this value.
threshold = 0.75
video_path = "video.mp4"  # Replace with the path to your video

print("[INFO] Cargando modelo...")
# Preprocessor and encoder are loaded from the same checkpoint id.
processor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base")
model = VideoMAEModel.from_pretrained("MCG-NJU/videomae-base")

print("[INFO] Extrayendo clips...")
clips = get_video_clips(video_path, clip_duration)

print("[INFO] Generando embeddings...")
embeddings = extract_embeddings(clips, model, processor)

print("[INFO] Detectando cortes de escena...")
# The result holds clip indices (not seconds); index 0 is always included.
scene_changes = detect_scene_changes(embeddings, threshold)
print("Cortes detectados:", scene_changes)

print("[INFO] Cortando video...")
# Output files are written to the "escenas" directory.
split_video(video_path, scene_changes, clip_duration, "escenas")
print("[LISTO] Escenas generadas en la carpeta 'escenas'")