# 🎬 PrismCuts en Google Colab con VideoMAE
Este notebook segmenta un video en escenas con base en la similitud entre embeddings visuales usando `videomae-base` desde Hugging Face.

In [None]:
# 🔧 Install dependencies (notebook shell magic; `-q` keeps pip output quiet).
# NOTE(review): `torch` is imported in a later cell but is not installed here —
# presumably it is provided by the Colab base image; confirm.
!pip install transformers ffmpeg-python scikit-learn opencv-python -q


In [None]:
# 📥 Upload your .mp4 video
from google.colab import files

uploaded = files.upload()
# The original code indexed the first key unconditionally, which raises a bare
# IndexError when nothing was uploaded (e.g. the dialog was cancelled). Fail
# with an explicit message instead.
if not uploaded:
    raise RuntimeError(
        "No se subió ningún archivo; vuelve a ejecutar la celda y selecciona un video .mp4"
    )
# Only the first uploaded file is processed by the rest of the notebook.
video_path = next(iter(uploaded))


In [None]:
# 📚 Importar librerías
import cv2
import numpy as np
import torch
import ffmpeg
from sklearn.metrics.pairwise import cosine_similarity
from transformers import VideoMAEFeatureExtractor, VideoMAEModel
import os


In [None]:
# ⚙️ Funciones de extracción y segmentación
def get_video_clips(video_path, seconds_per_clip=1.0):
    """Decode a video and group its frames into consecutive clips.

    Clip ``k`` ends at the frame nearest to ``(k + 1) * seconds_per_clip``
    seconds, so clip indices stay aligned with wall-clock time even when
    ``fps * seconds_per_clip`` is not an integer (e.g. 29.97 fps). Clips may
    therefore differ in length by one frame.

    Note: every decoded frame is kept in memory until the function returns,
    and trailing frames that do not fill a complete clip are discarded.

    Args:
        video_path: Path of the video file to open with OpenCV.
        seconds_per_clip: Nominal duration of each clip, in seconds.

    Returns:
        A list of clips; each clip is a list of RGB frames (arrays produced
        by ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``).

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the reported frame rate and ``seconds_per_clip`` yield
            less than one frame per clip (this includes an fps of 0 or NaN).
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"No se pudo abrir el video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        # Keep the fractional value: truncating it makes every clip slightly
        # shorter than requested and the error accumulates over the video.
        frames_per_clip = fps * seconds_per_clip
        # Written as a chained comparison so NaN and infinity are rejected too.
        if not (1 <= frames_per_clip < float("inf")):
            raise ValueError(
                f"Duración de clip inválida: fps={fps}, "
                f"seconds_per_clip={seconds_per_clip}"
            )

        clips = []
        frames = []
        frames_read = 0
        # int(x + 0.5) rounds half up; with frames_per_clip >= 1 successive
        # boundaries are strictly increasing, so no clip is ever empty.
        next_boundary = int(frames_per_clip + 0.5)

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frames_read += 1

            if frames_read >= next_boundary:
                clips.append(frames)
                frames = []
                next_boundary = int((len(clips) + 1) * frames_per_clip + 0.5)
    finally:
        # Release the capture even if decoding or validation fails.
        cap.release()

    return clips

def extract_embeddings(clips, model, processor):
    """Compute one embedding vector per clip.

    Each clip is uniformly subsampled (or padded by repeating frames) to the
    frame count the model is configured for (``model.config.num_frames``),
    because VideoMAE checkpoints use a fixed number of input frames while a
    clip holds roughly ``fps * seconds_per_clip`` frames. The embedding is the
    mean of the model's ``last_hidden_state`` over the token dimension.

    Args:
        clips: Iterable of clips; each clip is a sequence of frames.
        model: Model called as ``model(**inputs)`` whose output exposes
            ``last_hidden_state``. If it has ``config.num_frames`` and
            ``device`` attributes they are honoured.
        processor: Callable taking a list of frames and
            ``return_tensors="pt"`` and returning a mapping of tensors.

    Returns:
        A list of NumPy arrays, one per clip, in the same order as ``clips``.

    Raises:
        ValueError: If a clip contains no frames.
    """
    config = getattr(model, "config", None)
    num_frames = getattr(config, "num_frames", None)
    device = getattr(model, "device", None)

    embeddings = []
    for clip in clips:
        frames = list(clip)
        if not frames:
            raise ValueError("No se puede generar un embedding de un clip vacío")

        if num_frames is not None and len(frames) != num_frames:
            # Evenly spaced indices from first to last frame; indices repeat
            # when the clip is shorter than the model's frame count.
            indices = np.linspace(0, len(frames) - 1, num=num_frames)
            frames = [frames[i] for i in indices.round().astype(int).tolist()]

        # Pass the frames positionally as a list: that is the documented
        # calling convention for the VideoMAE processor.
        inputs = processor(frames, return_tensors="pt")
        if device is not None:
            # Keep inputs on the same device as the model weights.
            inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)

        # Average over tokens, then drop only the batch dimension.
        emb = outputs.last_hidden_state.mean(dim=1).squeeze(0).cpu().numpy()
        embeddings.append(emb)
    return embeddings

def detect_scene_changes(embeddings, threshold=0.75):
    """Return the clip indices at which a new scene starts.

    Consecutive embeddings are compared with cosine similarity; whenever the
    similarity falls below ``threshold`` the later clip is recorded as the
    start of a new scene. Index 0 is always included as the first scene start.

    Args:
        embeddings: Sequence of embedding vectors, one per clip.
        threshold: Similarity below which two neighbouring clips are treated
            as belonging to different scenes.

    Returns:
        A list of clip indices in increasing order, beginning with 0.
    """
    boundaries = [0]
    neighbours = zip(embeddings, embeddings[1:])
    for position, (previous, current) in enumerate(neighbours, start=1):
        similarity = cosine_similarity([previous], [current])[0][0]
        if similarity < threshold:
            boundaries.append(position)
    return boundaries

def split_video(input_path, scene_changes, seconds_per_clip, output_dir):
    """Cut the input video into one file per scene using ffmpeg stream copy.

    Every entry of ``scene_changes`` is treated as the first clip of a scene.
    A scene ends where the next one starts; the last scene runs to the end of
    the input. (Previously the last scene was never written, so a video with
    no detected cuts produced no output at all.)

    Files are written as ``scene_<n>.mp4`` (1-based) inside ``output_dir`` and
    existing files are overwritten. Streams are copied, not re-encoded.

    Args:
        input_path: Path of the source video.
        scene_changes: Clip indices at which scenes start, in increasing order.
        seconds_per_clip: Duration of one clip in seconds; converts clip
            indices to timestamps.
        output_dir: Directory for the scene files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)
    starts = list(scene_changes)
    # Pair each scene start with the following one; None marks "until the end
    # of the input" for the final scene.
    ends = starts[1:] + [None]
    for number, (first_clip, next_clip) in enumerate(zip(starts, ends), start=1):
        input_options = {"ss": first_clip * seconds_per_clip}
        if next_clip is not None:
            input_options["t"] = (next_clip - first_clip) * seconds_per_clip
        output_path = os.path.join(output_dir, f"scene_{number}.mp4")
        (
            ffmpeg
            .input(input_path, **input_options)
            .output(output_path, codec='copy')
            .run(overwrite_output=True)
        )


In [None]:
# 🚀 Run the full pipeline: load model -> clips -> embeddings -> cuts -> files
clip_duration = 1.0  # seconds of video per clip
threshold = 0.75  # cosine similarity below this marks a scene change
model_name = "MCG-NJU/videomae-base"
output_dir = "escenas"

print("[INFO] Cargando modelo...")
processor = VideoMAEFeatureExtractor.from_pretrained(model_name)
model = VideoMAEModel.from_pretrained(model_name)

print("[INFO] Extrayendo clips...")
clips = get_video_clips(video_path, clip_duration)
# Without this guard the remaining steps run on empty data and the notebook
# reports success without producing any scene file.
if not clips:
    raise RuntimeError(
        f"No se pudo extraer ningún clip de '{video_path}'; "
        "verifica que el video sea válido y dure al menos un clip"
    )

print("[INFO] Generando embeddings...")
embeddings = extract_embeddings(clips, model, processor)

print("[INFO] Detectando cortes de escena...")
scene_changes = detect_scene_changes(embeddings, threshold)
print("Cortes detectados:", scene_changes)

print("[INFO] Cortando video...")
split_video(video_path, scene_changes, clip_duration, output_dir)

print(f"[LISTO] Escenas generadas en la carpeta '{output_dir}'")
