# 🎬 Video Scene Splitter en Google Colab
Este notebook te permite segmentar automáticamente un video en escenas utilizando el modelo `VideoPrism` de Google.

In [None]:
# 🔧 Instala dependencias
!pip install torch transformers ffmpeg-python scikit-learn opencv-python -q


In [None]:
# 📥 Upload your video through the Colab file picker.
# (Loading from Drive is mentioned as an alternative, but this cell only
# performs the upload.)
from google.colab import files
# `uploaded` is keyed by file name; the run cell uses its first key as the
# video path. Expected input is an .mp4 file.
uploaded = files.upload()


In [None]:
# 📚 Importar librerías
import cv2
import numpy as np
import torch
from transformers import AutoProcessor, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import ffmpeg
import os


In [None]:
# ⚙️ Funciones: extracción de clips y embeddings
def get_video_clips(video_path, seconds_per_clip=1.0):
    """Decode a video and group its frames into fixed-length clips.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        seconds_per_clip: Nominal duration of each clip, in seconds.

    Returns:
        A list of clips. Each clip is a list of exactly
        ``round(fps * seconds_per_clip)`` RGB frames, in playback order.
        Frames left over at the end of the video that do not fill a whole
        clip are discarded. An empty list is returned when no frame can be
        read or when the computed clip length is below one frame.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Round instead of truncating: with fractional frame rates such as
        # 29.97 or 23.976, truncation makes every clip noticeably shorter
        # than `seconds_per_clip`, so clip indices drift away from real time.
        frames_per_clip = round(fps * seconds_per_clip)
        if frames_per_clip < 1:
            # A zero-length clip can never be completed (e.g. the reported
            # FPS is 0); bail out instead of buffering the whole video in
            # memory only to return nothing.
            return []

        clips, frames = [], []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes to BGR; convert so downstream code gets RGB.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            if len(frames) == frames_per_clip:
                clips.append(frames)
                frames = []  # start a fresh buffer for the next clip
        return clips
    finally:
        # Release the capture even if decoding or conversion raises.
        cap.release()

def extract_embeddings(clips, model, processor):
    """Compute one embedding vector per clip.

    Each clip is passed to ``processor`` as a single-video batch, the result
    is fed to ``model``, and the model's ``last_hidden_state`` is averaged
    over dimension 1 to obtain one vector for the clip.

    Args:
        clips: Iterable of clips (each clip is a sequence of frames).
        model: Callable accepting the processor output as keyword arguments
            and returning an object with a ``last_hidden_state`` tensor.
        processor: Callable accepting ``videos=[clip]`` and
            ``return_tensors="pt"`` and returning a mapping of model inputs.

    Returns:
        A list of NumPy arrays, one per clip, in the same order as ``clips``.
    """
    embeddings = []
    # Inference only: no gradients are needed for any clip.
    with torch.no_grad():
        for clip in clips:
            inputs = processor(videos=[clip], return_tensors="pt")
            hidden = model(**inputs).last_hidden_state
            # assumes `hidden` is (batch=1, tokens, dim) — TODO confirm
            # against the model actually loaded.
            # Drop only the batch axis: a bare squeeze() would also remove
            # any other size-1 axis and could collapse the embedding to a
            # 0-d scalar.
            pooled = hidden.mean(dim=1).squeeze(0)
            embeddings.append(pooled.cpu().numpy())
    return embeddings


In [None]:
# 📉 Detección de cambios de escena
def detect_scene_changes(embeddings, threshold=0.75):
    """Find the clip indices at which a new scene begins.

    Consecutive embeddings are compared with cosine similarity; whenever the
    similarity drops below ``threshold``, the later clip is marked as the
    start of a new scene.

    Args:
        embeddings: Sequence of embedding vectors, one per clip.
        threshold: Similarity below which two neighbouring clips are
            considered to belong to different scenes.

    Returns:
        A list of clip indices that always starts with 0, followed by the
        index of every clip that opens a new scene.
    """
    boundaries = [0]
    neighbours = zip(embeddings, embeddings[1:])
    # `position` is the index of the second clip in each neighbouring pair.
    for position, (previous, current) in enumerate(neighbours, start=1):
        similarity = cosine_similarity([previous], [current])[0][0]
        if similarity < threshold:
            boundaries.append(position)
    return boundaries


In [None]:
# ✂️ Cortar video con FFmpeg
def split_video(input_path, scene_changes, seconds_per_clip, output_dir):
    """Write one video file per scene using FFmpeg stream copy.

    Every entry of ``scene_changes`` is treated as the first clip of a
    scene. A scene ends where the next one starts; the last scene has no
    successor, so it is cut without a duration and runs to the end of the
    input. (Previously the last scene was never written, so a video with
    no detected cuts produced no output at all.)

    Args:
        input_path: Path of the source video.
        scene_changes: Clip indices at which scenes start, in ascending
            order. It should not include an end-of-video marker, since the
            final entry is itself exported as a scene.
        seconds_per_clip: Duration represented by one clip index, used to
            convert clip indices into seconds.
        output_dir: Directory receiving ``scene_<n>.mp4`` files; created if
            it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Pair each scene start with the following one; None marks "until EOF".
    next_starts = list(scene_changes[1:]) + [None]
    numbered_scenes = enumerate(zip(scene_changes, next_starts), start=1)
    for scene_number, (first_clip, next_clip) in numbered_scenes:
        input_kwargs = {"ss": first_clip * seconds_per_clip}
        if next_clip is not None:
            input_kwargs["t"] = (next_clip - first_clip) * seconds_per_clip
        output_path = os.path.join(output_dir, f"scene_{scene_number}.mp4")
        # NOTE(review): with codec='copy' nothing is re-encoded, so cut
        # points presumably snap to keyframes — confirm this precision is
        # acceptable.
        (
            ffmpeg
            .input(input_path, **input_kwargs)
            .output(output_path, codec='copy')
            .run(overwrite_output=True)
        )


In [None]:
# 🚀 Ejecutar el flujo
# Only the first uploaded file is processed; any others are ignored.
video_path = list(uploaded)[0]
clip_duration = 1.0  # seconds per clip
threshold = 0.75     # similarity threshold

# Step 1: load the pretrained processor and model.
print("[INFO] Cargando modelo...")
processor = AutoProcessor.from_pretrained("google/videoprism")
model = AutoModel.from_pretrained("google/videoprism")

# Step 2: decode the video into fixed-length clips.
print("[INFO] Extrayendo clips...")
clips = get_video_clips(video_path, seconds_per_clip=clip_duration)

# Step 3: turn every clip into an embedding vector.
print("[INFO] Generando embeddings...")
embeddings = extract_embeddings(clips, model=model, processor=processor)

# Step 4: compare neighbouring clips to locate scene boundaries.
print("[INFO] Detectando cortes de escena...")
scene_changes = detect_scene_changes(embeddings, threshold=threshold)
print("Cortes detectados:", scene_changes)

# Step 5: cut the source video at the detected boundaries.
print("[INFO] Cortando video...")
split_video(
    input_path=video_path,
    scene_changes=scene_changes,
    seconds_per_clip=clip_duration,
    output_dir="escenas",
)

print("[LISTO] Escenas generadas en la carpeta 'escenas'")
