"""
Itération 1 — MVP
Objectif :
Entrée  : vidéo 2D (.mp4)
Sortie  : vidéo 3D stéréo Side-by-Side (SBS)

Pipeline :
Video → Frames → Depth → Stereo Shift → Video
"""


In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm


In [2]:
# Paths
# Input clip read by cv2.VideoCapture.
VIDEO_PATH = "input.mp4"
# Working directories, one per pipeline stage; every stage stores its result
# for a frame under the same file name (frame_XXXXX.png).
FRAMES_DIR = "frames"    # frames extracted from the input video
DEPTH_DIR = "depth"      # 8-bit grayscale depth maps
STEREO_DIR = "stereo"    # side-by-side (left | right) stereo frames
# Final side-by-side video assembled from STEREO_DIR.
OUTPUT_VIDEO = "output_sbs.mp4"

# Stereo parameters
# Largest horizontal shift, in pixels, applied to a pixel in EACH view
# (the left view shifts by -d, the right view by +d).
MAX_DISPARITY = 30   # pixels (kept small for viewing comfort)


In [3]:
# Stage 1: split the input video into numbered PNG frames so the following
# stages can process one file at a time.
os.makedirs(FRAMES_DIR, exist_ok=True)

cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    # Without this check a missing or unreadable file silently yields 0 frames.
    raise FileNotFoundError(f"Impossible d'ouvrir la vidéo : {VIDEO_PATH}")

try:
    # Source frame rate; reused later when the output video is encoded.
    fps = cap.get(cv2.CAP_PROP_FPS)

    idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of stream (or decode failure): stop extracting.
            break
        frame_path = os.path.join(FRAMES_DIR, f"frame_{idx:05d}.png")
        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(frame_path, frame):
            raise OSError(f"Échec de l'écriture de {frame_path}")
        idx += 1
finally:
    # Release the capture handle even if writing a frame failed.
    cap.release()

print(f"{idx} frames extraites à {fps:.2f} fps")


11838 frames extraites à 30.00 fps


In [10]:
import sys

# Make the local Depth-Anything-V2 checkout importable. The path is made
# absolute so it stays valid if the working directory changes later, and it is
# appended only once so re-running the cell does not duplicate the entry.
repo_path = os.path.abspath("models/Depth-Anything-V2")
already_importable = any(entry == repo_path for entry in sys.path)
if not already_importable:
    sys.path.append(repo_path)

print("Repo ajouté au PYTHONPATH")


Repo ajouté au PYTHONPATH


In [16]:
import torch
from depth_anything_v2.dpt import DepthAnythingV2

# Pick the best available backend: CUDA GPU first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Small (ViT-S) variant of Depth Anything V2.
model = DepthAnythingV2(
    encoder="vits",
    features=64,
    out_channels=[48, 96, 192, 384],
)

# The directory name must match the checkout added to sys.path
# ("Depth-Anything-V2", capital A). The previous spelling "Depth-anything-V2"
# only resolves on case-insensitive filesystems.
ckpt_path = os.path.join("models", "Depth-Anything-V2", "depth_anything_v2_vits.pth")
ckpt = torch.load(ckpt_path, map_location=DEVICE)
model.load_state_dict(ckpt)
# Inference only: move to the selected device and switch to eval mode.
model = model.to(DEVICE).eval()


In [None]:

# Stage 2: estimate a depth map for every extracted frame and store it as an
# 8-bit grayscale PNG under the same file name in DEPTH_DIR.
os.makedirs(DEPTH_DIR, exist_ok=True)

# Keep PNG frames only: os.listdir also returns stray entries (e.g. ".DS_Store")
# that cv2.imread cannot decode. Sorting restores frame order.
frame_files = sorted(
    name for name in os.listdir(FRAMES_DIR) if name.lower().endswith(".png")
)

for f in tqdm(frame_files):
    frame_path = os.path.join(FRAMES_DIR, f)
    img = cv2.imread(frame_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError(f"Impossible de lire l'image : {frame_path}")

    # NOTE(review): the upstream Depth-Anything-V2 README feeds the BGR array
    # from cv2.imread straight into infer_image, which performs the BGR->RGB
    # conversion itself. Converting here as well would swap the channels twice,
    # so the frame is passed through unchanged - confirm against the checkout.
    with torch.no_grad():
        depth = model.infer_image(img)

    # Per-frame min-max normalisation to [0, 1]. A perfectly flat prediction
    # has zero range; map it to all zeros instead of dividing by zero.
    d_min = depth.min()
    d_range = depth.max() - d_min
    if d_range > 0:
        depth = (depth - d_min) / d_range
    else:
        depth = np.zeros_like(depth)

    depth_uint8 = (depth * 255).astype(np.uint8)
    depth_path = os.path.join(DEPTH_DIR, f)
    if not cv2.imwrite(depth_path, depth_uint8):
        raise OSError(f"Échec de l'écriture de {depth_path}")


 51%|█████     | 6034/11838 [6:32:27<6:28:18,  4.01s/it]    

In [None]:
def _render_stereo_pair(img, shift):
    """Forward-warp *img* horizontally by a per-pixel integer shift.

    Every source pixel (y, x) is copied to (y, x - shift[y, x]) in the left
    view and to (y, x + shift[y, x]) in the right view. Targets that fall
    outside the image are dropped and target pixels that receive nothing stay
    black. When several sources land on the same target, the source with the
    largest x wins, exactly as in a left-to-right pixel scan.

    Args:
        img: image array whose first two axes are (height, width).
        shift: non-negative integer array of shape (height, width).

    Returns:
        Tuple ``(left, right)`` of arrays shaped like *img*.

    Raises:
        ValueError: if *img* and *shift* disagree on height/width.
    """
    if img.shape[:2] != shift.shape:
        raise ValueError(
            f"image {img.shape[:2]} and disparity {shift.shape} sizes differ"
        )

    w = shift.shape[1]
    left = np.zeros_like(img)
    right = np.zeros_like(img)

    # A shift >= w can never land inside the image, so it needs no pass.
    max_shift = min(int(shift.max()), w - 1)

    # Left view: target = x - d. For a given target a larger d means a larger
    # source x, so ascending d lets the largest x be written last.
    for d in range(max_shift + 1):
        sel = shift[:, d:] == d
        left[:, : w - d][sel] = img[:, d:][sel]

    # Right view: target = x + d. Here a smaller d means a larger source x, so
    # descending d lets the largest x be written last.
    for d in range(max_shift, -1, -1):
        sel = shift[:, : w - d] == d
        right[:, d:][sel] = img[:, : w - d][sel]

    return left, right


# Stage 3: build a side-by-side stereo frame for every (frame, depth) pair.
os.makedirs(STEREO_DIR, exist_ok=True)

for f in tqdm(frame_files):
    frame_path = os.path.join(FRAMES_DIR, f)
    depth_path = os.path.join(DEPTH_DIR, f)
    img = cv2.imread(frame_path)
    depth = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None rather than raising.
    if img is None:
        raise OSError(f"Impossible de lire l'image : {frame_path}")
    if depth is None:
        raise OSError(f"Impossible de lire l'image : {depth_path}")

    # Stored depth is 8-bit; bring it back to [0, 1].
    depth_norm = depth.astype(np.float32) / 255.0

    # Depth value 0 gets the full MAX_DISPARITY shift, value 255 gets none.
    disparity = (1.0 - depth_norm) * MAX_DISPARITY
    # Truncate to whole pixels (same result as int() on non-negative values).
    shift = disparity.astype(np.int32)

    left, right = _render_stereo_pair(img, shift)

    # Left view on the left half, right view on the right half.
    sbs = np.hstack([left, right])
    sbs_path = os.path.join(STEREO_DIR, f)
    if not cv2.imwrite(sbs_path, sbs):
        raise OSError(f"Échec de l'écriture de {sbs_path}")


In [None]:
# Stage 4: encode the side-by-side frames into the final video at the frame
# rate measured on the input clip.
if not frame_files:
    raise RuntimeError("Aucune frame à encoder")

first_path = os.path.join(STEREO_DIR, frame_files[0])
first = cv2.imread(first_path)
if first is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise OSError(f"Impossible de lire l'image : {first_path}")
# The first stereo frame fixes the output resolution.
h, w, _ = first.shape

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# VideoWriter takes the frame size as (width, height).
out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, fps, (w, h))
if not out.isOpened():
    raise OSError(f"Impossible d'ouvrir la vidéo de sortie : {OUTPUT_VIDEO}")

try:
    for f in tqdm(frame_files):
        sbs_path = os.path.join(STEREO_DIR, f)
        img = cv2.imread(sbs_path)
        if img is None:
            raise OSError(f"Impossible de lire l'image : {sbs_path}")
        # Fail loudly on a size mismatch instead of risking the frame being
        # silently left out of the output.
        if img.shape[:2] != (h, w):
            raise ValueError(
                f"{sbs_path}: taille {img.shape[:2]} différente de {(h, w)}"
            )
        out.write(img)
finally:
    # Finalise the container even if a frame could not be read.
    out.release()

print("Vidéo SBS générée :", OUTPUT_VIDEO)
