In [1]:

from vggt.models.vggt import VGGT

In [7]:

from vton3d.utils import test

In [8]:
# Call e() on the `test` module imported from vton3d.utils and print whatever it
# returns. NOTE(review): looks like an import smoke test - confirm it is still needed.
print(test.e())

None


In [9]:
# Echo the imported VGGT class; the cell output shows its fully qualified name.
VGGT

vggt.models.vggt.VGGT

In [None]:
# Import the top-level vton3d package and list the attribute names it exposes.
import vton3d

print(dir(vton3d))


['__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']


In [2]:
import os
import sys
import cv2
import torch
import numpy as np
from PIL import Image

# Path to the Sapiens-Pytorch-Inference checkout; adjust it to your setup.
# It is resolved to an absolute path once, so the value stays valid as a
# sys.path entry and as an os.chdir() target even if the working directory
# changes later in the session.
sapiens_dir = os.path.abspath("../Sapiens-Pytorch-Inference")

if not os.path.isdir(sapiens_dir):
    raise FileNotFoundError(f"Sapiens-Ordner nicht gefunden: {sapiens_dir}")

# Put the repo on sys.path so the import below works. Skip the insert when the
# entry is already present, so re-running this cell does not pile up duplicates.
if sapiens_dir not in sys.path:
    sys.path.insert(0, sapiens_dir)

from sapiens_inference import (
    SapiensDepth,
    SapiensDepthType,
    SapiensSegmentation,
    SapiensSegmentationType,
    SapiensConfig,
)


In [3]:
# Configuration: select the 1B variants of the depth and segmentation models.
config = SapiensConfig()
config.depth_type = SapiensDepthType.DEPTH_1B
config.segmentation_type = SapiensSegmentationType.SEGMENTATION_1B

# Fall back to the CPU (with a notice) when CUDA is unavailable.
if not torch.cuda.is_available():
    print("Keine GPU gefunden, verwende CPU.")
    config.device = "cpu"
else:
    config.device = "cuda"

# Build both predictors while the working directory is the Sapiens checkout, so
# that the models end up in the right folder (presumably the predictors resolve
# their "models" directory relative to the cwd - TODO confirm). The previous
# working directory is restored even if loading fails.
orig_cwd = os.getcwd()
try:
    os.makedirs(os.path.join(sapiens_dir, "models"), exist_ok=True)
    os.chdir(sapiens_dir)
    depth_predictor = SapiensDepth(
        config.depth_type, config.device, config.dtype
    )
    seg_predictor = SapiensSegmentation(
        config.segmentation_type, config.device, config.dtype
    )
finally:
    os.chdir(orig_cwd)

print("Modelle geladen auf Gerät:", config.device)


Modelle geladen auf Gerät: cuda


In [4]:
import matplotlib.pyplot as plt

def _prediction_to_numpy(pred):
    """Convert a predictor result (torch tensor or array-like) to a squeezed NumPy array."""
    if isinstance(pred, torch.Tensor):
        pred = pred.detach()
        # NumPy cannot represent bfloat16 and OpenCV cannot resize float16,
        # so widen reduced-precision outputs before leaving torch.
        if pred.dtype in (torch.float16, torch.bfloat16):
            pred = pred.float()
        return pred.squeeze().cpu().numpy()
    return np.asarray(pred)


def run_sapiens_on_image(img_path):
    """Run segmentation and depth prediction on one image and build preview arrays.

    Relies on the module-level ``seg_predictor`` and ``depth_predictor``.

    Args:
        img_path: Anything ``PIL.Image.open`` accepts (typically a file path).

    Returns:
        A tuple ``(rgb_np, human_mask, depth_color_rgb)``:

        * ``rgb_np``: the image as an RGB array of shape (H, W, 3).
        * ``human_mask``: uint8 array of shape (H, W); 1 where the segmentation
          output exceeds 0.5, 0 elsewhere.
        * ``depth_color_rgb``: uint8 RGB array of shape (H, W, 3); the min-max
          normalized depth rendered with the TURBO colormap, where every pixel
          outside the mask (or without a finite depth value) is set to the
          maximum value 1.0.
    """
    # Load the image as RGB and convert to BGR, the layout passed to the predictors.
    pil = Image.open(img_path).convert("RGB")
    rgb_np = np.array(pil)
    bgr_np = cv2.cvtColor(rgb_np, cv2.COLOR_RGB2BGR)
    H, W = bgr_np.shape[:2]

    # Segmentation: threshold first, then resize the binary mask with
    # nearest-neighbour sampling. This avoids interpolating between label
    # values and hands cv2.resize a dtype it supports (uint8) regardless of
    # what the predictor returned.
    seg_map = _prediction_to_numpy(seg_predictor(bgr_np))
    human_mask = (seg_map > 0.5).astype(np.uint8)
    if human_mask.shape != (H, W):
        human_mask = cv2.resize(human_mask, (W, H), interpolation=cv2.INTER_NEAREST)

    # Depth: bring the map to a float dtype OpenCV can resize and to the image
    # resolution, so it can be combined with the mask below.
    depth_np = _prediction_to_numpy(depth_predictor(bgr_np))
    if depth_np.dtype not in (np.float32, np.float64):
        depth_np = depth_np.astype(np.float32)
    if depth_np.shape != (H, W):
        depth_np = cv2.resize(depth_np, (W, H), interpolation=cv2.INTER_LINEAR)

    # Min-max normalize using finite values only, so NaN/inf entries neither
    # skew the range nor reach the uint8 cast (which is undefined for them).
    finite = np.isfinite(depth_np)
    if finite.any():
        d_min = depth_np[finite].min()
        d_max = depth_np[finite].max()
        depth_norm = (depth_np - d_min) / (d_max - d_min + 1e-8)
    else:
        depth_norm = np.ones_like(depth_np)

    # Set the background (and any pixel without a finite depth) to "far away".
    keep = finite & (human_mask == 1)
    modified_depth = np.where(keep, depth_norm, 1.0)
    modified_8u = (np.clip(modified_depth, 0.0, 1.0) * 255).astype(np.uint8)
    depth_color_bgr = cv2.applyColorMap(modified_8u, cv2.COLORMAP_TURBO)
    depth_color_rgb = cv2.cvtColor(depth_color_bgr, cv2.COLOR_BGR2RGB)

    return rgb_np, human_mask, depth_color_rgb


In [None]:
test_image = "../data/flo_16/real/images/florian_0000.jpg"

rgb, mask, depth_vis = run_sapiens_on_image(test_image)

# One row with three panels: the input image, the mask (grayscale) and the
# colorized depth rendering. Axes are hidden on every panel.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
panels = (
    ("Original", rgb, {}),
    ("Human-Maske", mask, {"cmap": "gray"}),
    ("Depth (modifiziert)", depth_vis, {}),
)
for ax, (title, image, imshow_kwargs) in zip(axes, panels):
    ax.set_title(title)
    ax.imshow(image, **imshow_kwargs)
    ax.axis("off")

plt.show()
