In [1]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'vggt'))

In [2]:
import torch
from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
device = "cuda:4" if torch.cuda.is_available() else "cpu"
# bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+)
dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
print(f"Using device: {device}, dtype: {dtype}")

Using device: cuda:4, dtype: torch.bfloat16


In [4]:
# Initialize the model and load the pretrained weights.
# This will automatically download the model weights the first time it's run, which may take a while.
model = VGGT.from_pretrained("../checkpoint/models--facebook--VGGT-1B/snapshots/860abec7937da0a4c03c41d3c269c366e82abdf9").to(device)

Loading weights from local directory


In [5]:
# 列出 vggt/examples/kitchen/images 的所有png图片，并保存其路径到 image_names 列表中
image_names = [
    os.path.join("../vggt/examples/kitchen/images", f)
    for f in os.listdir("../vggt/examples/kitchen/images")
    if f.endswith(".png")
]
print(image_names[:5])

['../vggt/examples/kitchen/images/02.png', '../vggt/examples/kitchen/images/13.png', '../vggt/examples/kitchen/images/18.png', '../vggt/examples/kitchen/images/01.png', '../vggt/examples/kitchen/images/00.png']


In [6]:
images = load_and_preprocess_images(image_names).to(device)
print(images.shape)

torch.Size([25, 3, 350, 518])


In [9]:
with torch.no_grad():
    with torch.cuda.amp.autocast(dtype=dtype):
        # Predict attributes including cameras, depth maps, and point maps.
        predictions = model(images)

In [17]:
print(predictions.keys())

dict_keys(['pose_enc', 'pose_enc_list', 'depth', 'depth_conf', 'world_points', 'world_points_conf'])


In [21]:
# camera parameters
print(predictions['pose_enc'].shape)
print(len(predictions['pose_enc_list']), predictions['pose_enc_list'][0].shape)

torch.Size([1, 25, 9])
4 torch.Size([1, 25, 9])


In [None]:
# depth map, conf 为 aleatoric uncertainty
print(predictions['depth'].shape, f"[{predictions['depth'].min().item():.2f}, {predictions['depth'].max().item():.2f}]")print(predictions['depth_conf'].shape, f"{predictions['depth_conf'].min().item():.2f}, {predictions['depth_conf'].max().item():.2f}]")

torch.Size([1, 25, 350, 518, 1]) [0.31, 4.11]
torch.Size([1, 25, 350, 518]) 1.00, 33.47]


In [30]:
# point map
print(predictions['world_points'].shape)
print(predictions['world_points_conf'].shape, f"[{predictions['world_points_conf'].min().item():.2f}, {predictions['world_points_conf'].max().item():.2f}]")

torch.Size([1, 25, 350, 518, 3])
torch.Size([1, 25, 350, 518]) [1.00, 32.37]
