### Use MEt3R with MASt3R 

In [2]:
import torch
from met3r import MEt3R

# Side length in pixels: passed to MEt3R as `img_size` and reused when the
# random inputs are generated.
IMG_SIZE = 256

# Build the metric on top of the MASt3R backbone. Every value below is the one
# the original cell marked as "Default".
metric = MEt3R(
    backbone="mast3r",  # alternatives: "dust3r", "raft"
    feature_backbone="dino16",  # alternatives: "dinov2", "maskclip", "vit", "clip", "resnet50"
    feature_backbone_weights="mhamilton723/FeatUp",
    upsampler="featup",  # alternatives: "nearest", "bilinear", "bicubic"
    distance="cosine",  # alternatives: "lpips", "rmse", "psnr", "mse", "ssim"
    img_size=IMG_SIZE,  # set to `None` to use the input resolution on the fly
    use_norm=True,
    freeze=True,
)
# Move the metric onto the GPU.
metric = metric.cuda()

Using cache found in /data1/usman/.cache/torch/hub/mhamilton723_FeatUp_main
  from .autonotebook import tqdm as notebook_tqdm
Using cache found in /data1/usman/.cache/torch/hub/facebookresearch_dino_main




In [7]:
# Prepare inputs of shape (batch, views, channels, height, width): views must be 2
# RGB range must be in [-1, 1]
# Reduce the batch size in case of CUDA OOM
#
# A single random view is sampled and then duplicated, so both views of every
# pair are identical. To score two independent random views instead, sample
# torch.randn((10, 2, 3, IMG_SIZE, IMG_SIZE)) and skip the repeat.
inputs = torch.randn((10, 1, 3, IMG_SIZE, IMG_SIZE)).cuda()
inputs = inputs.repeat(1, 2, 1, 1, 1)  # Repeat to create 2 views
print(f"Input shape: {inputs.shape}")
inputs = inputs.clip(-1, 1)

# Evaluate MEt3R; only the first returned value (the per-pair score) is kept.
score, *_ = metric(
    images=inputs,
    return_overlap_mask=False,  # Default
    return_score_map=False,  # Default
    return_projections=False,  # Default
)

# With identical views the recorded run printed per-pair scores of roughly
# 0.0007 - 0.0055 (mean ~0.0015). The "0.22 - 0.29" range noted here earlier
# presumably refers to two independent random views - TODO confirm.
print("Score: ", score)
print(score.mean().item())

# Clear up GPU memory
torch.cuda.empty_cache()

# Report what is still allocated / reserved after emptying the cache.
device = "cuda" if torch.cuda.is_available() else "cpu"
used_memory = torch.cuda.memory_allocated(device) / (1024 ** 2)  # in MB
cached_memory = torch.cuda.memory_reserved(device) / (1024 ** 2)  # in MB

print(f"Used memory: {used_memory:.2f} MB")
print(f"Cached memory: {cached_memory:.2f} MB")

Input shape: torch.Size([10, 2, 3, 256, 256])
Score:  tensor([0.0007, 0.0008, 0.0007, 0.0055, 0.0007, 0.0008, 0.0008, 0.0007, 0.0037,
        0.0007], device='cuda:0')
0.0015098018338903785
Used memory: 2742.91 MB
Cached memory: 2872.00 MB


### Use MEt3R with DUSt3R

In [3]:
import torch
from met3r import MEt3R

# Side length in pixels, shared by the metric and the generated inputs.
IMG_SIZE = 256

# Same configuration as the other sections of this notebook, but with the
# DUSt3R backbone selected.
metric = MEt3R(
    backbone="dust3r",
    feature_backbone="dino16",
    feature_backbone_weights="mhamilton723/FeatUp",
    upsampler="featup",
    distance="cosine",
    img_size=IMG_SIZE,
    use_norm=True,
    freeze=True,
)
# Move the metric onto the GPU.
metric = metric.cuda()

Using cache found in /data1/usman/.cache/torch/hub/mhamilton723_FeatUp_main
Using cache found in /data1/usman/.cache/torch/hub/facebookresearch_dino_main


In [4]:
# Random batch shaped (batch, views, channels, height, width) with exactly
# 2 views, moved to the GPU and clamped to the required RGB range [-1, 1].
# Lower the batch size (first dimension) if CUDA runs out of memory.
inputs = torch.randn((10, 2, 3, IMG_SIZE, IMG_SIZE)).cuda().clip(-1, 1)

# Run the metric; only the first returned value (the score) is kept.
score, *_ = metric(
    images=inputs,
    return_projections=False,  # Default
    return_score_map=False,  # Default
    return_overlap_mask=False,  # Default
)

# Should be between 0.30 - 0.35
print(score.mean().item())

# Hand cached blocks back before measuring what is still held on the GPU.
torch.cuda.empty_cache()

device = "cpu" if not torch.cuda.is_available() else "cuda"
cached_memory = torch.cuda.memory_reserved(device) / 2 ** 20  # in MB
used_memory = torch.cuda.memory_allocated(device) / 2 ** 20  # in MB

print(f"Used memory: {used_memory:.2f} MB")
print(f"Cached memory: {cached_memory:.2f} MB")

0.3464803993701935
Used memory: 2297.43 MB
Cached memory: 2626.00 MB


### Use MEt3R with RAFT (Optical Flow)

In [None]:
import torch
from met3r import MEt3R

# Side length in pixels, shared by the metric and the generated inputs.
IMG_SIZE = 256

# MEt3R configured with the RAFT (optical flow) backbone; all other options
# are the same values used in the other sections of this notebook.
metric = MEt3R(
    backbone="raft",
    feature_backbone="dino16",
    feature_backbone_weights="mhamilton723/FeatUp",
    upsampler="featup",
    distance="cosine",
    img_size=IMG_SIZE,
    use_norm=True,
    freeze=True,
)
# Move the metric onto the GPU.
metric = metric.cuda()

In [None]:
# Random batch shaped (batch, views, channels, height, width) with exactly
# 2 views, moved to the GPU and clamped to the required RGB range [-1, 1].
# Lower the batch size (first dimension) if CUDA runs out of memory.
inputs = torch.randn((10, 2, 3, IMG_SIZE, IMG_SIZE)).cuda().clip(-1, 1)

# Run the metric; only the first returned value (the score) is kept.
score, *_ = metric(
    images=inputs,
    return_projections=False,  # Default
    return_score_map=False,  # Default
    return_overlap_mask=False,  # Default
)

# Should be between 0.17 - 0.18
print(score.mean().item())

# Release cached GPU memory once the score has been printed.
torch.cuda.empty_cache()

### Use MEt3R with VGGT

In [1]:
import torch
from met3r import MEt3R

# Side length in pixels used for both the metric and the random inputs
# (224 here, whereas the MASt3R / DUSt3R / RAFT sections use 256).
IMG_SIZE = 224

# Initialize MEt3R with the "vggt" backbone; all other options match the
# MASt3R / DUSt3R / RAFT sections.
# NOTE(review): the recorded run of this cell ended with
# `TypeError: object of type 'NoneType' has no len()` after the model was
# loaded, so nothing below the constructor is known to work yet.
metric = MEt3R(
    img_size=IMG_SIZE,
    use_norm=True,
    backbone="vggt",
    feature_backbone="dino16",
    feature_backbone_weights="mhamilton723/FeatUp",
    upsampler="featup",
    distance="cosine",
    freeze=True, 
).cuda()

# Prepare inputs of shape (batch, views, channels, height, width): views must be 2
# RGB range must be in [-1, 1]
# Reduce the batch size in case of CUDA OOM
inputs = torch.randn((10, 2, 3, IMG_SIZE, IMG_SIZE)).cuda()
inputs = inputs.clip(-1, 1)

# Evaluate MEt3R; only the first returned value (the score) is kept.
score, *_ = metric(
    images=inputs, 
    return_overlap_mask=False, # Default 
    return_score_map=False, # Default 
    return_projections=False # Default 
)

# Should be between 0.17 - 0.18
# NOTE(review): this range is unverified for the VGGT backbone - the recorded
# run never reached this line. TODO confirm once the cell runs.
print(score.mean().item())

# Clear up GPU memory
torch.cuda.empty_cache()

VGGT_LIB_PATH /data1/usman/vision/met3r/vggt/vggt
VGGT_REPO_PATH /data1/usman/vision/met3r/vggt
inside MET3R init


Using cache found in /data1/usman/.cache/torch/hub/mhamilton723_FeatUp_main
  from .autonotebook import tqdm as notebook_tqdm
Using cache found in /data1/usman/.cache/torch/hub/facebookresearch_dino_main


[DEBUG] Using backbone: vggt
[DEBUG] Requested backbone: vggt
VGGT model loaded


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


TypeError: object of type 'NoneType' has no len()

### Testing

In [5]:
import torch
from met3r import MEt3R

# Side length in pixels used for both the metric and the random inputs.
IMG_SIZE = 224

# Initialize MEt3R with the MASt3R backbone and use_norm=False (the MASt3R,
# DUSt3R, RAFT and VGGT sections all use use_norm=True).
metric = MEt3R(
    img_size=IMG_SIZE,
    use_norm=False,
    backbone="mast3r",
    feature_backbone="dino16",
    feature_backbone_weights="mhamilton723/FeatUp",
    upsampler="featup",
    distance="cosine",
    freeze=True,
)
metric = metric.cuda()

# Prepare inputs of shape (batch, views, channels, height, width): views must be 2
# RGB range must be in [-1, 1]
# Reduce the batch size in case of CUDA OOM
inputs = torch.randn((10, 2, 3, IMG_SIZE, IMG_SIZE)).cuda()
inputs = inputs.clip(-1, 1)

# Evaluate MEt3R; only the first returned value (the per-pair score) is kept.
score, *_ = metric(
    images=inputs,
    return_overlap_mask=False,  # Default
    return_score_map=False,  # Default
    return_projections=False,  # Default
)

# The recorded run of this configuration (use_norm=False, 224 px) printed
# per-pair scores of roughly 0.20 - 0.41 with a mean of ~0.29; the
# "0.17 - 0.18" range noted here earlier did not match that output.
print(score)
print(score.mean().item())
# Tensor.tolist() returns plain Python floats directly, so no manual
# .cpu().numpy() round trip is needed.
print(score.tolist())

# Clear up GPU memory
torch.cuda.empty_cache()

Using cache found in /data1/usman/.cache/torch/hub/mhamilton723_FeatUp_main
Using cache found in /data1/usman/.cache/torch/hub/facebookresearch_dino_main


tensor([0.2551, 0.2609, 0.4051, 0.3508, 0.1960, 0.2474, 0.3170, 0.2876, 0.2531,
        0.3401], device='cuda:0')
0.29131874442100525
[0.25509113073349, 0.2608541250228882, 0.4051186442375183, 0.35083553194999695, 0.19604437053203583, 0.24744966626167297, 0.31704163551330566, 0.28756314516067505, 0.25313231348991394, 0.3400569260120392]


In [1]:
import torch

# Serialized file from the test directory of the local re10k_subset data.
path = "/data1/usman/vision/data/re10k_subset/test/000000.torch"
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load files from a trusted source. Consider weights_only=True if
# the stored objects allow it - TODO confirm.
data = torch.load(path)


In [11]:
# List the fields stored in the first record of the loaded file.
data[0].keys()

dict_keys(['url', 'timestamps', 'cameras', 'images', 'key'])

In [17]:
# Show the source URL stored with the first record.
data[0]['url']

'https://www.youtube.com/watch?v=-aldZQifF2U'

In [20]:
# Shape of the first record's `timestamps` tensor.
data[0]['timestamps'].shape

torch.Size([143])

In [2]:
from PIL import Image

# Flatten a (possibly transparent) PNG onto a white canvas.
img_path = "/data1/usman/vision/data/monocular_front/0a0bd11af4d8460f8a0f74ecf37023aa/front_frame0.png"
# Converting to RGBA guarantees an alpha band is present.
img = Image.open(img_path).convert("RGBA")

# Opaque white canvas with the same dimensions as the source image.
white_bg = Image.new(mode="RGB", size=img.size, color=(255, 255, 255))

# Composite: the alpha band decides where source pixels replace the white.
white_bg.paste(img, mask=img.getchannel("A"))

# Write the flattened image and the RGBA version; both paths are relative, so
# they land in the current working directory.
white_bg.save("front_frame0_white.png")
img.save("front_frame0.png")


In [1]:
import cv2
import os

# Extract a fixed number of frames from a video, starting at a given time
# offset, and write them both as individual PNGs and as a new MP4 clip.

# Parameters
video_path = "/data1/usman/vision/data/test_video.mp4"
output_frames_dir = "./output_frames"
output_video_path = "./output_video.mp4"
start_time_sec = 30
num_frames_to_read = 101

# Create output directory if not exists
os.makedirs(output_frames_dir, exist_ok=True)

# Open the video file
cap = cv2.VideoCapture(video_path)
out = None
try:
    if not cap.isOpened():
        raise IOError("Cannot open video file")

    # Get video properties. A non-positive FPS would silently seek to frame 0
    # and hand an unusable frame rate to the writer, so fail loudly instead.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        raise IOError("Video file reports no valid frame rate")
    start_frame = int(start_time_sec * fps)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # Get frame size
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    if not out.isOpened():
        # Without this check a missing codec produces no video and no error.
        raise IOError("Cannot open output video file for writing")

    # Read and save frames
    for i in range(num_frames_to_read):
        ret, frame = cap.read()
        if not ret:
            print(f"Stopped early at frame {i} due to read failure.")
            break
        # Save PNG; imwrite signals failure through its return value only.
        frame_filename = os.path.join(output_frames_dir, f"frame_{i:04d}.png")
        if not cv2.imwrite(frame_filename, frame):
            raise IOError(f"Cannot write frame image: {frame_filename}")
        # Write to output video
        out.write(frame)
finally:
    # Release everything, even when an error interrupts the loop above.
    cap.release()
    if out is not None:
        out.release()
print("Processing complete.")


Processing complete.


In [12]:
import json
import os
import matplotlib.pyplot as plt

# Plot the MEt3R score series of every experiment, one figure per feature
# backbone family, and save each figure as a PNG.

# Parameters
json_path = "experiments.json"
output_dir = "./plots"
Y_MAX = 0.6  # y-axis upper limit applied to every figure
os.makedirs(output_dir, exist_ok=True)

# Load JSON data. A dedicated name is used so the `data` object loaded from
# the .torch file earlier in the notebook is not overwritten.
with open(json_path, 'r') as f:
    experiments = json.load(f)

# Normalize feature_backbone names
name_map = {
    'dino': 'DINO',
    'dino16': 'DINO',
    'dinov2': 'DINOv2',
    'vit': 'ViT'
}

groups = {"DINO": [], "DINOv2": [], "ViT": []}

# Grouping: experiments whose feature backbone is not in `name_map` are skipped.
for exp in experiments:
    key = exp["feature_backbone"].lower()
    group_name = name_map.get(key)
    if group_name in groups:
        label = f'{exp["backbone"]}-{exp["experiment"]}'
        groups[group_name].append((label, exp["scores"]))

# Plotting: one figure per group, one line per experiment.
for feature_backbone, series in groups.items():
    fig, ax = plt.subplots(figsize=(10, 6))

    for label, scores in series:
        ax.plot(range(len(scores)), scores, label=label)

    ax.set_title(f'{feature_backbone} MEt3R Scores Over Time')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('MEt3R Score')
    ax.set_ylim(0, Y_MAX)  # fixed y-axis range: 0 to Y_MAX (0.6)
    if series:
        # legend() warns when there are no labelled lines, so skip it for
        # groups that received no experiments.
        ax.legend()
    ax.grid(True)

    plot_path = os.path.join(output_dir, f'{feature_backbone}_met3r_scores.png')
    fig.savefig(plot_path)
    plt.close(fig)

print(f"Saved plots to: {output_dir}")


Saved plots to: ./plots
