In [None]:
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Fetch the `dinov2_vitl14` entry point from the DINOv2 Torch Hub repo and put it
# in evaluation mode (eval() returns the module itself, so the calls chain).
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14').eval()

# Function to resize and center crop image to multiples of 14
def resize_and_crop_to_multiple_of_14(image, max_size=512, patch_size=14):
    """Resize so the longer side equals ``max_size``, then center-crop to patch multiples.

    Args:
        image: PIL-style image exposing ``size`` as ``(width, height)``,
            ``resize((w, h))`` and ``crop((left, top, right, bottom))``.
        max_size: Length in pixels of the longer side after resizing (the
            shorter side is scaled by the same factor, rounded down).
        patch_size: Both output dimensions are rounded down to a multiple of
            this value.

    Returns:
        Tuple ``(cropped_image, new_width, new_height)`` where both dimensions
        are multiples of ``patch_size``.

    Raises:
        ValueError: If ``patch_size`` is not positive, the image has an empty
            dimension, or the resized image cannot hold a single patch along
            one of its sides.
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be positive, got {patch_size}")

    width, height = image.size
    if width <= 0 or height <= 0:
        raise ValueError(f"image has an empty dimension: {width}x{height}")

    # Scale with integer arithmetic. The float form int((max_size / w) * h) can
    # land just below an exact integer (e.g. (512 / 49) * 49 == 511.99999999999994)
    # and then truncates one pixel short.
    if width > height:
        resized_w, resized_h = max_size, (max_size * height) // width
    else:
        resized_w, resized_h = (max_size * width) // height, max_size

    # Largest multiples of patch_size that fit inside the resized image.
    crop_w = (resized_w // patch_size) * patch_size
    crop_h = (resized_h // patch_size) * patch_size
    if crop_w <= 0 or crop_h <= 0:
        raise ValueError(
            f"a {width}x{height} image resized to {resized_w}x{resized_h} "
            f"is smaller than one {patch_size}px patch along one side"
        )

    resized = image.resize((resized_w, resized_h))

    # Center the crop window; any odd leftover pixel goes to the right/bottom edge.
    left = (resized_w - crop_w) // 2
    top = (resized_h - crop_h) // 2
    cropped = resized.crop((left, top, left + crop_w, top + crop_h))

    return cropped, crop_w, crop_h

# Tensor conversion followed by per-channel normalization with mean = std = 0.5.
# Resizing and cropping are NOT part of this pipeline; they are done beforehand
# by resize_and_crop_to_multiple_of_14.
# Alternative statistics that were tried here (ImageNet):
#   mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# Load an example image
image_path = "/path/to/unseen_book/IMG_9837.jpg"  # Change this to your image path
# Force 3-channel RGB: grayscale, palette or RGBA files would otherwise match
# neither the 3-channel Normalize in `preprocess` nor the (H, W, 3) array
# handed to Plotly below.
original_img = Image.open(image_path).convert("RGB")

# Resize and crop image to ensure dimensions are multiples of 14
processed_img, new_width, new_height = resize_and_crop_to_multiple_of_14(original_img)

# Print the new dimensions after cropping/rescaling
print(f"Image size after cropping/rescaling: {new_width}x{new_height}")

# Preprocess the image for DINOv2
img = preprocess(processed_img).unsqueeze(0)  # Add batch dimension

# Forward pass through the model to get patch tokens
with torch.no_grad():
    features = model.forward_features(img)['x_norm_patchtokens']  # Extract patch tokens
    # Index the batch axis explicitly: a bare squeeze() would also drop any
    # other axis that happens to have size 1.
    features_np = features[0].cpu().numpy()  # (num_patches_h * num_patches_w, feature_dim)

# Apply PCA to reduce each patch's feature vector to 3D (for RGB visualization)
pca = PCA(n_components=3)
pca_result = pca.fit_transform(features_np)  # Shape: (num_patches_h * num_patches_w, 3)

# Scale all PCA values jointly to [0, 1] for RGB (one global min/max, so the
# relative magnitude of the three components is kept).
pca_min = pca_result.min()
pca_span = pca_result.max() - pca_min
if pca_span > 0:
    pca_result_normalized = (pca_result - pca_min) / pca_span
else:
    # Degenerate case (all projected values identical): avoid dividing by zero.
    pca_result_normalized = np.zeros_like(pca_result)

# Compute the number of patches (height and width divided by patch size 14)
num_patches_w = new_width // 14
num_patches_h = new_height // 14

# Reshape PCA result into a grid for visualization (rows = patch rows)
pca_grid = pca_result_normalized.reshape(num_patches_h, num_patches_w, 3)

# Convert the PCA grid to an image format suitable for Plotly
pca_grid_img = (pca_grid * 255).astype(np.uint8)

# Create a figure with two subplots for the original image and the PCA visualization
fig = make_subplots(rows=1, cols=2, subplot_titles=("Original Image", "PCA of Patch Features"))

# Add the original image in the first subplot
fig.add_trace(go.Image(z=np.array(processed_img)), row=1, col=1)

# Add the PCA visualization in the second subplot
fig.add_trace(go.Image(z=pca_grid_img), row=1, col=2)

# Update layout
fig.update_layout(
    title="Original Image and PCA of DINOv2 Patch Features",
    margin=dict(l=20, r=20, t=40, b=20),
    height=600,
    width=1000
)

fig.show()


In [None]:
# Smallest value in the preprocessed input batch (sanity check on the normalization range).
torch.min(img)

In [None]:
import rootutils
rootutils.setup_root("/path/to/fast3r/fast3r", indicator=".project-root", pythonpath=True)


import torch
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.decomposition import PCA
from fast3r.dust3r.datasets.co3d_multiview import Co3d_Multiview
from fast3r.dust3r.datasets.habitat_multiview import Habitat_Multiview
from fast3r.dust3r.datasets.base.base_stereo_view_dataset import view_name

# Function to unnormalize the image for visualization
def unnormalize_image(tensor_img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Invert a per-channel ``(x - mean) / std`` normalization.

    Args:
        tensor_img: Image tensor whose channel axis is third from the end,
            e.g. ``(C, H, W)`` or ``(B, C, H, W)``.
        mean: Per-channel means used by the original normalization. Defaults
            to the ImageNet statistics.
        std: Per-channel standard deviations used by the original
            normalization. Defaults to the ImageNet statistics.

    Returns:
        ``tensor_img * std + mean``, broadcast over the spatial axes, on the
        same device as ``tensor_img``.

    NOTE(review): the result is only the true image if ``mean``/``std`` match
    what the data pipeline applied -- confirm the defaults fit the dataset.
    """
    # Build the statistics on the input's device so tensors that live off-CPU
    # do not hit a device mismatch.
    mean_t = torch.as_tensor(mean, dtype=torch.float32, device=tensor_img.device).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=torch.float32, device=tensor_img.device).view(-1, 1, 1)
    return tensor_img * std_t + mean_t

# DINOv2 backbone (`dinov2_vitl14` entry point) from Torch Hub, in evaluation mode.
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14').eval()

# Dataset under test: Habitat multi-view samples, 4 views per sample.
# The first positional argument is presumably the dataset size -- TODO confirm
# against Habitat_Multiview's signature.
#
# Alternative configuration that was used here (CO3D subset):
#   dataset = Co3d_Multiview(
#       split="train", num_views=10, window_degree_range=360, num_samples_per_window=100, mask_bg='rand',
#       ROOT="/path/to/dust3r_data/co3d_50_seqs_per_category_subset_processed", resolution=[(910, 910)], aug_crop=16,
#   )
dataset = Habitat_Multiview(
    1_000_000,
    split='train',
    num_views=4,
    ROOT='/path/to/dust3r_data/habitat_processed',
    aug_crop=16,
    resolution=[(448, 336)],
)

# Iterate through dataset in random order (only one sample is processed; see
# the `break` at the end of the loop body)
for idx in np.random.permutation(len(dataset)):
    views = dataset[idx]
    assert len(views) == dataset.num_views
    print([view_name(view) for view in views])

    # Extract the image for a specific view index (already a torch tensor)
    view_idx = 0  # Choose a view to test
    img_tensor = views[view_idx]["img"]  # indexed below as (C, H, W)

    # Forward pass through the model to get patch tokens (no preprocessing needed)
    with torch.no_grad():
        features = model.forward_features(img_tensor.unsqueeze(0))['x_norm_patchtokens']  # Add batch dimension
        # Index the batch axis explicitly: a bare squeeze() would also drop any
        # other axis that happens to have size 1.
        features_np = features[0].cpu().numpy()  # (num_patches_h * num_patches_w, feature_dim)

    # Apply PCA to reduce each patch's feature vector to 3D (for RGB visualization)
    pca = PCA(n_components=3)
    pca_result = pca.fit_transform(features_np)  # Shape: (num_patches_h * num_patches_w, 3)

    # Scale all PCA values jointly to [0, 1] for RGB (one global min/max)
    pca_min = pca_result.min()
    pca_span = pca_result.max() - pca_min
    if pca_span > 0:
        pca_result_normalized = (pca_result - pca_min) / pca_span
    else:
        # Degenerate case (all projected values identical): avoid dividing by zero.
        pca_result_normalized = np.zeros_like(pca_result)

    # Compute the number of patches for both height and width
    patch_size = 14  # DINOv2 uses 14x14 patches
    num_patches_h = img_tensor.shape[1] // patch_size
    num_patches_w = img_tensor.shape[2] // patch_size

    # Reshape PCA result into a grid for visualization
    pca_grid = pca_result_normalized.reshape(num_patches_h, num_patches_w, 3)

    # Convert the PCA grid to an image format suitable for Plotly
    pca_grid_img = (pca_grid * 255).astype(np.uint8)

    # Unnormalize the original image for visualization.
    # NOTE(review): unnormalize_image applies ImageNet statistics; confirm they
    # match the normalization this dataset applies to "img", otherwise the
    # displayed colors are off.
    img_unnormalized = unnormalize_image(img_tensor).cpu().numpy()
    img_unnormalized = np.transpose(img_unnormalized, (1, 2, 0))  # Convert to HxWxC for display

    # Clip before the uint8 cast: values outside [0, 1] (mismatched statistics
    # or float round-off) would otherwise wrap around instead of saturating.
    img_display = (np.clip(img_unnormalized, 0.0, 1.0) * 255).astype(np.uint8)

    # Create a figure with two subplots for the original image and the PCA visualization
    fig = make_subplots(rows=1, cols=2, subplot_titles=("Original Image", "PCA of Patch Features"))

    # Add the original image in the first subplot
    fig.add_trace(go.Image(z=img_display), row=1, col=1)

    # Add the PCA visualization in the second subplot
    fig.add_trace(go.Image(z=pca_grid_img), row=1, col=2)

    # Update layout
    fig.update_layout(
        title="Original Image and PCA of DINOv2 Patch Features",
        margin=dict(l=20, r=20, t=40, b=20),
        height=600,
        width=1000
    )

    # Show the figure
    fig.show()
    break  # Break after one iteration to test the output


In [None]:
# Shape of the patch-token tensor from the most recent forward pass.
features.size()

In [None]:
# Smallest value in the selected view's image tensor (sanity check on the dataset's normalization range).
torch.min(views[view_idx]["img"])

In [None]:
# Shape of the preprocessed single-image batch.
img.size()

In [None]:
# Shape of the patch tokens for the single-image batch `img`.
# Run under no_grad like the other forward passes in this notebook, so no
# autograd graph is built just to read a shape.
with torch.no_grad():
    patch_tokens = model.forward_features(img)['x_norm_patchtokens']
patch_tokens.shape