In [39]:
# Use the DINOv2 model from the transformers library
import os

from transformers import AutoImageProcessor, AutoModel
from PIL import Image

# load model and processor
# The checkpoint directory can be overridden with the DINOV2_CHECKPOINT
# environment variable so the notebook is runnable on other machines; the
# default is the original local path.
checkpoint = os.environ.get(
    "DINOV2_CHECKPOINT",
    "/media/cartolab3/DataDisk/wuqilong_file/Projects/RerenkVPR/pretrained_model/dinov2_large",
)
processor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)
print(model)

Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-23): 24 x Dinov2Layer(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2Attention(
          (attention): Dinov2SelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_

In [42]:
from torchvision import transforms
import cv2
from einops import rearrange
from dino_visual.visualization_tools import get_pca_map
import numpy as np
import os

# Example input image path (relative to the notebook's working directory).
input_image_path = "imgs/00011.jpg"
# Example (height, width) for resizing; 322 = 23 * 14, i.e. a whole number of
# 14-pixel patches per side.
# NOTE(review): neither global is referenced by the visible cells, which call
# visualize_dinov2 with its own default size - confirm they are still needed.
image_size = (322,322)

def visualize_dinov2(input_image_path, image_size=(322,322)):
    """Save a side-by-side picture of an image and a PCA map of its DINOv2 features.

    The image is resized to ``image_size``, passed through the notebook-global
    ``model``, and the patch tokens of the last hidden state (the first token
    is dropped) are reshaped into a 2-D grid and handed to ``get_pca_map``.
    The resized photo and the resulting color map are concatenated
    horizontally and written next to the input as ``<name>_dinov2.jpg``.

    Args:
        input_image_path: Path of the image file to visualize.
        image_size: ``(height, width)`` used for resizing. Each side should be
            a multiple of the 14-pixel patch size so the token grid is exact.

    Returns:
        The path of the written visualization file.

    Raises:
        OSError: If OpenCV reports that the output file could not be written.
    """
    # Local import: this cell does not import torch at module level.
    import torch

    patch_size = 14  # patch edge used when reshaping tokens into a grid
    height, width = image_size

    # Load eagerly and force 3-channel RGB so grayscale / RGBA / palette files
    # work, and so the file handle is closed right away.
    with Image.open(input_image_path) as image_file:
        ori_image = image_file.convert("RGB")

    # Resize, convert to a tensor and apply ImageNet mean/std normalization,
    # the preprocessing DINOv2 checkpoints are used with.
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

    image = transform(ori_image).unsqueeze(0)
    print(image.shape)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(image)

    # Drop the first (class) token and fold the remaining tokens into an
    # (h, w) grid of patch features.
    features = outputs.last_hidden_state
    features = rearrange(
        features[:, 1:, :],
        'b (h w) c -> b h w c',
        h=height // patch_size,
        w=width // patch_size,
    )
    features = features[0].float()

    # Color map resampled to the image size.
    # NOTE(review): assumes get_pca_map returns an (H, W, 3) array in [0, 1] - confirm.
    color = get_pca_map(features.detach(), image_size, interpolation='bilinear')
    color = np.asarray(color * 255)

    # PIL's resize expects (width, height) while image_size is (height, width).
    photo_rgb = np.asarray(ori_image.resize((width, height)))
    # cv2.imwrite expects BGR channel order; the PIL image is RGB. The PCA
    # panel is left exactly as get_pca_map produced it.
    photo_bgr = photo_rgb[:, :, ::-1]

    # One row, two columns: photo on the left, PCA map on the right.
    canvas = np.hstack((photo_bgr, color))
    canvas = np.clip(np.rint(canvas), 0, 255).astype(np.uint8)

    # Build the output name with splitext/join so dotted file names are not
    # truncated and a bare file name does not resolve to the filesystem root.
    stem = os.path.splitext(os.path.basename(input_image_path))[0]
    output_file = os.path.join(os.path.dirname(input_image_path), f'{stem}_dinov2.jpg')

    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(output_file, canvas):
        raise OSError(f"Failed to write visualization to {output_file}")
    return output_file

In [43]:
import glob

# Visualize multiple images
img_path="./imgs/output_imgs/pitts"
# visualize_dinov2 writes "<name>_dinov2.jpg" into the same folder, so skip
# those files; otherwise a re-run would treat earlier outputs as inputs.
# Sorting makes the processing order deterministic.
image_list = sorted(
    path
    for path in glob.glob(os.path.join(img_path, "*.jpg"))
    if not os.path.splitext(path)[0].endswith("_dinov2")
)
print(len(image_list))
for image_path in image_list:
    visualize_dinov2(image_path)

49
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size([1, 3, 322, 322])
torch.Size(