DINOv3
====

 * Paper: https://arxiv.org/abs/2508.10104

![DINOv3](../assets/dinov3_example.jpg)

* Installation

```bash
git clone https://github.com/facebookresearch/dinov3.git dinov3_repo
cd dinov3_repo

conda env create -f conda.yaml
conda activate dinov3
```

 * Request access to download the weights

![DINOv3 Request](dinov3_request.jpg)

In [4]:
import torch
import torchvision.transforms as transforms
from PIL import Image

# Directory holding the local clone of the DINOv3 repository
REPO_DIR = "dinov3_repo"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the ConvNeXt-Base backbone (LVD-1689M checkpoint) through the
# repository's local torch.hub entry point; `weights` is the checkpoint path
# passed through to that entry point.
model = torch.hub.load(
    REPO_DIR,
    "dinov3_convnext_base",
    source="local",
    weights="models/dinov3_convnext_base_pretrain_lvd1689m-801f2ba9.pth",
)

# Switch to evaluation mode and move the model to the selected device
model.eval().to(device)

# Define the recommended image transform for web-pretrained models
def make_transform(resize_size=224):
    """Return the image preprocessing pipeline fed to the model.

    The composed transform applies, in order: ``ToTensor``, a resize to a
    square of ``resize_size`` x ``resize_size`` (antialiasing enabled), and a
    per-channel ``Normalize`` with fixed mean/std constants.

    Args:
        resize_size: Side length used for both output dimensions.

    Returns:
        A ``transforms.Compose`` wrapping the three steps above.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    target_size = (resize_size, resize_size)

    steps = [
        transforms.ToTensor(),
        transforms.Resize(target_size, antialias=True),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)


In [7]:
# Build the preprocessing pipeline with its default resize size
transform = make_transform()

# Read the sample image and force it to three RGB channels
image_path = "../samples/plants.jpg"
img = Image.open(image_path).convert("RGB")

# Preprocess the image, then insert a leading batch dimension
inputs = torch.unsqueeze(transform(img), 0)

# Forward pass under inference mode, with the batch moved to the model's device
with torch.inference_mode():
    outputs = model(inputs.to(device))

# Show the shape of the tensor returned by the model
print(outputs.shape)

torch.Size([1, 1024])
