In [8]:
import open_clip
import torch
from PIL import Image
from matplotlib import pyplot as plt

In [15]:
# Registry of the available Latent-CLIP variants: short size tag -> open_clip
# architecture name and checkpoint file.
# NOTE(review): the checkpoint paths are absolute and machine-specific; adjust
# them when running this notebook anywhere else.
models = {'B-8': {'model_name':'Latent-ViT-B-8-512',
                  'pretrained':'/dlabdata1/wendler/models/latent-clip-b-8.pt'},
          'B-4-plus':{'model_name':'Latent-ViT-B-4-512-plus',
                      'pretrained':'/dlabdata1/wendler/models/latent-clip-b-4-plus.pt'}}
size = 'B-8'  # key into `models`; use 'B-4-plus' for the other checkpoint
model_name = models[size]['model_name']
pretrained = models[size]['pretrained']
model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained=pretrained)
# Inference only: leave training mode so layers that behave differently during
# training (dropout, stochastic depth, BatchNorm) cannot perturb the features.
model.eval()
tokenizer = open_clip.get_tokenizer(model_name)

# Open the image in a context manager so the file handle is released as soon as
# preprocessing is done; unsqueeze(0) adds the leading batch dimension.
with Image.open("../CLIP.jpg") as img:
    image = preprocess(img).unsqueeze(0)
labels = ["a diagram", "a dog", "a cat"]
text = tokenizer(labels)

# torch.autocast("cuda") replaces the deprecated torch.cuda.amp.autocast().
# Nothing in this cell moves the model or the inputs to a GPU, so autocast only
# takes effect if they already live on a CUDA device.
with torch.no_grad(), torch.autocast("cuda"):
    # You can either feed an image as done here, or an SDXL latent of shape
    # (batch_size, 64, 64, 4).
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # L2-normalise both embeddings so the dot product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Scale the similarities by 100 and softmax over the labels to obtain
    # per-label probabilities for each image in the batch.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)
# The batch holds a single image, so the per-row argmax reduces to one label index.
pred_idx = text_probs.argmax(dim=-1).item()
print("Pred:", labels[pred_idx])

Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.


Label probs: tensor([[1.0072e-04, 9.9983e-01, 6.6763e-05]])
Pred: a dog
