# Installing open-clip library

In [1]:
%%capture
!pip install open_clip_torch

# Cloning GitHub Repository

In [2]:
%%capture
!git clone https://github.com/kk-digital/kcg-ml.git

# OpenClip Examples

## Imports

In [3]:
import torch
from PIL import Image
import open_clip

## Listing All Pre-Trained Models.

In [4]:
# Display every (model name, pretrained tag) pair known to open_clip;
# any pair from this list can be used for MODEL_NAME / PRETRAINED below.
open_clip.list_pretrained()

[('RN50', 'openai'),
 ('RN50', 'yfcc15m'),
 ('RN50', 'cc12m'),
 ('RN50-quickgelu', 'openai'),
 ('RN50-quickgelu', 'yfcc15m'),
 ('RN50-quickgelu', 'cc12m'),
 ('RN101', 'openai'),
 ('RN101', 'yfcc15m'),
 ('RN101-quickgelu', 'openai'),
 ('RN101-quickgelu', 'yfcc15m'),
 ('RN50x4', 'openai'),
 ('RN50x16', 'openai'),
 ('RN50x64', 'openai'),
 ('ViT-B-32', 'openai'),
 ('ViT-B-32', 'laion400m_e31'),
 ('ViT-B-32', 'laion400m_e32'),
 ('ViT-B-32', 'laion2b_e16'),
 ('ViT-B-32', 'laion2b_s34b_b79k'),
 ('ViT-B-32-quickgelu', 'openai'),
 ('ViT-B-32-quickgelu', 'laion400m_e31'),
 ('ViT-B-32-quickgelu', 'laion400m_e32'),
 ('ViT-B-16', 'openai'),
 ('ViT-B-16', 'laion400m_e31'),
 ('ViT-B-16', 'laion400m_e32'),
 ('ViT-B-16', 'laion2b_s34b_b88k'),
 ('ViT-B-16-plus-240', 'laion400m_e31'),
 ('ViT-B-16-plus-240', 'laion400m_e32'),
 ('ViT-L-14', 'openai'),
 ('ViT-L-14', 'laion400m_e31'),
 ('ViT-L-14', 'laion400m_e32'),
 ('ViT-L-14', 'laion2b_s32b_b82k'),
 ('ViT-L-14-336', 'openai'),
 ('ViT-H-14', 'laion2b_s32b_

## Text-Image Matching Example.
Showing the probability distribution over a list of texts for a single image.

In [5]:
# Model architecture name and pretrained-weights tag used by every example below;
# the pair ('ViT-L-14', 'openai') appears in the list_pretrained() output.
MODEL_NAME = 'ViT-L-14'
PRETRAINED = 'openai'

In [6]:
# create_model_and_transforms returns (model, train transform, eval transform);
# only the model and the eval-time image transform are needed here.
model, _, preprocess = open_clip.create_model_and_transforms(model_name=MODEL_NAME, pretrained=PRETRAINED)
tokenizer = open_clip.get_tokenizer(MODEL_NAME)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# This notebook only runs inference, so put the model in evaluation mode.
# `.eval()` is chained onto the assignment so the cell does not echo the model repr.
model = model.to(device).eval()

100%|███████████████████████████████████████| 933M/933M [00:13<00:00, 68.4MiB/s]


In [7]:
# Zero-shot matching: score one image against several candidate captions.
# Both inputs are moved to `device` so they live on the same device as `model`
# (which was moved there when it was created); without this the encode calls
# fail with a device-mismatch error whenever CUDA is available.
image = preprocess(Image.open('./kcg-ml/datasets/test-images/example3.jpg')).unsqueeze(0).to(device)

text = tokenizer(["pixel art", "painting", "digital art"]).to(device)  # List of texts which will be compared.

with torch.no_grad():  # inference only, no autograd graph is needed
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # L2-normalise both embeddings so the matrix product below is a cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Scale the similarities by 100, then softmax across the candidate texts
    # to obtain one probability per caption.
    text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)

print("Label probs:", text_probs)

Label probs: tensor([[9.9429e-01, 3.5808e-04, 5.3562e-03]])


## Getting CLIP Image Embeddings for Single Image

In [8]:
# Load the image, apply the CLIP preprocessing transform, add a batch
# dimension and move the result to the model's device.
image = preprocess(Image.open('./kcg-ml/datasets/test-images/example3.jpg'))
image = image.unsqueeze(0).to(device)

# Encode without tracking gradients, then convert the embedding to a NumPy array.
with torch.no_grad():
    emb = model.encode_image(image)
emb = emb.detach().cpu().numpy()

print(f"[INFO] CLIP embedding size: {emb.shape}")

[INFO] CLIP embedding size: (1, 768)


## Checking the Similarity Between Two Images Using CLIP

In [9]:
# Cosine similarity between the CLIP embeddings of two images.
image1 = preprocess(Image.open('./kcg-ml/datasets/test-images/example1.jpg')).unsqueeze(0).to(device)
image2 = preprocess(Image.open('./kcg-ml/datasets/test-images/example2.jpg')).unsqueeze(0).to(device)

# Inference only: run under no_grad (as the other cells do) so no autograd graph
# is built and the in-place normalisation does not operate on graph tensors.
with torch.no_grad():
    image_features = model.encode_image(image1)
    image_2_features = model.encode_image(image2)

    # L2-normalise so that the dot product equals the cosine similarity.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    image_2_features /= image_2_features.norm(dim=-1, keepdim=True)
    similarity = image_2_features @ image_features.T

# `similarity` is a 1x1 tensor here (one image against one image).
print(f'Similarit: {similarity.cpu().numpy()[0][0]:.4f}')

Similarit: 0.5437


## ClipModel Examples
▶ ClipModel: a module built on top of OpenClip functions; see https://github.com/kk-digital/kcg-ml/blob/main/examples/ClipTools.py

In [10]:
!pip install patool

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting patool
  Downloading patool-1.12-py2.py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.5/77.5 KB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: patool
Successfully installed patool-1.12


In [14]:
import sys

# Put the cloned repository (and its clip_linear_probe_pipeline folder) at the
# front of the import path. Each entry is inserted at index 0, so the last one
# listed ('./kcg-ml/') ends up first on sys.path.
for repo_path in ('./kcg-ml/clip_linear_probe_pipeline/', './kcg-ml/'):
    sys.path.insert(0, repo_path)

from examples.ClipTools import ClipModel

## Creating an Instance of ClipModel Class

In [15]:
# Build the project's ClipModel wrapper with the same model name and
# pretrained tag used in the OpenClip examples above.
clip_model_instance = ClipModel(clip_model=MODEL_NAME, pretrained=PRETRAINED)

## Downloading Model Method Example.

In [16]:
# Ask the wrapper to download the weights for this model / pretrained pair.
# NOTE(review): where the weights are stored is decided inside ClipTools.py — not visible here.
clip_model_instance.download_model(MODEL_NAME, PRETRAINED)

[INFO] Model downloaded succesfully


## Getting Image Embedding Example.

In [None]:
# Encode a single image identified by its file path.
# NOTE(review): presumably returns the CLIP embedding; the cell has no recorded output — confirm in ClipTools.py.
clip_model_instance.encode_image_from_image_file('./kcg-ml/datasets/test-images/example1.jpg')

In [None]:
def image_to_bytes(image_path):
    """Read the file at ``image_path`` in binary mode and return its contents as a ``bytearray``."""
    with open(image_path, mode='rb') as stream:
        return bytearray(stream.read())

# Same image as in the file-path example, but passed as raw bytes
# (the bytearray produced by image_to_bytes) instead of a path.
clip_model_instance.encode_image_from_image_data(image_to_bytes('./kcg-ml/datasets/test-images/example1.jpg'))

In [None]:
# Encode several images in one call by passing a list of file paths.
clip_model_instance.encode_image_list(
    [
        './kcg-ml/datasets/test-images/example1.jpg',
        './kcg-ml/datasets/test-images/example2.jpg',
        './kcg-ml/datasets/test-images/example3.jpg',
    ]
)