In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## List all of the images in the train set

In [None]:
# Directory that holds the training images of the processed Cityscapes dataset.
train_images_path = "/kaggle/input/cityscapes-processed/data/processed/train/image"

import glob

# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the listing (and therefore train_images_filenames[0]) identical on every run.
train_images_filenames = sorted(glob.glob(f"{train_images_path}/*.jpg"))
print(len(train_images_filenames))

## Load images

In [None]:
from PIL import Image

def load_image(file_name):
    """Open an image file and return it converted to RGB mode.

    Args:
        file_name: Path of the image file, handed to ``Image.open``.

    Returns:
        The result of ``convert("RGB")`` on the opened image.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager closes it as soon as the RGB copy has been produced.
    with Image.open(file_name) as im:
        return im.convert("RGB")

In [None]:
# Load the first listed training image and show it as the cell output.
first_image_path = train_images_filenames[0]
sample_image = load_image(first_image_path)
sample_image

## Load BLIP Model

In [None]:
!pip install salesforce-lavis

In [None]:
import torch
from lavis.models import load_model_and_preprocess

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the "base" BLIP feature extractor with is_eval=True, along with the
# image and text processors returned by the same call.
model, vis_processors, txt_processors = load_model_and_preprocess(
    name="blip_feature_extractor",
    model_type="base",
    is_eval=True,
    device=device,
)

In [None]:
def get_blip_embeddings(raw_image):
    """Compute the BLIP image embeddings of a single image.

    Reads the notebook-level ``model``, ``vis_processors`` and ``device``
    (no ``global`` statement is needed because they are only read).

    Args:
        raw_image: Image accepted by ``vis_processors["eval"]``; the notebook
            passes the RGB PIL images produced by ``load_image``.

    Returns:
        The ``image_embeds`` attribute of the ``model.extract_features``
        output for a batch that holds just this image.
    """
    # Preprocess, add a leading batch axis of size one, and move to the device.
    image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
    sample = {"image": image, "text_input": None}
    # Inference only: disabling autograd explicitly guarantees the returned
    # tensor never carries a graph, whatever extract_features does internally.
    # This matters because the notebook keeps one result per training image.
    with torch.no_grad():
        features_image = model.extract_features(sample, mode="image")
    return features_image.image_embeds

In [None]:
# Smoke-test the extractor on the sample image; the cell output is the result.
sample_embeds = get_blip_embeddings(sample_image)
sample_embeds

## Pre-store all BLIP Embeddings

In [None]:
from tqdm import tqdm

# Embed every training image once, in listing order, with a progress bar.
all_blip_embeddings = [
    get_blip_embeddings(load_image(image_path))
    for image_path in tqdm(train_images_filenames)
]

## Compute pair-wise dot products

In [None]:
# Sum of the dot products between the [0, 0, :] vectors of every unordered
# pair of stored embeddings (the value the nested loop read via sim[0][0]).
#
# Instead of visiting all N*(N-1)/2 pairs, use the identity
#     sum_{i<j} x_i . x_j = (|sum_i x_i|^2 - sum_i |x_i|^2) / 2
# which needs a single pass over the embeddings.

# Number of stored embeddings; reused by the averaging cells below.
N = len(all_blip_embeddings)

if N < 2:
    # No pairs exist, so the sum over pairs is empty.
    sum_similarities = 0.0
else:
    vector_total = 0.0        # running sum_i x_i (becomes a tensor after the first add)
    squared_norm_total = 0.0  # running sum_i |x_i|^2

    for embeds in all_blip_embeddings:
        # float64 keeps the subtraction below accurate; detach() drops any
        # autograd graph attached to the stored tensor.
        x = embeds[0, 0, :].detach().double()
        vector_total = vector_total + x
        squared_norm_total += float(x @ x)

    sum_similarities = (float(vector_total @ vector_total) - squared_norm_total) / 2

In [None]:
# Display the accumulated sum of pair-wise dot products.
sum_similarities

In [None]:
# Number of unordered pairs among N items (N choose 2); true division keeps
# the result a float.
ordered_pair_count = N * (N - 1)
den = ordered_pair_count / 2

In [None]:
# Display the denominator (the number of unordered pairs).
den

In [None]:
# Mean pair-wise dot product: the accumulated sum divided by the pair count.
# NOTE(review): den is 0 when N < 2, in which case this raises ZeroDivisionError.
avg = sum_similarities/den

In [None]:
# Display the average pair-wise dot product.
avg

In [None]:
# Display N, the number of stored embeddings.
N