In [7]:
# Standard library
import os
import random
import warnings

# Suppress every warning for the rest of the kernel session. This call sits
# before the third-party imports below, so warnings raised while importing
# them are hidden as well.
warnings.filterwarnings("ignore")

# LPIPS perceptual metric and YAML parsing
import lpips
import yaml

# Progress bars
from tqdm.auto import tqdm

# Numerics and tabular results
import numpy as np
import pandas as pd

# Deep-learning stack: tensors, CLIP model/processor, image transforms
import torch
from transformers import CLIPProcessor, CLIPModel
from torchvision import transforms

# Image file loading
from PIL import Image

# Let pandas display up to 200 rows before truncating a printed frame.
pd.set_option("display.max_rows", 200)

In [16]:
# LPIPS comparison between the images generated by two models.
#
# For every (artist, sample) pair the file `<artist>_<sample>.png` is read from
# each model's image directory, and the LPIPS distance between the two images
# is computed. The cell displays the mean LPIPS per artist.
model_name1 = "default_1"
model_name2 = "artist10erase_1"

# Layout of the image grid on disk: image/<model>/<artist>_<sample>.png
num_artists = 20
images_per_artist = 10

lpips_preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


def _load_lpips_image(path):
    """Load one image as a tensor rescaled from ToTensor's [0, 1] to [-1, 1].

    The file is opened in a ``with`` block so the handle is released right
    away; ``convert("RGB")`` returns a new image and would otherwise leave the
    source file open until garbage collection.
    """
    with Image.open(path) as img:
        return lpips_preprocess(img.convert("RGB")) * 2 - 1


images1 = []
images2 = []

artist_index = []
count_index = []
for idx in range(num_artists):
    for i in range(images_per_artist):
        images1.append(_load_lpips_image(f"image/{model_name1}/{idx}_{i}.png"))
        images2.append(_load_lpips_image(f"image/{model_name2}/{idx}_{i}.png"))
        artist_index.append(idx)
        count_index.append(i)
images1 = torch.stack(images1)
images2 = torch.stack(images2)


loss_function = lpips.LPIPS(net='alex')
# Pure inference: skip building the autograd graph for the whole batch.
with torch.no_grad():
    # flatten() instead of squeeze() so that a batch with a single pair still
    # yields a 1-D array that pandas accepts as a column.
    lpips_score = loss_function(images1, images2).flatten().numpy().round(3)

df = pd.DataFrame({"artist": artist_index, "index": count_index, "LPIPS": lpips_score})
df.groupby("artist")["LPIPS"].mean().reset_index()

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: c:\Users\yoonj\AppData\Local\Programs\Python\Python312\Lib\site-packages\lpips\weights\v0.1\alex.pth


Unnamed: 0,artist,LPIPS
0,0,0.5347
1,1,0.5228
2,2,0.4738
3,3,0.655
4,4,0.5408
5,5,0.6221
6,6,0.5972
7,7,0.5459
8,8,0.6957
9,9,0.4844


In [35]:
# CLIP score comparison between the two models.
#
# Each generated image is embedded with CLIP together with the prompt of its
# artist; the score is the cosine similarity between image and text embedding.
# The cell displays the per-artist mean score of each model and the mean
# absolute difference between them.
#
# Relies on `model_name1` / `model_name2` being defined by an earlier cell.

# Inference only. NOTE: this disables autograd globally for the kernel session.
torch.set_grad_enabled(False)

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

clip_preprocess = transforms.Compose([
    transforms.ToTensor(),
])

# Layout of the image grid on disk: image/<model>/<artist>_<sample>.png
num_artists = 20
images_per_artist = 10


def _load_clip_image(path):
    """Load one image as a float tensor in [0, 1] (ToTensor scaling).

    The file is opened in a ``with`` block so the handle is released right
    away instead of lingering until garbage collection.
    """
    with Image.open(path) as img:
        return clip_preprocess(img.convert("RGB"))


images1 = []
images2 = []
index = []
# Row labels are rebuilt here, next to the images they describe, so the
# DataFrame below does not depend on lists left over from another cell.
artist_index = []
count_index = []
for idx in range(num_artists):
    for i in range(images_per_artist):
        images1.append(_load_clip_image(f"image/{model_name1}/{idx}_{i}.png"))
        images2.append(_load_clip_image(f"image/{model_name2}/{idx}_{i}.png"))
        index.append(f"{idx}_{i}")
        artist_index.append(idx)
        count_index.append(i)
# torch.stack requires every image to have identical dimensions.
images1 = torch.stack(images1)
images2 = torch.stack(images2)

with open(f"data/{model_name2}.yaml", 'r', encoding='utf-8') as file:
    data = yaml.safe_load(file)

prompts = data["prompts"][:num_artists]
if len(prompts) != num_artists:
    # Fail early with a clear message; otherwise the mismatch only surfaces
    # later as a shape error inside cosine_similarity.
    raise ValueError(
        f"Expected at least {num_artists} prompts in data/{model_name2}.yaml, found {len(prompts)}"
    )

# One prompt per artist, repeated once for each of that artist's images so
# that prompt k lines up with image k.
total_prompts = []
for prompt in prompts:
    total_prompts += [prompt] * images_per_artist

# do_rescale=False: the tensors are already in [0, 1] after ToTensor.
# truncation=True: prompts longer than CLIP's text context are cut instead of
# crashing the text encoder.
inputs = processor(text=total_prompts, images=images1, return_tensors="pt", padding=True,
                   truncation=True, do_rescale=False)
outputs = model(**inputs)
image_embds1 = outputs.image_embeds

inputs = processor(text=total_prompts, images=images2, return_tensors="pt", padding=True,
                   truncation=True, do_rescale=False)
outputs = model(**inputs)
image_embds2 = outputs.image_embeds

# The same prompts are encoded in both calls, so the text embeddings of the
# last call serve both models.
text_embds = outputs.text_embeds

clip_score1 = torch.nn.functional.cosine_similarity(image_embds1, text_embds).numpy().round(3)
clip_score2 = torch.nn.functional.cosine_similarity(image_embds2, text_embds).numpy().round(3)

df = pd.DataFrame({"artist": artist_index, "index": count_index, "CLIP 1": clip_score1, "CLIP 2": clip_score2,
                    "CLIP diff": abs(clip_score1 - clip_score2)})
df.groupby("artist")[["CLIP 1", "CLIP 2", "CLIP diff"]].mean().reset_index()

Unnamed: 0,artist,CLIP 1,CLIP 2,CLIP diff
0,0,0.3185,0.2225,0.096
1,1,0.2028,0.1873,0.0369
2,2,0.3547,0.3137,0.041
3,3,0.3019,0.2085,0.0934
4,4,0.3413,0.2384,0.1029
5,5,0.3534,0.2277,0.1257
6,6,0.3112,0.215,0.0962
7,7,0.346,0.2412,0.1048
8,8,0.2856,0.1808,0.1048
9,9,0.3227,0.2735,0.0514
