## Generate images for evaluation

In [None]:
import json, os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import sys
utils_path = os.path.abspath(os.path.join('../..'))
sys.path.append(utils_path)

import torch
from tqdm import *
from PIL import Image
from diffusers.pipelines import BlipDiffusionPipeline
from diffusers.utils import load_image
from utils.dataset_info import get_subjects_prompts_info

# Generate `num_generation` BLIP-Diffusion images for every (subject, prompt)
# pair and store them under
# <output_path>/<subject>/<prompt text>/<0000..>.jpg.

# Restrict the run to a single subject (e.g. "backpack"); leave empty to
# process every subject returned by get_subjects_prompts_info().
single_subject = ""                            # "backpack"
# Restrict the run to specific prompt templates; leave empty to use the
# prompts registered for each subject.
single_prompt = []                            # e.g. ["a {0} {1} near the pool"]

# Number of images generated per prompt. Image i is conditioned on the
# subject's reference image "0{i}.jpg".
num_generation = 4

output_path = "../../outputs/subjects/blip_diffusion"
dataset_path = "../../pcs_dataset"
dataset_info_path = f"{dataset_path}/info.json"

negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate"
blip_diffusion_pipe = BlipDiffusionPipeline.from_pretrained(
    "Salesforce/blipdiffusion", torch_dtype=torch.float16
).to("cuda")

prompts_info = get_subjects_prompts_info(dataset_info_path)
if single_subject:
    prompts_info = {single_subject: prompts_info[single_subject]}

for subject, subject_info in prompts_info.items():

    print(f"***** Subject: {subject} *****")

    subject_class = subject_info["class"]
    subject_output_dir = os.path.join(output_path, subject)
    os.makedirs(subject_output_dir, exist_ok=True)

    prompts = single_prompt if single_prompt else subject_info["prompts"]

    for prompt in prompts:
        if "a {0} {1}" in prompt:
            # Drop the "a <subject>" placeholder entirely; the remaining text
            # is used both as the pipeline prompt and as the directory name.
            # NOTE(review): presumably the pipeline re-inserts the subject
            # from the category arguments below - confirm against the
            # BlipDiffusionPipeline documentation.
            prompt = prompt.replace("a {0} {1}", "")
        else:
            prompt = prompt.replace("{0} {1}", subject_class)
        print(f"**Prompt**: {prompt}")

        save_path = os.path.join(subject_output_dir, prompt)

        # Resume per image rather than per directory, so a prompt whose
        # generation was interrupted half-way is completed on the next run
        # instead of being skipped because its directory already exists.
        pending = [
            i for i in range(num_generation)
            if not os.path.exists(os.path.join(save_path, f"{i:04d}.jpg"))
        ]
        if not pending:
            continue

        os.makedirs(save_path, exist_ok=True)

        for i in pending:
            # NOTE(review): this name pattern only yields two-digit file
            # names while i < 10.
            cond_image_path = os.path.join(dataset_path, "subjects", subject, f"0{i}.jpg")

            # The context manager closes the reference image file once the
            # pipeline has consumed it.
            with Image.open(cond_image_path) as cond_image:
                image = blip_diffusion_pipe(
                    prompt,
                    cond_image,
                    # The subject class is passed for both positional
                    # category arguments (presumably source and target
                    # subject category - confirm against the pipeline docs).
                    subject_class,
                    subject_class,
                    guidance_scale=7.5,
                    num_inference_steps=25,
                    neg_prompt=negative_prompt,
                    height=512,
                    width=512,
                ).images

            image[0].save(os.path.join(save_path, f"{i:04d}.jpg"))

    print(f"Finished blip diffusion in subject: {subject}!")


## Evaluation

### Calculate the similarity for each sample
Calculate the image-to-image and text-to-image similarity of every generated sample using the CLIP evaluator.

In [6]:
import os, sys, json

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

utils_path = os.path.abspath(os.path.join('../..'))
sys.path.append(utils_path)

import numpy as np
import torch
from utils.clip_eval import evaluate_i2i, evaluate_t2i, CLIPEvaluator

In [7]:
def convert_to_native(data):
    """Recursively replace NumPy values in ``data`` with plain Python objects.

    The result is intended to be passed to ``json.dump``, which rejects
    NumPy arrays and most NumPy scalar types.

    Args:
        data: Any object. ``np.ndarray`` becomes a (nested) list, NumPy
            scalars become the matching Python scalar, and dicts, lists and
            tuples are rebuilt with their contents converted. Dict keys are
            converted as well, because ``json.dump`` also rejects keys such
            as ``np.int64``.

    Returns:
        The converted object. Tuples (including tuple subclasses) come back
        as plain tuples; anything not listed above is returned unchanged.
    """
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        return data.item()
    if isinstance(data, dict):
        return {
            convert_to_native(key): convert_to_native(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [convert_to_native(item) for item in data]
    if isinstance(data, tuple):
        # Tuples are serialised by json like lists, but NumPy scalars nested
        # inside them would still fail unless converted here.
        return tuple(convert_to_native(item) for item in data)
    return data
    
def add_evaluation(file_path, new_data):
    """Merge ``new_data`` into the JSON results file at ``file_path``.

    If the file does not exist yet (or exists but is empty), it is written
    with ``new_data`` alone. Otherwise the stored JSON object is loaded,
    updated with ``new_data`` (top-level keys in ``new_data`` replace stored
    ones) and written back.

    Args:
        file_path: Path of the JSON file to create or update.
        new_data: Results to store; passed through ``convert_to_native``
            before serialisation.
    """
    # Convert before touching the file so that a conversion error cannot
    # leave a freshly truncated, empty results file behind.
    payload = convert_to_native(new_data)

    # A zero-byte file (e.g. left by an interrupted write) is treated as
    # "no previous results" instead of failing in json.load.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, 'r', encoding="utf-8") as file:
            data = json.load(file)
        data.update(payload)
        payload = data

    with open(file_path, 'w', encoding="utf-8") as file:
        json.dump(payload, file, indent=4)

In [None]:
# Score every generated image found under
# <outputs_path>/<subject>/<prompt>/ and append the per-subject results to
# <eval_res_path>/evaluation_results.json.
outputs_path = "../../outputs/subjects/blip_diffusion"
eval_res_path = "../../eval_results/subjects/blip_diffusion"
dataset_path = "../../pcs_dataset/subjects"
dataset_info_path = "../../pcs_dataset/info.json"
os.makedirs(eval_res_path, exist_ok=True)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# NOTE(review): `evaluator` is built here but is not passed to
# evaluate_i2i / evaluate_t2i below - confirm whether those helpers need it.
evaluator = CLIPEvaluator(device)

# Only directories are subjects; stray files in the outputs folder are
# ignored. Sorting makes the processing order reproducible.
subjects_list = sorted(
    name for name in os.listdir(outputs_path)
    if os.path.isdir(os.path.join(outputs_path, name))
)

with open(dataset_info_path, 'r') as f:
    info_data = json.load(f)

results_file = os.path.join(eval_res_path, "evaluation_results.json")

for subject in subjects_list:
    print(f"***** Subject: {subject} *****")

    subject_dir = os.path.join(outputs_path, subject)
    reference_dir = os.path.join(dataset_path, subject)

    res_for_each_subject = {}

    for prompt in sorted(os.listdir(subject_dir)):
        prompt_dir = os.path.join(subject_dir, prompt)
        if not os.path.isdir(prompt_dir):
            # Skip stray files; every prompt is expected to be a directory.
            continue

        if prompt[0] != 'a':
            # Directory names that do not start with "a" lack the leading
            # "a <subject>" part, so it is prepended for the text prompt.
            eval_prompt = f"a {info_data['subjects']['subject_with_cls'][subject]}" + prompt
        else:
            eval_prompt = prompt
        print(f"**Prompt**: {eval_prompt}")

        # Maps image file name -> [image similarity, text similarity].
        res_for_each_prompt = {}

        for generate_img_name in sorted(os.listdir(prompt_dir)):
            generate_img_path = os.path.join(prompt_dir, generate_img_name)
            res_for_each_prompt[generate_img_name] = [
                evaluate_i2i(generate_img_path, reference_dir),
                evaluate_t2i(generate_img_path, eval_prompt),
            ]

        res_for_each_subject[eval_prompt] = res_for_each_prompt
        print(res_for_each_prompt)

    evaluation_res = {subject: res_for_each_subject}

    # Written once per subject so finished subjects survive an interruption.
    add_evaluation(results_file, evaluation_res)


***** Subject: chair *****
**Prompt**: a shiny chair
{'0003.jpg': [0.875, 0.2332], '0000.jpg': [0.875, 0.2314], '0002.jpg': [0.8486, 0.2444], '0001.jpg': [0.885, 0.2412]}
**Prompt**: a chair on top of a white rug
{'0003.jpg': [0.8486, 0.2303], '0000.jpg': [0.892, 0.276], '0002.jpg': [0.896, 0.2576], '0001.jpg': [0.8555, 0.2416]}
**Prompt**: a chair on top of a mirror
{'0003.jpg': [0.8955, 0.2274], '0000.jpg': [0.888, 0.2343], '0002.jpg': [0.8555, 0.2615], '0001.jpg': [0.883, 0.2625]}
**Prompt**: a chair in the jungle
{'0003.jpg': [0.8447, 0.278], '0000.jpg': [0.88, 0.2454], '0002.jpg': [0.8574, 0.2715], '0001.jpg': [0.8623, 0.2717]}
**Prompt**: a chair floating on top of water
{'0003.jpg': [0.8037, 0.319], '0000.jpg': [0.8813, 0.2296], '0002.jpg': [0.87, 0.2476], '0001.jpg': [0.866, 0.2134]}
**Prompt**: a wet chair
{'0003.jpg': [0.8745, 0.2432], '0000.jpg': [0.8623, 0.2332], '0002.jpg': [0.8345, 0.2344], '0001.jpg': [0.881, 0.237]}
**Prompt**: a chair with a mountain in the background


### Calculate the average similarity

In [6]:
import json

# Average the per-sample scores stored as
# {subject: {prompt: {image name: [image similarity, text similarity]}}}.
eval_res_path = "../../eval_results/subjects/blip_diffusion/evaluation_results.json"

with open(eval_res_path, "r") as f:
    data = json.load(f)

img_sim = 0.0
text_sim = 0.0
cnt = 0

for prompts in data.values():
    for samples in prompts.values():
        for scores in samples.values():
            img_sim += scores[0]
            text_sim += scores[1]
            cnt += 1

if cnt:
    print("Image Similarity: ", img_sim/cnt, "\nText Similarity:", text_sim/cnt)
else:
    # An empty results file would otherwise end in a ZeroDivisionError.
    print(f"No evaluation samples found in {eval_res_path}")

Image Similarity:  0.766163044381649 
Text Similarity: 0.23408021318151595
