In [None]:
import os
import json
import spacy
import random

## VISOR

In [None]:
# Hand-picked VISOR prompts, one per line. Judging by the variable name these are
# prompts whose generated objects looked wrong -- TODO confirm the selection criterion.
visor_bad_object_appearance = [
    "a bottle above a hair drier",
    "a toothbrush above a fire hydrant",
    "a toaster to the left of an airplane",
    "a parking meter to the right of a couch",
    "a spoon to the left of a refrigerator",
    "a parking meter to the left of a tennis racket",
    "a sports ball to the left of a refrigerator",
    "a remote to the left of a keyboard",
    "a motorcycle below a scissors",
    "a book to the left of a hair drier",
    "an orange to the right of a bicycle",
    "a dining table to the left of a baseball glove",
    "a book to the right of a baseball bat",
    "a tennis racket to the left of a motorcycle",
    "a clock to the left of a hair drier",
    "a baseball bat to the right of a tennis racket",
    "a hair drier below a baseball glove",
    "a baseball glove to the right of an airplane",
    "a wine glass to the left of a hair drier",
    "a traffic light below a hair drier",
    "a skateboard to the right of a keyboard",
    "a sandwich to the right of a stop sign",
    "a sink to the right of a bus",
    "an airplane to the left of a sink",
    "an oven to the right of a cell phone",
]

In [None]:
# Seed the global RNG before the first draw so that every random.sample call in
# this notebook (and therefore the JSON files written from the samples) is
# reproducible under Restart Kernel -> Run All.
RANDOM_SEED = 0
random.seed(RANDOM_SEED)

# Start the list of VISOR failure prompts with 4 prompts from the first group.
visor_wrong_samples = random.sample(visor_bad_object_appearance, 4)

In [None]:
# Hand-picked VISOR prompts, one per line. Judging by the variable name these are
# prompts with a low spatial score -- TODO confirm the selection criterion.
visor_low_spatial_score = [
    "a giraffe to the right of a scissors",
    "an elephant above a horse",
    "a fork below a cell phone",
    "a fork to the right of a dining table",
    "a fork above an elephant",
    "a baseball bat to the left of an airplane",
    "a donut to the right of a sports ball",
    "a skateboard below a baseball bat",
    "a baseball bat above a boat",
    "a train below a sink",
    "a train below a skis",
    "a train to the right of a wine glass",
    "a banana to the right of a refrigerator",
    "a banana to the right of a sink",
    "a giraffe to the left of a mouse",
    "a giraffe below a remote",
    "an airplane below a skis",
    "an airplane below a snowboard",
    "an airplane to the left of a spoon",
    "a zebra below a cat",
    "a zebra below a carrot",
    "an airplane below a wine glass",
    "a baseball bat below an apple",
    "a baseball bat to the right of a banana",
    "a scissors below a traffic light",
]

In [None]:
# Append 3 randomly drawn prompts from the low-spatial-score group.
# The list is grown in place, so re-running this cell appends another batch.
visor_wrong_samples += random.sample(visor_low_spatial_score, 3)

In [None]:
# Hand-picked VISOR prompts, one per line. Judging by the variable name these are
# prompts where the objects were generated correctly but the spatial score was
# low -- TODO confirm the selection criterion.
visor_correct_object_low_spatial_score = [
    "a fire hydrant above a baseball bat",
    "a motorcycle above a person",
    "a fire hydrant above a skateboard",
    "a skateboard above a person",
    "a fire hydrant below a scissors",
    "an airplane below a surfboard",
    "a frisbee to the left of a person",
    "a sports ball above a baseball bat",
    "a mouse below a surfboard",
    "a mouse below a stop sign",
]

In [None]:
# Append 3 randomly drawn prompts from the correct-object / low-spatial-score group.
# The list is grown in place, so re-running this cell appends another batch.
visor_wrong_samples += random.sample(visor_correct_object_low_spatial_score, 3)

In [None]:
# Load the VISOR prompt records; each record is looked up by its "text" key below.
visor_text_path = os.path.join('json_files', 'text_spatial_rel_phrases.json')
with open(visor_text_path, 'r') as f:
    visor_text_data = json.load(f)

In [None]:
# Keep the full records of the prompts that were sampled as failures,
# preserving the order in which they appear in the source file.
visor_failures = [
    entry for entry in visor_text_data
    if entry["text"] in visor_wrong_samples
]

In [None]:
# Persist the selected VISOR failure records as pretty-printed JSON.
visor_wrong_prompts_path = os.path.join('json_files', 'visor_wrong_prompts.json')
with open(visor_wrong_prompts_path, 'w') as f:
    json.dump(visor_failures, f, indent=4)

## T2I-CompBench

In [None]:
# Load the T2I prompt records; each record carries a "prompt" key used below.
t2i_prompts_path = os.path.join('json_files', 't2i_prompts.json')
with open(t2i_prompts_path, 'r') as f:
    t2i_data = json.load(f)

In [None]:
# Flatten the records to just their prompt strings, keeping file order.
t2i_prompts = [entry["prompt"] for entry in t2i_data]

In [None]:
# Number of prompt strings extracted from t2i_data (duplicates, if any, are counted).
len(t2i_prompts)

In [None]:
# Directory holding the object-detection annotations for the evaluated run.
t2i_annotations_path = os.path.join(
    "objdet_results",
    "t2i",
    "sdxl_relu_sg",
    "labels",
    "annotation_obj_detection_2d",
)

# Each annotation record is read below through its "prompt" and "score" keys.
t2i_vqa_result_path = os.path.join(t2i_annotations_path, 'vqa_result.json')
with open(t2i_vqa_result_path, 'r') as f:
    t2i_spatial_ann = json.load(f)

In [None]:
# Images per prompt, as reported by the summary print further down.
# Presumably the number of images generated (and annotated) for each prompt -- TODO confirm.
t2i_num_images_per_prompt = 10

In [None]:
# Collect the prompts for which every one of the expected images scored 0.
#
# The annotations are scanned once and zero scores are tallied per prompt,
# instead of rescanning the whole annotation list for each prompt. The
# threshold comes from t2i_num_images_per_prompt rather than a literal 10, so
# this cell and the summary print cannot drift apart.
known_prompts = set(t2i_prompts)
zero_score_counts = {}
for ann in t2i_spatial_ann:
    # Only annotations belonging to a known prompt are inspected for a score.
    if ann["prompt"] in known_prompts and ann["score"] == 0:
        zero_score_counts[ann["prompt"]] = zero_score_counts.get(ann["prompt"], 0) + 1

prompts_score_0 = {
    prompt
    for prompt in t2i_prompts
    if zero_score_counts.get(prompt, 0) == t2i_num_images_per_prompt
}

In [None]:
# One-line summary of how many prompts failed on every image.
print(f"{len(prompts_score_0)} out of {len(t2i_prompts)} prompts have score 0 in all {t2i_num_images_per_prompt} images")

In [None]:
# Display the set of prompts that scored 0 on all of their images.
prompts_score_0

In [None]:
def get_object(doc, index):
    """Return the text of the ``index``-th NOUN token in ``doc``.

    Args:
        doc: Iterable of tokens exposing ``pos_`` and ``text`` attributes
            (a spaCy ``Doc`` in this notebook).
        index: Position within the NOUN tokens; negative values count from
            the end, so ``-1`` is the last noun.

    Returns:
        The ``text`` of the selected token. Only a single token is returned,
        so a multi-word object name yields one word of it.

    Raises:
        IndexError: If ``doc`` has no NOUN token at that position.
    """
    noun_texts = []
    for token in doc:
        if token.pos_ == 'NOUN':
            noun_texts.append(token.text)
    return noun_texts[index]

# Small English spaCy pipeline, used here for part-of-speech tags.
nlp = spacy.load("en_core_web_sm")

# Gather the first and last noun of every prompt as that prompt's two objects.
t2i_objects = set()
for prompt in t2i_prompts:
    parsed = nlp(prompt)
    t2i_objects.update((get_object(parsed, 0), get_object(parsed, -1)))

In [None]:
# Number of distinct object words found across all prompts.
len(t2i_objects)

In [None]:
# Objects (first and last noun) that occur in prompts scoring 0 on all images.
problematic_objects = set()
for prompt in prompts_score_0:
    parsed = nlp(prompt)
    problematic_objects.update((get_object(parsed, 0), get_object(parsed, -1)))

In [None]:
# Number of distinct object words appearing in the all-zero prompts.
len(problematic_objects)

In [None]:
# Convert to a list so it can be passed to random.sample, sorting on the way.
# Iteration order of a set of strings changes between interpreter runs (string
# hash randomization), so an unsorted list would make the sample below
# irreproducible even with a fixed random seed.
problematic_objects = sorted(problematic_objects)

In [None]:
# Draw up to 20 problematic objects. The count is capped at the population
# size because random.sample raises ValueError when asked for more items than
# are available.
t2i_num_sample_objects = 20
t2i_sample_objects = random.sample(
    problematic_objects,
    min(t2i_num_sample_objects, len(problematic_objects)),
)

In [None]:
def get_prompt_objects(prompt, nlp):
    """Parse ``prompt`` and return its two objects.

    Args:
        prompt: Prompt text to analyse.
        nlp: Callable that turns the text into a token sequence accepted by
            ``get_object`` (the spaCy pipeline in this notebook).

    Returns:
        A ``(first_noun, last_noun)`` tuple of token texts. Both entries are
        the same word when the prompt contains a single noun.
    """
    parsed = nlp(prompt)
    return get_object(parsed, 0), get_object(parsed, -1)

# Greedy selection of up to `t2i_num_selected_prompts` all-zero prompts that
# together cover as many of the sampled objects as possible.
#
# Changes relative to the straightforward version:
#   * each prompt is parsed with spaCy once and cached, instead of being
#     re-parsed on every greedy iteration;
#   * `covered_objects` only ever contains *sampled* objects, so both the loop
#     condition and the "covering X of Y" report count the same thing;
#   * candidates are visited in sorted order so ties are broken the same way
#     on every run (set iteration order is not stable across kernels).
t2i_num_selected_prompts = 10

sample_object_set = set(t2i_sample_objects)
candidate_prompts = sorted(prompts_score_0)
prompt_objects = {
    prompt: get_prompt_objects(prompt, nlp) for prompt in candidate_prompts
}

# Reverse index: sampled object -> all-zero prompts that mention it.
object_to_prompts = {obj: [] for obj in t2i_sample_objects}
for prompt in candidate_prompts:
    obj1, obj2 = prompt_objects[prompt]
    if obj1 in sample_object_set:
        object_to_prompts[obj1].append(prompt)
    if obj2 in sample_object_set:
        object_to_prompts[obj2].append(prompt)

selected_prompts = []
covered_objects = set()

while (
    len(covered_objects) < len(sample_object_set)
    and len(selected_prompts) < t2i_num_selected_prompts
):
    best_prompt = None
    best_new_objects = set()

    for prompt in candidate_prompts:
        if prompt in selected_prompts:
            continue

        # Sampled objects this prompt would add to the coverage.
        new_objects = (set(prompt_objects[prompt]) & sample_object_set) - covered_objects
        if len(new_objects) > len(best_new_objects):
            best_prompt = prompt
            best_new_objects = new_objects

    # No remaining prompt adds coverage: stop the greedy phase.
    if best_prompt is None:
        break

    selected_prompts.append(best_prompt)
    covered_objects |= best_new_objects

# Top up with random all-zero prompts if the greedy phase selected fewer than the limit.
remaining_prompts = [
    prompt for prompt in candidate_prompts if prompt not in selected_prompts
]
if len(selected_prompts) < t2i_num_selected_prompts and remaining_prompts:
    additional_prompts = random.sample(
        remaining_prompts,
        min(t2i_num_selected_prompts - len(selected_prompts), len(remaining_prompts)),
    )
    selected_prompts.extend(additional_prompts)
    # Keep the coverage report accurate for the randomly added prompts too.
    for prompt in additional_prompts:
        covered_objects.update(set(prompt_objects[prompt]) & sample_object_set)

print(f"Selected {len(selected_prompts)} prompts covering {len(covered_objects)} of {len(t2i_sample_objects)} objects")
for prompt in selected_prompts:
    obj1, obj2 = prompt_objects[prompt]
    print(f"Prompt: {prompt}, Objects: {obj1}, {obj2}")

In [None]:
# Re-read the T2I prompt records from disk (same file as loaded earlier in this section).
t2i_prompts_path = os.path.join('json_files', 't2i_prompts.json')
with open(t2i_prompts_path, 'r') as f:
    t2i_data = json.load(f)

In [None]:
# Keep the full records of the selected prompts, preserving file order.
t2i_failures = [
    entry for entry in t2i_data
    if entry["prompt"] in selected_prompts
]

In [None]:
# Display the records that will be written to t2i_wrong_prompts.json.
t2i_failures

In [57]:
# Persist the selected T2I failure records as pretty-printed JSON.
t2i_wrong_prompts_path = os.path.join('json_files', 't2i_wrong_prompts.json')
with open(t2i_wrong_prompts_path, 'w') as f:
    json.dump(t2i_failures, f, indent=4)

## GenEval