In [12]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import torch

In [13]:
def compute_iou(annotation, mask):
    """Compute intersection, union and intersection-over-union of two masks.

    Args:
        annotation: Ground-truth mask. Any array accepted by
            ``np.logical_and``; non-zero entries count as foreground.
            The cells in this notebook pass (512, 512) boolean arrays.
        mask: Predicted mask, combined element-wise with ``annotation``.

    Returns:
        Tuple ``(intersection, union, iou_score)``. The first two are
        foreground pixel counts; ``iou_score`` is ``intersection / union``,
        or ``nan`` when the union is empty (both masks have no foreground).
    """

    # Pixels that are foreground in both masks
    intersection = np.sum(np.logical_and(annotation, mask))

    # Pixels that are foreground in at least one of the masks
    union = np.sum(np.logical_or(annotation, mask))

    # With an empty union the ratio is 0 / 0. Return nan explicitly instead of
    # letting NumPy perform the invalid division and emit a RuntimeWarning.
    if union == 0:
        return intersection, union, np.float64(np.nan)

    # Compute intersection over union
    iou_score = intersection / union

    return intersection, union, iou_score

def interpolate(heatmap: np.ndarray, size=(512, 512), mode="bilinear"):
    """Resize a 2D heatmap to ``size`` using ``torch.nn.functional.interpolate``.

    Args:
        heatmap: 2D array (height, width). It is handed to
            ``torch.from_numpy``, so it must be an array torch can wrap.
        size: Target ``(height, width)`` of the output.
        mode: Interpolation mode forwarded to torch.

    Returns:
        NumPy array of shape ``size``.
    """

    # Convert to torch tensor
    tensor = torch.from_numpy(heatmap)
    # Add batch and channel dimensions: (H, W) -> (1, 1, H, W)
    tensor = tensor.unsqueeze(0).unsqueeze(0)
    # Interpolate
    tensor = torch.nn.functional.interpolate(tensor, size=size, mode=mode)
    # Drop only the batch and channel axes added above. A blanket squeeze()
    # would also collapse a target height or width of 1 and change the rank.
    resized = tensor[0, 0].numpy()

    return resized

## DAAM

## VOC-sim

In [65]:
dataset_path = Path('voc_sim')
annotations_folder = dataset_path / 'annotations'
threshold = 0.4 # Threshold used in the DAAM paper
threshold_optimized = 0.8 # Threshold found empirically using the training set employed for optimizing the tokens


# Iterate through the annotations. Path.iterdir() yields entries in arbitrary
# order, so sort them to keep the row order of the per-example CSV reproducible.
results = []
for annotation_path in sorted(annotations_folder.iterdir()):
    example_result_dict = {}
    # The annotation stem must split into exactly four '_'-separated fields;
    # the last field is not used.
    classname, model, seed, _ = annotation_path.stem.split('_')
    image_path = dataset_path / f"images/{classname}_{model}_{seed}.png"
    heatmap_path = dataset_path / f"heatmaps/{classname}_{model}_{seed}_heatmap_token0.npy"
    heatmap_optimized_path = dataset_path / f"heatmaps/{classname}_{model}_{seed}_heatmap_token1.npy"

    # Check that every file belonging to this example exists
    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert heatmap_path.exists(), f"Heatmap {heatmap_path} does not exist"
    assert heatmap_optimized_path.exists(), f"Heatmap {heatmap_optimized_path} does not exist"

    # Add identifiers and file names to the result dict
    example_result_dict['classname'] = classname
    example_result_dict['model'] = model
    example_result_dict['seed'] = seed
    example_result_dict['image_path'] = image_path.name
    example_result_dict['annotation_path'] = annotation_path.name
    example_result_dict['heatmap_path'] = heatmap_path.name
    example_result_dict['heatmap_optimized_path'] = heatmap_optimized_path.name

    # Load the annotation and convert it to a binary mask:
    # a pixel is foreground when its channel sum is non-zero
    annotation = np.array(Image.open(annotation_path))
    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = annotation.sum(axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"

    # Load heatmap (normal) and upsample it to the annotation resolution
    heatmap = np.load(heatmap_path)
    heatmap = heatmap[1] # We stored in 0 the background and in 1 the token related to the foreground object
    assert heatmap.shape == (64, 64), f"Heatmap {heatmap_path} has wrong shape {heatmap.shape}"
    heatmap = interpolate(heatmap, size=(512, 512))
    assert heatmap.shape == (512, 512), f"Heatmap {heatmap_path} has wrong shape {heatmap.shape}"

    # Binarize using the DAAM procedure: scale by the maximum, then threshold
    heatmap = heatmap / heatmap.max()
    mask = heatmap > threshold

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict['iou_normal'] = iou_normal
    example_result_dict['i_normal'] = i_normal
    example_result_dict['u_normal'] = u_normal

    # Load heatmap (optimized) and upsample it to the annotation resolution
    heatmap_optimized = np.load(heatmap_optimized_path)
    heatmap_optimized = heatmap_optimized[1] # We stored in 0 the background and in 1 the token related to the foreground object
    assert heatmap_optimized.shape == (64, 64), f"Heatmap {heatmap_optimized_path} has wrong shape {heatmap_optimized.shape}"
    heatmap_optimized = interpolate(heatmap_optimized, size=(512, 512))
    assert heatmap_optimized.shape == (512, 512), f"Heatmap {heatmap_optimized_path} has wrong shape {heatmap_optimized.shape}"

    # Binarize using the DAAM procedure, with the empirically chosen threshold
    heatmap_optimized = heatmap_optimized / heatmap_optimized.max()
    mask_optimized = heatmap_optimized > threshold_optimized

    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)
    example_result_dict['iou_optimized'] = iou_optimized
    example_result_dict['i_optimized'] = i_optimized
    example_result_dict['u_optimized'] = u_optimized

    results.append(example_result_dict)

    
# Per-example results: one row per annotation, tagged with the experiment name
df_results = pd.DataFrame(results).assign(experiment="voc-sim - daam")
df_results.to_csv(dataset_path / 'daam_voc_sim_results.csv', index=False)


# Per-class results: sum intersections and unions over all examples of a
# (classname, model) pair, then take the ratio of the sums
pixel_count_sums = {'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}
df_classes = df_results.groupby(['classname', 'model']).agg(pixel_count_sums).reset_index()
df_classes = df_classes.assign(
    iou_normal=df_classes['i_normal'] / df_classes['u_normal'],
    iou_optimized=df_classes['i_optimized'] / df_classes['u_optimized'],
    experiment="voc-sim - daam",
)
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
df_classes.to_csv(dataset_path / 'daam_voc_sim_class_results.csv', index=False)

# Per-model results: summed pixel counts plus the mean of the per-class IoUs
overall_spec = {**pixel_count_sums, 'iou_normal': 'mean', 'iou_optimized': 'mean'}
df_overall = df_classes.groupby('model').agg(overall_spec).reset_index()

# The mean over classes is the mIoU; the ratio of summed counts is the overall IoU
df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
df_overall['experiment'] = "voc-sim - daam"
df_overall.to_csv(dataset_path / 'daam_voc_sim_overall_results.csv', index=False)

# Show the overall metrics as percentages with one decimal
overall_columns = ["miou_normal","iou_overall_normal", "miou_optimized",  "iou_overall_optimized"]
df_overall_display = df_overall[overall_columns]
df_overall_display = (100*df_overall_display).round(1)
display(df_overall_display)

assert dataset_path.name == 'voc_sim', f"Dataset path {dataset_path} is not voc_sim"
# Show the per-class IoUs as percentages with one decimal, one column per class
df_classes_display = df_classes[['classname', 'iou_normal', 'iou_optimized']].copy()
for iou_column in ('iou_normal', 'iou_optimized'):
    df_classes_display[iou_column] = (100*df_classes_display[iou_column]).round(1)
# NOTE(review): after .T the row labels (classname / iou_normal / iou_optimized)
# live in the index, so index=False leaves them out of the sheet - confirm intended.
df_classes_display.T.to_excel(dataset_path / 'daam_voc_sim_class_results.xlsx', index=False)
display(df_classes_display.T)

Unnamed: 0,miou_normal,iou_overall_normal,miou_optimized,iou_overall_optimized
0,66.2,63.3,79.7,77.9


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
classname,aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,dining-table,dog,horse,motorbike,person,potted-plant,sheep,sofa,train,tv
iou_normal,50.0,52.8,72.0,59.0,68.2,86.7,79.3,78.9,34.2,85.4,24.6,81.8,81.6,70.0,53.1,68.4,86.2,64.7,52.0,74.1
iou_optimized,68.8,80.3,83.1,82.1,85.4,92.7,88.7,92.2,47.6,87.4,30.8,87.7,85.9,86.9,82.9,79.8,89.6,84.7,88.8,68.6


## COCO-cap

In [84]:
dataset_path = Path('coco_captions')
annotations_folder = dataset_path / 'annotations'
df_coco_captions = pd.read_csv('../coco_captions_sampled.csv')

threshold = 0.4
threshold_optimized = 0.8

# Iterate through the annotations. Path.iterdir() yields entries in arbitrary
# order, so sort them to keep the row order of the per-example CSV reproducible.
results = []
for annotation_path in sorted(annotations_folder.iterdir()):
    example_result_dict = {}
    # The annotation stem must split into exactly six '_'-separated fields;
    # the third and fifth fields are not used.
    classname, model, _, caption, _, seed = annotation_path.stem.split('_')
    # Image and heatmap file names use the model name without '-' and carry
    # explicit 'caption' / 'seed' prefixes
    model = model.replace('-', '')
    caption = f"caption{caption}"
    seed = f"seed{seed}"
    image_path = dataset_path / f"images/{classname}_{model}_{caption}_{seed}.png"
    heatmap_path = dataset_path / f"heatmaps/{classname}_{model}_{caption}_{seed}_heatmap_classname.npy"
    heatmap_optimized_path = dataset_path / f"heatmaps/{classname}_{model}_{caption}_{seed}_heatmap_token1.npy"

    # Check that every file belonging to this example exists
    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert heatmap_path.exists(), f"Mask {heatmap_path} does not exist"
    assert heatmap_optimized_path.exists(), f"Mask {heatmap_optimized_path} does not exist"

    # Add identifiers and file names to the result dict
    example_result_dict['classname'] = classname
    example_result_dict['model'] = model
    example_result_dict['seed'] = seed

    example_result_dict['image_path'] = image_path.name
    example_result_dict['annotation_path'] = annotation_path.name
    example_result_dict['heatmap_path'] = heatmap_path.name
    example_result_dict['heatmap_optimized_path'] = heatmap_optimized_path.name

    # Look up the COCO caption by its caption_id; exactly one row must match
    caption_id = int(caption.replace('caption', ''))
    row = df_coco_captions.query("caption_id==@caption_id")
    assert len(row) == 1, f"Caption {caption_id} not found in df_coco_captions"
    row = row.iloc[0]
    prompt = row['caption']
    word_included = row['word_included']
    coco_categories = row['categories']

    # Add the caption info to the results
    example_result_dict['coco_caption_id'] = caption_id
    example_result_dict['prompt'] = prompt
    example_result_dict['word_included'] = word_included
    example_result_dict['coco_categories'] = coco_categories

    # Load the annotation and convert it to a binary mask:
    # a pixel is foreground when its channel sum is non-zero
    annotation = np.array(Image.open(annotation_path))

    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = annotation.sum(axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"

    # Load heatmap (normal) and upsample it to the annotation resolution
    heatmap = np.load(heatmap_path)
    heatmap = heatmap[1] # We stored in 0 the background and in 1 the token related to the foreground object
    assert heatmap.shape == (64, 64), f"Heatmap {heatmap_path} has wrong shape {heatmap.shape}"
    heatmap = interpolate(heatmap, size=(512, 512), mode="bilinear")
    assert heatmap.shape == (512, 512), f"Heatmap {heatmap_path} has wrong shape {heatmap.shape}"

    # Binarize using the DAAM procedure: scale by the maximum, then threshold
    heatmap = heatmap / heatmap.max()
    mask = heatmap > threshold

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict['iou_normal'] = iou_normal
    example_result_dict['i_normal'] = i_normal
    example_result_dict['u_normal'] = u_normal

    # Load heatmap (optimized) and upsample it to the annotation resolution
    heatmap_optimized = np.load(heatmap_optimized_path)
    heatmap_optimized = heatmap_optimized[1] # We stored in 0 the background and in 1 the token related to the foreground object
    assert heatmap_optimized.shape == (64, 64), f"Heatmap {heatmap_optimized_path} has wrong shape {heatmap_optimized.shape}"
    heatmap_optimized = interpolate(heatmap_optimized, size=(512, 512), mode="bilinear")
    assert heatmap_optimized.shape == (512, 512), f"Heatmap {heatmap_optimized_path} has wrong shape {heatmap_optimized.shape}"

    # Binarize using the DAAM procedure, with the optimized threshold
    heatmap_optimized = heatmap_optimized / heatmap_optimized.max()
    mask_optimized = heatmap_optimized > threshold_optimized


    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)
    example_result_dict['iou_optimized'] = iou_optimized
    example_result_dict['i_optimized'] = i_optimized
    example_result_dict['u_optimized'] = u_optimized

    results.append(example_result_dict)

# Per-example results: one row per annotation, tagged with the experiment name
df_results = pd.DataFrame(results)
df_results['experiment'] = "coco-cap - daam"
df_results.to_csv(dataset_path / 'daam_coco_captions_results.csv', index=False)

# All results (included and not included)
assert dataset_path.name == 'coco_captions', f"Dataset path {dataset_path} is not coco_captions"

# Aggregated results by class: sum intersections and unions over all examples
# of a (classname, model) pair, then take the ratio of the sums
df_classes = df_results.groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
# Same experiment label as df_results (previously a leftover "voc-sim - daam")
df_classes['experiment'] = "coco-cap - daam"
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
df_classes.to_csv(dataset_path / 'daam-cap_class_results_all.csv', index=False)

# Per-class IoUs as percentages with one decimal, one column per class
df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
df_classes_display.T.to_excel(dataset_path / 'daam-cap_class_results_all.xlsx', index=False)

display(df_classes_display.T)

# Aggregate overall results: summed pixel counts plus the mean of the per-class IoUs
df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                    'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

# The mean over classes is the mIoU; the ratio of summed counts is the overall IoU
df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
# Same experiment label as df_results (previously a leftover "grounded diffusion" label)
df_overall['experiment'] = "coco-cap - daam"
df_overall.to_csv(dataset_path / 'daam-cap_overall_results_all.csv', index=False)
df_overall_display = df_overall[['iou_overall_normal', 'miou_normal', 'iou_overall_optimized', 'miou_optimized']].copy()
df_overall_display = (100*df_overall_display).round(1)
display(df_overall_display[["miou_normal","iou_overall_normal", "miou_optimized",  "iou_overall_optimized"]])



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
classname,aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,dining table,dog,horse,motorbike,person,potted plant,sheep,sofa,train,tv
iou_normal,30.6,33.8,53.0,31.9,19.1,82.6,42.8,83.0,16.8,64.6,93.1,77.9,49.8,44.6,22.7,44.8,68.6,34.1,44.6,30.0
iou_optimized,59.1,67.2,67.0,61.2,39.8,92.8,63.4,83.6,26.5,77.9,87.3,79.0,67.3,72.4,56.9,67.6,72.4,41.8,78.0,60.3


Unnamed: 0,miou_normal,iou_overall_normal,miou_optimized,iou_overall_optimized
0,48.4,44.5,66.1,67.1


In [None]:

assert dataset_path.name == 'coco_captions', f"Dataset path {dataset_path} is not coco_captions"

# Repeat the COCO-cap aggregation separately for the rows of df_results whose
# word_included flag is True and for those where it is False
for included in [True, False]:
    included_str = 'included' if included else 'non_included'
    print(included_str)
    # Aggregated results by class: sum intersections and unions per
    # (classname, model) pair, then take the ratio of the sums
    df_classes = df_results.query("word_included==@included").groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
    df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
    df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
    # Same experiment label as df_results (previously a leftover "voc-sim - grounded diffusion")
    df_classes['experiment'] = "coco-cap - daam"
    df_classes = df_classes.sort_values('classname').reset_index(drop=True)

    # Per-class IoUs as percentages with one decimal, one column per class
    df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
    df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
    df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
    df_classes_display.T.to_excel(dataset_path / f'daam_coco-cap_class_results_{included_str}.xlsx', index=False)

    display(df_classes_display.T)

    # Aggregate overall results: summed pixel counts plus the mean of the per-class IoUs
    df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                        'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

    # The mean over classes is the mIoU; the ratio of summed counts is the overall IoU
    df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
    df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
    df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
    # Same experiment label as df_results (previously a leftover "grounded diffusion" label)
    df_overall['experiment'] = "coco-cap - daam"
    df_overall.to_csv(dataset_path / f'daam_coco-cap_overall_results_{included_str}.csv', index=False)

    df_overall_display = df_overall[['miou_normal', 'iou_overall_normal',  'miou_optimized', 'iou_overall_optimized']].copy()
    df_overall_display = (100*df_overall_display).round(1)
    display(df_overall_display)
