In [43]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import torch

In [44]:
def compute_iou(annotation, mask, empty_score=float("nan")):
    """Compute intersection, union and IoU between two binary masks.

    Both inputs are interpreted as boolean masks: any non-zero entry counts
    as foreground (this is what ``np.logical_and`` / ``np.logical_or`` do).

    Args:
        annotation: ground-truth mask, e.g. shape (512, 512).
        mask: predicted mask with the same shape as ``annotation``.
        empty_score: value returned as the IoU when the union is empty
            (neither mask has any foreground pixel). Defaults to NaN, which
            is the value the plain 0/0 division would produce, but without
            triggering a NumPy RuntimeWarning.

    Returns:
        Tuple ``(intersection, union, iou_score)`` where the first two are
        pixel counts and the last one is ``intersection / union``.
    """
    # Pixel counts of the overlap and of the combined foreground
    intersection = np.sum(np.logical_and(annotation, mask))
    union = np.sum(np.logical_or(annotation, mask))

    if union == 0:
        # Both masks are empty: the ratio is undefined, so return the
        # caller-chosen placeholder instead of dividing by zero.
        iou_score = np.float64(empty_score)
    else:
        iou_score = intersection / union

    return intersection, union, iou_score

def interpolate(heatmap: np.ndarray, size=(512, 512), mode="bilinear"):
    """Resize a heatmap (or a stack of heatmaps) to ``size``.

    A 2D input (H, W) gets both a batch and a channel axis; a 3D input
    (C, H, W) only gets a batch axis, so that
    ``torch.nn.functional.interpolate`` receives a 4D tensor.

    Args:
        heatmap: NumPy array to resize.
        size: target spatial size passed to the interpolation.
        mode: interpolation mode passed to the interpolation.

    Returns:
        The resized heatmap as a NumPy array with every singleton dimension
        removed (note that a single-channel stack therefore comes back 2D).
    """
    # Leading axis first: (H, W) -> (1, H, W), (C, H, W) -> (1, C, H, W)
    tensor = torch.from_numpy(heatmap)[None]
    if tensor.dim() == 3:
        # The input was a single 2D map: add the missing channel axis too
        tensor = tensor[None]

    resized = torch.nn.functional.interpolate(tensor, size=size, mode=mode)

    # One squeeze() already drops all singleton axes
    return resized.squeeze().numpy()
    

In [144]:
import denseCRF
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import sys

def densecrf(I, P, param=None):
    """
    Refine a per-pixel class probability map with a dense CRF
    (thin wrapper around ``denseCRF.densecrf``).

    input parameters:
        I    : a numpy array of shape [H, W, C], where C should be 3.
               type of I should be np.uint8, and the values are in [0, 255]
        P    : a probability map of shape [H, W, L], where L is the number of classes
               type of P should be np.float32
        param: optional tuple giving parameters of CRF (w1, alpha, beta, w2, gamma, it), where
                w1    :   weight of bilateral term, e.g. 10.0
                alpha :   spatial distance std, e.g., 80
                beta  :   rgb value std, e.g., 15
                w2    :   weight of spatial term, e.g., 3.0
                gamma :   spatial distance std for spatial term, e.g., 3
                it    :   iteration number, e.g., 5
               When omitted (None), the defaults below are used:
               (10.0, 80, 13, 3.0, 3, 5.0).
    output parameters:
        out  : a numpy array of shape [H, W], where pixel values represent class indices.
    raises:
        ValueError if ``param`` is given but does not have exactly 6 entries.
    """
    if param is None:
        w1    = 10.0  # weight of bilateral term
        alpha = 80    # spatial std
        beta  = 13    # rgb  std
        w2    = 3.0   # weight of spatial term
        gamma = 3     # spatial std
        it    = 5.0   # iteration
        param = (w1, alpha, beta, w2, gamma, it)
    elif len(param) != 6:
        raise ValueError(
            f"param must be (w1, alpha, beta, w2, gamma, it), got {len(param)} values"
        )
    return denseCRF.densecrf(I, P, param)

## Attn2Mask

## VOC-sim

In [None]:
# Imported in this cell so it runs on a fresh kernel: the loop below uses tqdm,
# which is otherwise only imported by a later cell.
from tqdm.notebook import tqdm

dataset_path = Path('voc_sim')
annotations_folder = dataset_path / 'annotations'

# Evaluation settings
apply_crf = False       # when True, refine the thresholded masks with densecrf()
beta = 0.2              # offset subtracted from the background channel (plain heatmap)
beta_optimized = -0.2   # offset subtracted from the background channel (optimized heatmap)

# Iterate through annotations. Path.iterdir() yields entries in arbitrary order,
# so sort them to make the row order of the saved results reproducible.
results = []
for annotation_path in tqdm(sorted(annotations_folder.iterdir())):
    example_result_dict = {}
    # The annotation stem must split into exactly four '_'-separated parts:
    # model, classname, seed and a trailing suffix that is ignored.
    model, classname, seed, _ = annotation_path.stem.split('_')
    image_path = dataset_path / f"images/{model}_{classname}_{seed}_image.png"
    heatmap_path = dataset_path / f"voc_sim_fixed/{model}_{classname}_{seed}_heatmap.npy"
    heatmap_optimized_path = dataset_path / f"voc_sim_fixed/{model}_{classname}_{seed}_heatmapopt.npy"
    
    # Check all exists
    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert heatmap_path.exists(), f"Heatmap {heatmap_path} does not exist"
    assert heatmap_optimized_path.exists(), f"Heatmap {heatmap_optimized_path} does not exist"
    
    # Add paths to result dict
    example_result_dict['classname'] = classname
    example_result_dict['model'] = model
    example_result_dict['seed'] = seed
    example_result_dict['image_path'] = image_path.name
    example_result_dict['annotation_path'] = annotation_path.name
    example_result_dict['heatmap_path'] = heatmap_path.name
    example_result_dict['heatmap_optimized_path'] = heatmap_optimized_path.name

    # Load annotation and convert it to a binary mask: a pixel is foreground
    # when any of its three channels is non-zero.
    annotation = np.array(Image.open(annotation_path))
    image = np.array(Image.open(image_path))
    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = annotation.sum(axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"
    
    # --- Plain heatmap -> mask ---
    heatmap = np.load(heatmap_path)[0]
    heatmap = heatmap[-2] # Channel for the selected class
    # Min-max normalisation to [0, 1].
    # NOTE(review): a constant heatmap makes this 0/0 (NaN) — confirm it cannot happen.
    heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min())
    a0 = 1 - heatmap - beta # Background channel
    attentions = np.stack([a0, heatmap], axis=0)
    attentions = interpolate(attentions, size=(512, 512), mode="bilinear")
    # A pixel is foreground when the class channel beats the background channel
    mask = attentions.argmax(axis=0)
    if apply_crf:
        # densecrf expects [H, W, L] float32 "probabilities"; here they are the hard one-hot mask
        mask = np.stack([1 - mask, mask], axis=-1).astype(np.float32)
        mask = densecrf(image, mask)

    # Checked whether or not the CRF ran, like the optimized branch below
    assert mask.shape == (512, 512), f"Mask {heatmap_path} has wrong shape {mask.shape}"

    # Debug visualisation (uncomment to inspect a sample):
    # fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))
    # ax1.imshow(image)
    # ax2.imshow(attentions[0])
    # ax3.imshow(attentions[1])
    # ax4.imshow(mask)
    # continue

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict['iou_normal'] = iou_normal
    example_result_dict['i_normal'] = i_normal
    example_result_dict['u_normal'] = u_normal

    # --- Optimized heatmap -> mask ---
    heatmap_optimized = np.load(heatmap_optimized_path)
    heatmap_optimized = heatmap_optimized[0, 1] # We stored in 0 the background and in 1 the token related to the foreground object
    heatmap_optimized = (heatmap_optimized - heatmap_optimized.min()) / (heatmap_optimized.max() - heatmap_optimized.min())
    
    a0_optimized = 1 - heatmap_optimized - beta_optimized
    attentions_optimized = np.stack([a0_optimized, heatmap_optimized], axis=0)
    attentions_optimized = interpolate(attentions_optimized, size=(512, 512), mode="bilinear")
    mask_optimized = attentions_optimized.argmax(axis=0)
    if apply_crf:
        mask_optimized = np.stack([1 - mask_optimized, mask_optimized], axis=-1).astype(np.float32)
        mask_optimized = densecrf(image, mask_optimized)

    assert mask_optimized.shape == (512, 512), f"Mask {heatmap_optimized_path} has wrong shape {mask_optimized.shape}"
    
    # Debug visualisation (uncomment to inspect a sample):
    # fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))
    # ax1.imshow(image)
    # ax2.imshow(attentions_optimized[0])
    # ax3.imshow(attentions_optimized[1])
    # ax4.imshow(mask_optimized)
    # continue

    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)
    example_result_dict['iou_optimized'] = iou_optimized
    example_result_dict['i_optimized'] = i_optimized
    example_result_dict['u_optimized'] = u_optimized

    results.append(example_result_dict)

    
# Per-example results, tagged with the experiment name
df_results = pd.DataFrame(results).assign(experiment="voc-sim - daam")
df_results.to_csv(dataset_path / 'daam_voc_sim_results.csv', index=False)


# Per-class results: intersections and unions are pooled over the examples of
# each (classname, model) pair before taking the ratio.
df_classes = (
    df_results
    .groupby(['classname', 'model'])
    .agg({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'})
    .reset_index()
    .assign(
        iou_normal=lambda frame: frame['i_normal'] / frame['u_normal'],
        iou_optimized=lambda frame: frame['i_optimized'] / frame['u_optimized'],
        experiment="voc-sim - daam",
    )
    .sort_values('classname')
    .reset_index(drop=True)
)
df_classes.to_csv(dataset_path / 'daam_voc_sim_class_results.csv', index=False)

# Per-model results: mIoU is the mean of the class IoUs, the overall IoU pools
# every intersection and union of the model.
df_overall = (
    df_classes
    .groupby('model')
    .agg({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
          'iou_normal': 'mean', 'iou_optimized': 'mean'})
    .reset_index()
    .rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
    .assign(
        iou_overall_normal=lambda frame: frame['i_normal'] / frame['u_normal'],
        iou_overall_optimized=lambda frame: frame['i_optimized'] / frame['u_optimized'],
        experiment="voc-sim - daam",
    )
)
df_overall.to_csv(dataset_path / 'daam_voc_sim_overall_results.csv', index=False)

# Overall scores as percentages with one decimal
df_overall_display = (
    df_overall[["miou_normal","iou_overall_normal", "miou_optimized",  "iou_overall_optimized"]]
    .mul(100)
    .round(1)
)
display(df_overall_display)

assert dataset_path.name == 'voc_sim', f"Dataset path {dataset_path} is not voc_sim"
# Per-class scores as percentages with one decimal, shown and exported transposed
df_classes_display = df_classes[['classname', 'iou_normal', 'iou_optimized']].assign(
    iou_normal=lambda frame: frame['iou_normal'].mul(100).round(1),
    iou_optimized=lambda frame: frame['iou_optimized'].mul(100).round(1),
)
df_classes_display.T.to_excel(dataset_path / 'daam_voc_sim_class_results.xlsx', index=False)
display(df_classes_display.T)

## COCO-cap

In [None]:
from tqdm.notebook import tqdm
dataset_path = Path('coco_cap')
annotations_folder = dataset_path / 'annotations'
# Caption metadata, looked up per example through its caption_id
df_coco_captions = pd.read_csv('../coco_captions_sampled.csv')

# Evaluation settings
apply_crf = True        # when True, refine the thresholded masks with densecrf()
beta = 0.2              # offset subtracted from the background channel (plain heatmap)
beta_optimized = -0.2   # offset subtracted from the background channel (optimized heatmap)

# Iterate through annotations. Path.iterdir() yields entries in arbitrary order,
# so sort them to make the row order of the saved results reproducible.
results = []
for annotation_path in tqdm(sorted(annotations_folder.iterdir())):
    example_result_dict = {}
    # The annotation stem must split into exactly five '_'-separated parts:
    # model, classname, caption tag, seed and a trailing suffix that is ignored.
    model, classname, caption, seed, _ = annotation_path.stem.split('_')
    # The image/heatmap file names use the model name without dashes
    model = model.replace('-', '')
    image_path = dataset_path / f"images/{model}_{classname}_{caption}_{seed}_image.png"
    heatmap_path = dataset_path / f"heatmaps/{model}_{classname}_{caption}_{seed}_heatmap.npy"
    heatmap_optimized_path = dataset_path / f"heatmaps_optimized/{model}_{classname}_{caption}_{seed}_heatmapopt.npy"

    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert heatmap_path.exists(), f"Mask {heatmap_path} does not exist"
    assert heatmap_optimized_path.exists(), f"Mask {heatmap_optimized_path} does not exist"
    
    # Add paths to result dict
    example_result_dict['classname'] = classname
    example_result_dict['model'] = model
    example_result_dict['seed'] = seed
    
    example_result_dict['image_path'] = image_path.name
    example_result_dict['annotation_path'] = annotation_path.name
    example_result_dict['heatmap_path'] = heatmap_path.name
    example_result_dict['heatmap_optimized_path'] = heatmap_optimized_path.name

    # Get info of the coco caption used, via the numeric id in the "caption<id>" tag
    caption_id = int(caption.replace('caption', ''))
    row = df_coco_captions.query("caption_id==@caption_id")
    assert len(row) == 1, f"Caption {caption_id} not found in df_coco_captions"
    row = row.iloc[0]
    prompt = row['caption']
    word_included = row['word_included']
    coco_categories = row['categories']

    # Add info to results
    example_result_dict['coco_caption_id'] = caption_id
    example_result_dict['prompt'] = prompt
    example_result_dict['word_included'] = word_included
    example_result_dict['coco_categories'] = coco_categories

    # Load annotation and convert it to a binary mask: a pixel is foreground
    # when any of its three channels is non-zero.
    annotation = np.array(Image.open(annotation_path))
    
    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = annotation.sum(axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"
    image = np.array(Image.open(image_path))

    # --- Plain heatmap -> mask ---
    heatmap = np.load(heatmap_path)[0]
    heatmap = heatmap[-2] # Channel for the selected class
    # Min-max normalisation to [0, 1].
    # NOTE(review): a constant heatmap makes this 0/0 (NaN) — confirm it cannot happen.
    heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min())
    a0 = 1 - heatmap - beta # Background channel
    attentions = np.stack([a0, heatmap], axis=0)
    attentions = interpolate(attentions, size=(512, 512), mode="bilinear")
    # A pixel is foreground when the class channel beats the background channel
    mask = attentions.argmax(axis=0)
    if apply_crf:
        # densecrf expects [H, W, L] float32 "probabilities"; here they are the hard one-hot mask
        mask = np.stack([1 - mask, mask], axis=-1).astype(np.float32)
        mask = densecrf(image, mask)

    # Single shape check covering both the CRF and the non-CRF path
    assert mask.shape == (512, 512), f"Mask {heatmap_path} has wrong shape {mask.shape}"

    # Debug visualisation (uncomment to inspect a sample):
    # fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))
    # ax1.imshow(Image.open(image_path))
    # ax1.set_title("Image")
    # ax2.imshow(attentions[1])
    # ax2.set_title("Heatmap")
    # ax3.imshow(attentions[0])
    # ax3.set_title("a0")
    # ax4.imshow(mask)
    # ax4.set_title("Mask")

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict['iou_normal'] = iou_normal
    example_result_dict['i_normal'] = i_normal
    example_result_dict['u_normal'] = u_normal

    # --- Optimized heatmap -> mask ---
    heatmap_optimized = np.load(heatmap_optimized_path)
    heatmap_optimized = heatmap_optimized[0, 1] # We stored in 0 the background and in 1 the token related to the foreground object
    heatmap_optimized = (heatmap_optimized - heatmap_optimized.min()) / (heatmap_optimized.max() - heatmap_optimized.min())
    
    a0_optimized = 1 - heatmap_optimized - beta_optimized
    attentions_optimized = np.stack([a0_optimized, heatmap_optimized], axis=0)
    attentions_optimized = interpolate(attentions_optimized, size=(512, 512), mode="bilinear")
    mask_optimized = attentions_optimized.argmax(axis=0)
    if apply_crf:
        mask_optimized = np.stack([1 - mask_optimized, mask_optimized], axis=-1).astype(np.float32)
        mask_optimized = densecrf(image, mask_optimized)

    assert mask_optimized.shape == (512, 512), f"Mask {heatmap_optimized_path} has wrong shape {mask_optimized.shape}"

    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)

    example_result_dict['iou_optimized'] = iou_optimized
    example_result_dict['i_optimized'] = i_optimized
    example_result_dict['u_optimized'] = u_optimized

    # Debug visualisation (uncomment to inspect a sample):
    # fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(15, 5))
    # ax1.imshow(Image.open(image_path))
    # ax1.set_title("Image")
    # ax2.imshow(attentions_optimized[1])
    # ax2.set_title("Heatmap")
    # ax3.imshow(attentions_optimized[0])
    # ax3.set_title("a0")
    # ax4.imshow(mask_optimized)
    # ax4.set_title("Mask")

    results.append(example_result_dict)    

# Guard first, so that nothing is written into the wrong dataset folder
assert dataset_path.name == 'coco_cap', f"Dataset path {dataset_path} is not coco_cap"

# Per-example results
df_results = pd.DataFrame(results)
df_results['experiment'] = "coco-cap - daam"
df_results.to_csv(dataset_path / 'attn2mask_coco_captions_results.csv', index=False)

# All results (included and not included)

# Aggregated results by class: intersections and unions are pooled over the
# examples of each (classname, model) pair before taking the ratio.
df_classes = df_results.groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
# Same experiment label as the per-example table above (was a leftover
# "voc-sim - attn2mask" from another evaluation).
df_classes['experiment'] = "coco-cap - daam"
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
df_classes.to_csv(dataset_path / 'daam-cap_class_results_all.csv', index=False)

# Per-class scores as percentages with one decimal, shown and exported transposed
df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
df_classes_display.T.to_excel(dataset_path / 'daam-cap_class_results_all.xlsx', index=False)

display(df_classes_display.T)

# Aggregate overall results per model: mIoU is the mean of the class IoUs, the
# overall IoU pools every intersection and union of the model.
df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                    'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
# Same experiment label as the per-example table (was "coco-cap - grounded diffusion")
df_overall['experiment'] = "coco-cap - daam"
df_overall.to_csv(dataset_path / 'daam-cap_overall_results_all.csv', index=False)
df_overall_display = df_overall[['iou_overall_normal', 'miou_normal', 'iou_overall_optimized', 'miou_optimized']].copy()
df_overall_display = (100*df_overall_display).round(1)
display(df_overall_display[["miou_normal","iou_overall_normal", "miou_optimized",  "iou_overall_optimized"]])



In [None]:

assert dataset_path.name == 'coco_cap', f"Dataset path {dataset_path} is not coco_cap"

# Repeat the per-class / overall aggregation separately for the examples whose
# `word_included` flag is True and for those where it is False.
for included in [True, False]:
    included_str = 'included' if included else 'non_included'
    print(included_str)
    # Aggregated results by class: intersections and unions are pooled over the
    # examples of each (classname, model) pair before taking the ratio.
    df_classes = df_results.query("word_included==@included").groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
    df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
    df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
    # Label matches the per-example results (was "voc-sim - grounded diffusion")
    df_classes['experiment'] = "coco-cap - daam"
    df_classes = df_classes.sort_values('classname').reset_index(drop=True)
    #df_classes.to_csv(dataset_path / 'grounded_diffusion_coco-cap_class_results_all.csv', index=False)

    # Per-class scores as percentages with one decimal, shown and exported transposed
    df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
    df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
    df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
    df_classes_display.T.to_excel(dataset_path / f'daam_coco-cap_class_results_{included_str}.xlsx', index=False)

    display(df_classes_display.T)

    # Aggregate overall results per model: mIoU is the mean of the class IoUs,
    # the overall IoU pools every intersection and union of the model.
    df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                        'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

    df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
    df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
    df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
    # Label matches the per-example results (was "coco-cap - grounded diffusion")
    df_overall['experiment'] = "coco-cap - daam"
    df_overall.to_csv(dataset_path / f'daam_coco-cap_overall_results_{included_str}.csv', index=False)

    df_overall_display = df_overall[['miou_normal', 'iou_overall_normal',  'miou_optimized', 'iou_overall_optimized']].copy()
    df_overall_display = (100*df_overall_display).round(1)
    display(df_overall_display)
