In [1]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path

In [26]:
def compute_iou(annotation, mask):
    """Compute intersection, union and IoU between two binary masks.

    Parameters
    ----------
    annotation : array-like
        Ground-truth mask (the notebook passes (512, 512) boolean arrays).
        Any non-zero element counts as foreground.
    mask : array-like
        Predicted mask. Must have exactly the same shape as ``annotation``.

    Returns
    -------
    tuple
        ``(intersection, union, iou_score)``. The first two are foreground
        pixel counts; ``iou_score`` is ``intersection / union``, or ``nan``
        when the union is empty (both masks have no foreground).

    Raises
    ------
    ValueError
        If ``annotation`` and ``mask`` have different shapes.
    """
    annotation = np.asarray(annotation).astype(bool)
    mask = np.asarray(mask).astype(bool)

    # np.logical_and / np.logical_or broadcast their inputs, so a shape
    # mismatch would otherwise produce a plausible-looking but wrong score.
    if annotation.shape != mask.shape:
        raise ValueError(
            f"Shape mismatch: annotation {annotation.shape} vs mask {mask.shape}"
        )

    # Compute intersection
    intersection = np.sum(np.logical_and(annotation, mask))

    # Compute union
    union = np.sum(np.logical_or(annotation, mask))

    # Compute intersection over union; an empty union has no defined IoU,
    # so return nan explicitly instead of triggering a 0/0 RuntimeWarning.
    iou_score = intersection / union if union > 0 else np.nan

    return intersection, union, iou_score

## Grounded Diffusion

## VOC-sim

In [68]:
# Evaluate Grounded Diffusion masks on VOC-sim: per-example IoU first, then
# per-class and overall aggregates. Each table is written as a CSV inside
# the dataset folder.
dataset_path = Path('voc_sim')
annotations_folder = dataset_path / 'annotations'

# Iterate through annotations. Sorted because Path.iterdir() yields entries in
# arbitrary order and the per-example CSV should be reproducible across runs.
results = []
for annotation_path in sorted(annotations_folder.iterdir()):
    example_result_dict = {}
    # Annotation stems have four '_'-separated fields; the last one is unused
    classname, model, seed, _ = annotation_path.stem.split('_')
    image_path = dataset_path / f"images/{classname}_{model}_{seed}.png"
    # Mask file names use '-' where the class name contains a space
    mask_path = dataset_path / f"masks/{classname.replace(' ', '-')}_{model}_{seed}_gd_mask.png"
    mask_optimized_path = dataset_path / f"masks_optimized/{classname.replace(' ', '-')}_{model}_{seed}_gdopt_mask.png"
    # Check all exists
    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert mask_path.exists(), f"Mask {mask_path} does not exist"
    assert mask_optimized_path.exists(), f"Mask {mask_optimized_path} does not exist"

    # Add paths to result dict
    example_result_dict['classname'] = classname
    example_result_dict['model'] = model
    example_result_dict['seed'] = seed
    example_result_dict['image_path'] = image_path.name
    example_result_dict['annotation_path'] = annotation_path.name
    example_result_dict['mask_path'] = mask_path.name

    # Load annotation. Convert to a binary mask: a pixel is foreground when
    # any of its three channels is non-zero.
    annotation = np.array(Image.open(annotation_path))
    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = annotation.sum(axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"

    # Load mask (normal)
    mask = np.array(Image.open(mask_path))
    assert mask.shape == (512, 512, 3), f"Mask {mask_path} has wrong shape {mask.shape}"
    mask = mask.sum(axis=-1) != 0
    assert mask.shape == (512, 512), f"Mask aggregated {mask_path} has wrong shape {mask.shape}"

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict['iou_normal'] = iou_normal
    example_result_dict['i_normal'] = i_normal
    example_result_dict['u_normal'] = u_normal

    # Load mask (optimized)
    mask_optimized = np.array(Image.open(mask_optimized_path))
    assert mask_optimized.shape == (512, 512, 3), f"Mask optimized {mask_optimized_path} has wrong shape {mask_optimized.shape}"
    mask_optimized = mask_optimized.sum(axis=-1) != 0
    assert mask_optimized.shape == (512, 512), f"Mask optimized aggregated {mask_optimized_path} has wrong shape {mask_optimized.shape}"

    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)
    example_result_dict['iou_optimized'] = iou_optimized
    example_result_dict['i_optimized'] = i_optimized
    example_result_dict['u_optimized'] = u_optimized

    results.append(example_result_dict)

# Aggregated by example
df_results = pd.DataFrame(results)
df_results['experiment'] = "voc-sim - grounded diffusion"
df_results.to_csv(dataset_path / 'grounded_diffusion_voc_sim_results.csv', index=False)

# Aggregated results by class: class IoU is computed from the summed
# intersections and unions, not as the mean of per-example IoUs.
df_classes = df_results.groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
df_classes['experiment'] = "voc-sim - grounded diffusion"
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
df_classes.to_csv(dataset_path / 'grounded_diffusion_voc_sim_class_results.csv', index=False)

# Overall results per model: miou_* is the mean of the per-class IoUs, while
# iou_overall_* pools intersections and unions across all classes.
df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                       'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
df_overall['experiment'] = "voc-sim - grounded diffusion"
df_overall.to_csv(dataset_path / 'grounded_diffusion_voc_sim_overall_results.csv', index=False)
df_overall

Unnamed: 0,model,i_normal,u_normal,i_optimized,u_optimized,miou_normal,miou_optimized,iou_overall_normal,iou_overall_optimized,experiment
0,sdgd,29638809,45103639,40240766,46476065,0.621276,0.865672,0.657127,0.865838,voc-sim - grounded diffusion


In [69]:
# This cell reads `dataset_path` and `df_classes` from the kernel state;
# make sure they still refer to the VOC-sim run before exporting.
assert dataset_path.name == 'voc_sim', f"Dataset path {dataset_path} is not voc_sim"

# Per-class IoU expressed as percentages rounded to one decimal place
df_classes_display = df_classes[['classname', 'iou_normal', 'iou_optimized']].assign(
    iou_normal=lambda frame: (100*frame['iou_normal']).round(1),
    iou_optimized=lambda frame: (100*frame['iou_optimized']).round(1),
)

# Export one column per class; index=False leaves the row labels of the
# transposed frame out of the spreadsheet.
df_classes_display.T.to_excel(dataset_path / 'grounded_diffusion_voc_sim_class_results.xlsx', index=False)
df_classes_display.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
classname,aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,dining table,dog,horse,motorbike,person,potted plant,sheep,sofa,train,tv
iou_normal,89.9,62.9,93.4,2.5,59.0,55.8,59.5,88.0,61.7,85.9,0.2,95.0,91.1,74.7,73.3,72.5,87.4,50.9,19.6,19.1
iou_optimized,90.9,81.3,93.5,90.3,92.1,97.2,97.0,88.8,63.7,92.7,81.6,95.1,91.9,92.3,91.1,84.7,88.1,95.0,81.2,42.9


## COCO-cap

In [108]:
# Evaluate Grounded Diffusion masks on the COCO-captions images and store
# one result row per annotated example.
dataset_path = Path('coco_captions')
annotations_folder = dataset_path / 'annotations'
df_coco_captions = pd.read_csv('../coco_captions_sampled.csv')

# Walk over every annotation file
results = []
for annotation_path in annotations_folder.iterdir():
    # The annotation stem carries five '_'-separated fields; the last is ignored
    classname, model, caption, seed, _ = annotation_path.stem.split('_')
    image_path = dataset_path / f"images/{classname}_{model}_{caption}_{seed}.png"
    mask_path = dataset_path / f"masks/{classname}_{model}_{caption}_{seed}_gd_mask.png"
    mask_optimized_path = dataset_path / f"masks_optimized/{classname}_{model}_{caption}_{seed}_gdopt_mask.png"

    # Every companion file must be present before anything is loaded
    assert annotation_path.exists(), f"Annotation {annotation_path} does not exist"
    assert image_path.exists(), f"Image {image_path} does not exist"
    assert mask_path.exists(), f"Mask {mask_path} does not exist"
    assert mask_optimized_path.exists(), f"Mask {mask_optimized_path} does not exist"

    # Start the record with identifiers and file names
    example_result_dict = {
        'classname': classname,
        'model': model,
        'seed': seed,
        'image_path': image_path.name,
        'annotation_path': annotation_path.name,
        'mask_path': mask_path.name,
    }

    # Look up the sampled COCO caption through the numeric id in the file name
    caption_id = int(caption.replace('caption', ''))
    row = df_coco_captions.query("caption_id==@caption_id")
    assert len(row) == 1, f"Caption {caption_id} not found in df_coco_captions"
    row = row.iloc[0]
    prompt = row['caption']
    word_included = row['word_included']
    coco_categories = row['categories']

    # Attach the caption metadata to the record
    example_result_dict.update({
        'coco_caption_id': caption_id,
        'prompt': prompt,
        'word_included': word_included,
        'coco_categories': coco_categories,
    })

    # Ground truth: a pixel is foreground when its channel sum is non-zero
    annotation = np.array(Image.open(annotation_path))
    assert annotation.shape == (512, 512, 3), f"Annotation {annotation_path} has wrong shape {annotation.shape}"
    annotation = np.sum(annotation, axis=-1) != 0
    assert annotation.shape == (512, 512), f"Annotation aggregated {annotation_path} has wrong shape {annotation.shape}"

    # Prediction (normal), binarised the same way
    mask = np.array(Image.open(mask_path))
    assert mask.shape == (512, 512, 3), f"Mask {mask_path} has wrong shape {mask.shape}"
    mask = np.sum(mask, axis=-1) != 0
    assert mask.shape == (512, 512), f"Mask aggregated {mask_path} has wrong shape {mask.shape}"

    i_normal, u_normal, iou_normal = compute_iou(annotation=annotation, mask=mask)
    example_result_dict.update({
        'iou_normal': iou_normal,
        'i_normal': i_normal,
        'u_normal': u_normal,
    })

    # Prediction (optimized), binarised the same way
    mask_optimized = np.array(Image.open(mask_optimized_path))
    assert mask_optimized.shape == (512, 512, 3), f"Mask optimized {mask_optimized_path} has wrong shape {mask_optimized.shape}"
    mask_optimized = np.sum(mask_optimized, axis=-1) != 0
    assert mask_optimized.shape == (512, 512), f"Mask optimized aggregated {mask_optimized_path} has wrong shape {mask_optimized.shape}"

    i_optimized, u_optimized, iou_optimized = compute_iou(annotation=annotation, mask=mask_optimized)
    example_result_dict.update({
        'iou_optimized': iou_optimized,
        'i_optimized': i_optimized,
        'u_optimized': u_optimized,
    })

    results.append(example_result_dict)

# One row per example, tagged with the experiment name, saved next to the data
df_results = pd.DataFrame(results).assign(experiment="coco-cap - grounded diffusion")
df_results.to_csv(dataset_path / 'grounded_diffusion_coco_captions_results.csv', index=False)



In [120]:
# All results (included and not included)
# Aggregates the per-example COCO-cap results in `df_results` by class and
# then per model, writes the tables to disk and displays them as percentages.

assert dataset_path.name == 'coco_captions', f"Dataset path {dataset_path} is not coco_captions"

# Aggregated results by class: class IoU is computed from the summed
# intersections and unions, not as the mean of per-example IoUs.
df_classes = df_results.groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
# These are COCO-cap results; the label must match the one used for
# df_results and df_overall in this experiment.
df_classes['experiment'] = "coco-cap - grounded diffusion"
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
df_classes.to_csv(dataset_path / 'grounded_diffusion_coco-cap_class_results_all.csv', index=False)

# Per-class IoU as percentages rounded to one decimal place
df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
df_classes_display.T.to_excel(dataset_path / 'grounded_diffusion_coco-cap_class_results_all.xlsx', index=False)

display(df_classes_display.T)

# Aggregate overall results per model: miou_* is the mean of the per-class
# IoUs, while iou_overall_* pools intersections and unions across classes.
df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                       'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
df_overall['experiment'] = "coco-cap - grounded diffusion"
df_overall.to_csv(dataset_path / 'grounded_diffusion_coco-cap_overall_results_all.csv', index=False)

df_overall_display = df_overall[['iou_overall_normal', 'miou_normal', 'iou_overall_optimized', 'miou_optimized']].copy()
df_overall_display = (100*df_overall_display).round(1)
df_overall_display



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
classname,aeroplane,bicycle,bird,boat,bottle,bus,car,cat,chair,cow,dining table,dog,horse,motorbike,person,potted plant,sheep,sofa,train,tv
iou_normal,84.6,54.9,81.9,30.8,8.2,81.4,25.1,87.3,5.6,73.7,3.1,84.4,84.1,49.8,51.8,50.8,68.6,13.3,47.2,16.5
iou_optimized,88.3,75.9,82.3,67.1,85.8,95.0,54.3,89.0,60.1,78.1,88.6,85.5,85.6,85.6,79.1,75.2,68.7,18.4,86.6,15.0


Unnamed: 0,iou_overall_normal,miou_normal,iou_overall_optimized,miou_optimized
0,59.4,50.2,74.2,73.2


In [None]:
# Results restricted to examples whose `word_included` flag is True
# Same aggregation as for the full COCO-cap results, but nothing is written
# to disk here: the exports below are disabled.

assert dataset_path.name == 'coco_captions', f"Dataset path {dataset_path} is not coco_captions"

# Aggregated results by class: class IoU is computed from the summed
# intersections and unions, not as the mean of per-example IoUs.
df_classes = df_results.query("word_included==True").groupby(['classname', 'model']).aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum'}).reset_index()
df_classes['iou_normal'] = df_classes['i_normal'] / df_classes['u_normal']
df_classes['iou_optimized'] = df_classes['i_optimized'] / df_classes['u_optimized']
# These are COCO-cap results; the label must match the one used for
# df_overall below.
df_classes['experiment'] = "coco-cap - grounded diffusion"
df_classes = df_classes.sort_values('classname').reset_index(drop=True)
# Disabled export. The file name is specific to this subset so that
# re-enabling it cannot overwrite the '_all' results.
#df_classes.to_csv(dataset_path / 'grounded_diffusion_coco-cap_class_results_word_included.csv', index=False)

# Per-class IoU as percentages rounded to one decimal place
df_classes_display = df_classes[["classname", 'iou_normal', 'iou_optimized']].copy()
df_classes_display['iou_normal'] = (100*df_classes_display['iou_normal']).round(1)
df_classes_display['iou_optimized'] = (100*df_classes_display['iou_optimized']).round(1)
#df_classes_display.T.to_excel(dataset_path / 'grounded_diffusion_coco-cap_class_results_word_included.xlsx', index=False)

display(df_classes_display.T)

# Aggregate overall results per model: miou_* is the mean of the per-class
# IoUs, while iou_overall_* pools intersections and unions across classes.
df_overall = df_classes.groupby('model').aggregate({'i_normal': 'sum', 'u_normal': 'sum', 'i_optimized': 'sum', 'u_optimized': 'sum',
                                       'iou_normal': 'mean', 'iou_optimized': 'mean'}).reset_index()

df_overall = df_overall.rename(columns={'iou_normal': 'miou_normal', 'iou_optimized': 'miou_optimized'})
df_overall['iou_overall_normal'] = df_overall['i_normal'] / df_overall['u_normal']
df_overall['iou_overall_optimized'] = df_overall['i_optimized'] / df_overall['u_optimized']
df_overall['experiment'] = "coco-cap - grounded diffusion"
#df_overall.to_csv(dataset_path / 'grounded_diffusion_coco-cap_overall_results_word_included.csv', index=False)

df_overall_display = df_overall[['iou_overall_normal', 'miou_normal', 'iou_overall_optimized', 'miou_optimized']].copy()
df_overall_display = (100*df_overall_display).round(1)
df_overall_display

