In [8]:
def mask_score(cm_score):
    positive_or_zero_mask = cm_score >= 0
    negative_mask = cm_score < 0
    cm_score[positive_or_zero_mask] = 1
    cm_score[negative_mask] = 0
    return cm_score

In [6]:
# Image Method

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset, Dataset
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import spearmanr
from tqdm import tqdm
from utils import CaptionGenerator

from utils.dataset import get_dataset
from utils.model_utils import load_model, get_text_embeddings, get_image_embeddings
import os
import glob
from PIL import Image

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "openai/clip-vit-base-patch32"

datasets_info = [
    {"name": "NWPU-RESISC45", "template": "a satellite photo containing {}."},
    {"name": "Stanford_dogs", "template": "a photo of {}, a type of dog."},
    {"name": "CUB_200_2011", "template": "a photo of {}, a type of bird."},
    {"name": "Flower102", "template": "a photo of {}, a type of flower."},
]

class PseudoImageDataset(Dataset):
    def __init__(self, image_folder, class_names, transform=None):
        self.image_paths = []
        self.labels = []
        self.transform = transform
        self.class_names = class_names

        # Store class_names in a set for faster lookup
        class_name_set = set(class_names)

        for image_path in glob.glob(os.path.join(image_folder, '*.png')):
            filename = os.path.basename(image_path)
            class_name = filename.split('-')[0].replace('_', ' ')
            if class_name in class_name_set:
                self.image_paths.append(image_path)
                self.labels.append(class_name)

        # Convert labels to indices
        self.labels = [class_names.index(label) for label in self.labels]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(image_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

def get_image_loader(dataset, class_indices, batch_size, shuffle=False):
    if hasattr(dataset, 'targets'):
        selected_indices = [i for i, label in enumerate(dataset.targets) if label in class_indices]
    else:
        selected_indices = [i for i, (_, label) in enumerate(dataset.imgs) if label in class_indices]
    selected_dataset = Subset(dataset, selected_indices)

    # Ensure no transform or compatible transform is applied
    selected_dataset.dataset.transform = None

    loader = DataLoader(
        selected_dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        collate_fn=lambda batch: (list(zip(*batch))[0], list(zip(*batch))[1])
    )
    return loader

def pairwise_difference(a, b):
  """
  Computes pairwise difference between rows of two matrices.

  Args:
    a: NumPy array of shape (N_a, D)
    b: NumPy array of shape (N_b, D)

  Returns:
    NumPy array of shape (N_a, N_b, D) containing pairwise differences.
  """
  return a[:, np.newaxis, :] - b[np.newaxis, :, :]

def run_experiment(dataset_name, template, use_pseudo_data):
    # 1. Load dataset (test split)
    dataset = get_dataset(dataset_name, data_root='data', split='test')
    class_names = dataset.classes
    num_classes = len(class_names)

    if use_pseudo_data:
        PSEUDO_IMAGES_FOLDER = f'pseudo_images/{dataset_name}'
        pseudo_dataset = PseudoImageDataset(
            image_folder=PSEUDO_IMAGES_FOLDER,
            class_names=class_names,
            transform=None
        )
        dataloader = DataLoader(
            pseudo_dataset,
            batch_size=64,
            shuffle=False,
            pin_memory=True,
            collate_fn=lambda batch: (list(zip(*batch))[0], list(zip(*batch))[1])
        )
    else:
        selected_class_indices = list(range(num_classes))
        dataloader = get_image_loader(dataset, selected_class_indices, 64, shuffle=False)

    # 2. Load model
    model_info = load_model(model_name, device=device)

    # 3. Create captions and compute text embeddings
    captions = [template.format(c) for c in class_names]
    text_embeddings = get_text_embeddings(captions, model_info, device, batch_size=64)

    # optional: create alternative captions and compute text embeddings
    # Initialize CaptionGenerator and generate alternative captions
    capGenerator = CaptionGenerator(dataset_name=dataset_name, class_names=class_names, num_captions=40)

    alter_caption_list = []
    labels = []
    for i,class_name in tqdm(enumerate(class_names), desc="Generating Alternative Captions"):
        alter_captions = capGenerator.get_alternative_captions(class_name)
        alter_caption_list.extend(alter_captions)
        labels.extend([i] * len(alter_captions))
    labels = np.array(labels)
    # Compute text embeddings for alternative captions
    alter_text_embeddings = get_text_embeddings(alter_caption_list, model_info, device, batch_size=64)
    alter_text_embeddings = alter_text_embeddings / np.linalg.norm(alter_text_embeddings, axis=1, keepdims=True)

    # 4. Compute image embeddings
    image_embeddings, all_targets = get_image_embeddings(dataloader, model_info, device)
    all_targets = np.array(all_targets)

    # Normalize embeddings
    image_embeddings = image_embeddings / np.linalg.norm(image_embeddings, axis=1, keepdims=True)
    text_embeddings = text_embeddings / np.linalg.norm(text_embeddings, axis=1, keepdims=True)

    # img shape: (N_img, D), text shape: (N_text, D)

    # 5. Compute similarities and predictions
    temperature = 0.01
    similarities = np.matmul(image_embeddings, text_embeddings.T) / temperature
    probs = np.exp(similarities) / np.sum(np.exp(similarities), axis=1, keepdims=True)

    preds = np.argmax(probs, axis=1)
    confidences = np.max(probs, axis=1)

    predicted_cm_scores = []
    # do some class-wise operations
    for c in range(num_classes):
        # Select rows of image_embeddings where the target is class c
        c_embeddings = image_embeddings[all_targets == c] # shape (N_c, D)

        other_class_text_embeddings = np.concatenate([text_embeddings[i:i+1] for i in range(num_classes) if i != c], axis=0) # shape (N_t-1, D)
        text_diff = pairwise_difference(text_embeddings[c:c+1], other_class_text_embeddings) # shape (1, N_t-1, D)
        
        c_embeddings = c_embeddings / np.linalg.norm(c_embeddings, axis=-1, keepdims=True)
        text_diff = text_diff / np.linalg.norm(text_diff, axis=-1, keepdims=True)

        cm_scores = np.einsum('cd, td -> ct', c_embeddings, text_diff[0]) # shape (N_c, N_t-1)
        
        cm_score = np.min(cm_scores, axis=-1) # shape (N_c, 1)
        # # cm_score = cm_score / (1 + cm_score) # shape (N_c, 1)
        
        def mask_score(cm_score):
            positive_or_zero_mask = cm_score >= 0
            negative_mask = cm_score < 0
            cm_score[positive_or_zero_mask] = 1
            cm_score[negative_mask] = 0
            return cm_score

        cm_score = mask_score(cm_score)
        cm_score = np.mean(cm_score, axis=0) # shape (1,)


        # use text_cm_scores as weights
        c_alter_text_embeddings = alter_text_embeddings[labels == c]
        cm_text_scores = np.einsum('cd, td -> ct', c_embeddings, c_alter_text_embeddings) # shape (N_c, N_t)
        cm_text_scores = np.min(cm_text_scores, axis=-1) # shape (N_c, 1)
        cm_text_scores = np.mean(cm_text_scores, axis=0) # shape (1,)
        if c < 5:
            print(f"Class {c}, {class_names[c]}: {cm_score}, {cm_text_scores}")
        cm_score = cm_score * cm_text_scores
        if np.isnan(cm_score):
            print(f"Class {c}, {class_names[c]} has NaN cm_score")
            cm_score = 0.0
        predicted_cm_scores.append(cm_score)

    # Compute class-wise actual and predicted accuracies
    actual_accuracies = []
    # predicted_accuracies = []
    for c in range(num_classes):
        cur_class_indices = (all_targets == c)
        cur_class_pred = preds[cur_class_indices]
        cur_class_accuracy = (cur_class_pred == c).mean()

        if np.isnan(cur_class_accuracy):
            print(f"Class {c}, {class_names[c]} has NaN accuracy")
            cur_class_accuracy = 0.0
        # predicted_cur_class_indices = (preds == c)
        # predicted_confidences = confidences[predicted_cur_class_indices]
        # predicted_acc = predicted_confidences.mean() if predicted_confidences.size > 0 else np.nan

        actual_accuracies.append(cur_class_accuracy)
        # predicted_accuracies.append(predicted_acc if not np.isnan(predicted_acc) else 0.0)
            
    # Compute Spearman correlation
    spearman_corr = spearmanr(np.array(actual_accuracies), predicted_cm_scores )# np.array(predicted_accuracies))

    # Return data for plotting outside
    return np.array(actual_accuracies), np.array(predicted_cm_scores), spearman_corr

# Now we call run_experiment for pseudo and real data and plot them together
for dinfo in datasets_info:
    actual_pseudo, predicted_pseudo, spearman_pseudo = run_experiment(dinfo["name"], dinfo["template"], use_pseudo_data=True)
    actual_real, predicted_real, spearman_real = run_experiment(dinfo["name"], dinfo["template"], use_pseudo_data=False)

    # Recompute pseudo Spearman correlation using actual_real for the y-values
    pseudo_spearman_corr = spearmanr(actual_real, predicted_pseudo)

    # Create a single plot for both pseudo and real
    fig, ax = plt.subplots(figsize=(8,8))
    # For pseudo, use actual_real as y-axis
    ax.scatter(predicted_pseudo, actual_real, color='tab:orange', label='Pseudo Images', alpha=0.5)
    ax.scatter(predicted_real, actual_real, color='tab:blue', label='Real Images', alpha=0.5)

    ax.set_xlabel("Predicted Accuracy (Confidence)")
    ax.set_ylabel("Actual Accuracy (Real Data)")
    pseudo_corr_str = f"{pseudo_spearman_corr.correlation:.2f}" if pseudo_spearman_corr.correlation is not None else "N/A"
    real_corr_str = f"{spearman_real.correlation:.2f}" if spearman_real.correlation is not None else "N/A"
    ax.set_title(f"Calibration Approach: {dinfo['name']}\nSpearman (Pseudo vs Real Accuracy): {pseudo_corr_str}, Spearman (Real vs Real Accuracy): {real_corr_str}")
    print(f"Calibration Approach: {dinfo['name']}\nSpearman (Pseudo vs Real Accuracy): {pseudo_corr_str}, Spearman (Real vs Real Accuracy): {real_corr_str}")
    ax.grid(True)
    # ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.legend()

    # Save the figure
    plot_filename = f"figures/{dinfo['name']}_CM_softmax.png"
    plt.savefig(plot_filename)
    plt.close(fig)

Processed batch 1/1
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['human-made structure', 'natural landscape', 'water body', 'recreational area', 'transportation hub', 'residential zone', 'industrial zone', 'agricultural area', 'sports facility', 'ecological setting', 'urban environment', 'natural feature', 'seasonal variations', 'geographical formations', 'climatic zone'].


Generating Alternative Captions: 45it [00:00, 1576.02it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\airplane.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\airport.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\baseball_diamond.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\basketball_court.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\beach.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\bridge.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\chaparral.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\church.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\circular_farmland.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\cloud.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\commercial_area.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\N




Processed batch 1/28
Processed batch 2/28
Processed batch 3/28
Processed batch 4/28
Processed batch 5/28
Processed batch 6/28
Processed batch 7/28
Processed batch 8/28
Processed batch 9/28
Processed batch 10/28
Processed batch 11/28
Processed batch 12/28
Processed batch 13/28
Processed batch 14/28
Processed batch 15/28
Processed batch 16/28
Processed batch 17/28
Processed batch 18/28
Processed batch 19/28
Processed batch 20/28
Processed batch 21/28
Processed batch 22/28
Processed batch 23/28
Processed batch 24/28
Processed batch 25/28
Processed batch 26/28
Processed batch 27/28
Processed batch 28/28


Processing Images: 100%|██████████| 25/25 [00:21<00:00,  1.19it/s]


Class 0, airplane: 0.34375, 0.14402268826961517
Class 1, airport: 0.2857142984867096, 0.12461353093385696
Class 2, baseball diamond: 0.8125, 0.20280376076698303
Class 3, basketball court: 0.7058823704719543, 0.1855473816394806
Class 4, beach: 0.9428571462631226, 0.14640764892101288
Processed batch 1/1
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['human-made structure', 'natural landscape', 'water body', 'recreational area', 'transportation hub', 'residential zone', 'industrial zone', 'agricultural area', 'sports facility', 'ecological setting', 'urban environment', 'natural f

Generating Alternative Captions: 45it [00:00, 3214.90it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\airplane.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\airport.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\baseball_diamond.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\basketball_court.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\beach.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\bridge.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\chaparral.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\church.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\circular_farmland.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\cloud.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\NWPU-RESISC45\40\commercial_area.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\N




Processed batch 10/28
Processed batch 11/28
Processed batch 12/28
Processed batch 13/28
Processed batch 14/28
Processed batch 15/28
Processed batch 16/28
Processed batch 17/28
Processed batch 18/28
Processed batch 19/28
Processed batch 20/28
Processed batch 21/28
Processed batch 22/28
Processed batch 23/28
Processed batch 24/28
Processed batch 25/28
Processed batch 26/28
Processed batch 27/28
Processed batch 28/28


Processing Images: 100%|██████████| 493/493 [04:39<00:00,  1.77it/s]


Class 0, airplane: 0.5099999904632568, 0.13204503059387207
Class 1, airport: 0.7128571271896362, 0.14218279719352722
Class 2, baseball diamond: 0.7885714173316956, 0.1921466886997223
Class 3, basketball court: 0.6785714030265808, 0.15609660744667053
Class 4, beach: 0.8642857074737549, 0.1109732836484909
Calibration Approach: NWPU-RESISC45
Spearman (Pseudo vs Real Accuracy): 0.60, Spearman (Real vs Real Accuracy): 0.96
Processed batch 1/2
Processed batch 2/2
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['Size variations (toy, miniature, standard, giant)', 'Coat types (short, lo

Generating Alternative Captions: 120it [00:00, 1646.61it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Chihuahua.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Japanese_spaniel.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Maltese_dog.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Pekinese.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Shih_Tzu.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Blenheim_spaniel.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\papillon.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\toy_terrier.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Rhodesian_ridgeback.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Afghan_hound.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\basset.json
Loaded data from cache: cache\gpt-4o-min




Processed batch 1/69
Processed batch 2/69
Processed batch 3/69
Processed batch 4/69
Processed batch 5/69
Processed batch 6/69
Processed batch 7/69
Processed batch 8/69
Processed batch 9/69
Processed batch 10/69
Processed batch 11/69
Processed batch 12/69
Processed batch 13/69
Processed batch 14/69
Processed batch 15/69
Processed batch 16/69
Processed batch 17/69
Processed batch 18/69
Processed batch 19/69
Processed batch 20/69
Processed batch 21/69
Processed batch 22/69
Processed batch 23/69
Processed batch 24/69
Processed batch 25/69
Processed batch 26/69
Processed batch 27/69
Processed batch 28/69
Processed batch 29/69
Processed batch 30/69
Processed batch 31/69
Processed batch 32/69
Processed batch 33/69
Processed batch 34/69
Processed batch 35/69
Processed batch 36/69
Processed batch 37/69
Processed batch 38/69
Processed batch 39/69
Processed batch 40/69
Processed batch 41/69
Processed batch 42/69
Processed batch 43/69
Processed batch 44/69
Processed batch 45/69
Processed batch 46/

Processing Images: 100%|██████████| 59/59 [00:55<00:00,  1.06it/s]


Class 0, Chihuahua: 0.6774193644523621, 0.20342130959033966
Class 1, Japanese spaniel: 0.6774193644523621, 0.22346416115760803
Class 2, Maltese dog: 0.6774193644523621, 0.2321224957704544
Class 3, Pekinese: 1.0, 0.26246076822280884
Class 4, Shih Tzu: 0.9677419066429138, 0.239768847823143
Processed batch 1/2
Processed batch 2/2
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['Size variations (toy, miniature, standard, giant)', 'Coat types (short, long, curly, wiry)', 'Head shape (flat, rounded, elongated)', 'Ear shape (standing, floppy, semi-erect)', 'Tail types (curled, straight

Generating Alternative Captions: 120it [00:00, 2424.23it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Chihuahua.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Japanese_spaniel.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Maltese_dog.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Pekinese.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Shih_Tzu.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Blenheim_spaniel.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\papillon.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\toy_terrier.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Rhodesian_ridgeback.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\Afghan_hound.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Stanford_dogs\40\basset.json
Loaded data from cache: cache\gpt-4o-min




Processed batch 4/69
Processed batch 5/69
Processed batch 6/69
Processed batch 7/69
Processed batch 8/69
Processed batch 9/69
Processed batch 10/69
Processed batch 11/69
Processed batch 12/69
Processed batch 13/69
Processed batch 14/69
Processed batch 15/69
Processed batch 16/69
Processed batch 17/69
Processed batch 18/69
Processed batch 19/69
Processed batch 20/69
Processed batch 21/69
Processed batch 22/69
Processed batch 23/69
Processed batch 24/69
Processed batch 25/69
Processed batch 26/69
Processed batch 27/69
Processed batch 28/69
Processed batch 29/69
Processed batch 30/69
Processed batch 31/69
Processed batch 32/69
Processed batch 33/69
Processed batch 34/69
Processed batch 35/69
Processed batch 36/69
Processed batch 37/69
Processed batch 38/69
Processed batch 39/69
Processed batch 40/69
Processed batch 41/69
Processed batch 42/69
Processed batch 43/69
Processed batch 44/69
Processed batch 45/69
Processed batch 46/69
Processed batch 47/69
Processed batch 48/69
Processed batch 

Processing Images: 100%|██████████| 322/322 [03:18<00:00,  1.62it/s]


Class 0, Chihuahua: 0.5263158082962036, 0.1952715367078781
Class 1, Japanese spaniel: 0.2864864766597748, 0.210490420460701
Class 2, Maltese dog: 0.64682537317276, 0.22480270266532898
Class 3, Pekinese: 0.6442952752113342, 0.23637105524539948
Class 4, Shih Tzu: 0.7383177280426025, 0.21679602563381195
Calibration Approach: Stanford_dogs
Spearman (Pseudo vs Real Accuracy): 0.55, Spearman (Real vs Real Accuracy): 0.96
Processed batch 1/4
Processed batch 2/4
Processed batch 3/4
Processed batch 4/4
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['Feathered body', 'Beak shape and size'

Generating Alternative Captions: 200it [00:00, 2020.23it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Black_footed_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Laysan_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Sooty_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Groove_billed_Ani.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Crested_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Least_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Parakeet_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Rhinoceros_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Brewer_Blackbird.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Red_winged_Blackbird.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Rusty_Blackbird.j




Processed batch 3/117
Processed batch 4/117
Processed batch 5/117
Processed batch 6/117
Processed batch 7/117
Processed batch 8/117
Processed batch 9/117
Processed batch 10/117
Processed batch 11/117
Processed batch 12/117
Processed batch 13/117
Processed batch 14/117
Processed batch 15/117
Processed batch 16/117
Processed batch 17/117
Processed batch 18/117
Processed batch 19/117
Processed batch 20/117
Processed batch 21/117
Processed batch 22/117
Processed batch 23/117
Processed batch 24/117
Processed batch 25/117
Processed batch 26/117
Processed batch 27/117
Processed batch 28/117
Processed batch 29/117
Processed batch 30/117
Processed batch 31/117
Processed batch 32/117
Processed batch 33/117
Processed batch 34/117
Processed batch 35/117
Processed batch 36/117
Processed batch 37/117
Processed batch 38/117
Processed batch 39/117
Processed batch 40/117
Processed batch 41/117
Processed batch 42/117
Processed batch 43/117
Processed batch 44/117
Processed batch 45/117
Processed batch 46

Processing Images: 100%|██████████| 98/98 [04:20<00:00,  2.66s/it]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Class 0, Black footed Albatross: 0.6666666865348816, 0.217391237616539
Class 1, Laysan Albatross: 0.3870967626571655, 0.1815800964832306
Class 2, Sooty Albatross: 0.375, 0.24657835066318512
Class 3, Groove billed Ani: 0.29032257199287415, 0.24348944425582886
Class 4, Crested Auklet: 0.15625, 0.2060243785381317
Class 93, White breasted Nuthatch has NaN cm_score


  cur_class_accuracy = (cur_class_pred == c).mean()
  ret = ret.dtype.type(ret / rcount)


Class 93, White breasted Nuthatch has NaN accuracy
Processed batch 1/4
Processed batch 2/4
Processed batch 3/4
Processed batch 4/4
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['Feathered body', 'Beak shape and size', 'Wing structure and length', 'Color patterns and plumage', 'Size and weight', 'Tail shape and length', 'Behavior (e.g. migratory, territorial)', 'Habitat preference (e.g. aquatic, forest, grassland)', 'Vocalizations and songs', 'Nesting habits (e.g. ground, trees, cliffs)'].


Generating Alternative Captions: 200it [00:00, 2941.17it/s]


Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Black_footed_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Laysan_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Sooty_Albatross.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Groove_billed_Ani.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Crested_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Least_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Parakeet_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Rhinoceros_Auklet.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Brewer_Blackbird.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Red_winged_Blackbird.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\CUB_200_2011\40\Rusty_Blackbird.j

Processing Images: 100%|██████████| 185/185 [02:13<00:00,  1.39it/s]


Class 0, Black footed Albatross: 0.699999988079071, 0.21971110999584198
Class 1, Laysan Albatross: 0.6333333253860474, 0.18751534819602966
Class 2, Sooty Albatross: 0.5517241358757019, 0.2540305554866791
Class 3, Groove billed Ani: 0.6166666746139526, 0.24049733579158783
Class 4, Crested Auklet: 0.4545454680919647, 0.24015261232852936
Calibration Approach: CUB_200_2011
Spearman (Pseudo vs Real Accuracy): 0.74, Spearman (Real vs Real Accuracy): 0.98
Processed batch 1/2
Processed batch 2/2
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102_global_traits.json
Using OpenAI model: gpt-4o-mini-2024-07-18
Configured to generate 40 captions.
Meta Prompt: You are an AI assistant that generates creative and diverse image captions suitable for use with image generation models like DALL-E. Given a subject, provide 40 distinct, diverse and descriptive captions, considering the following global taxonomical traits when generating captions: ['Petal arrangement', 'Leaf shape', 'Flower color

Generating Alternative Captions: 102it [00:00, 1608.31it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\alpine_sea_holly.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\anthurium.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\artichoke.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\azalea.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\ball_moss.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\balloon_flower.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\barbeton_daisy.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bearded_iris.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bee_balm.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bird_of_paradise.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bishop_of_llandaff.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\black-eyed_




Processed batch 4/60
Processed batch 5/60
Processed batch 6/60
Processed batch 7/60
Processed batch 8/60
Processed batch 9/60
Processed batch 10/60
Processed batch 11/60
Processed batch 12/60
Processed batch 13/60
Processed batch 14/60
Processed batch 15/60
Processed batch 16/60
Processed batch 17/60
Processed batch 18/60
Processed batch 19/60
Processed batch 20/60
Processed batch 21/60
Processed batch 22/60
Processed batch 23/60
Processed batch 24/60
Processed batch 25/60
Processed batch 26/60
Processed batch 27/60
Processed batch 28/60
Processed batch 29/60
Processed batch 30/60
Processed batch 31/60
Processed batch 32/60
Processed batch 33/60
Processed batch 34/60
Processed batch 35/60
Processed batch 36/60
Processed batch 37/60
Processed batch 38/60
Processed batch 39/60
Processed batch 40/60
Processed batch 41/60
Processed batch 42/60
Processed batch 43/60
Processed batch 44/60
Processed batch 45/60
Processed batch 46/60
Processed batch 47/60
Processed batch 48/60
Processed batch 

Processing Images: 100%|██████████| 51/51 [00:48<00:00,  1.05it/s]


Class 0, alpine sea holly: 0.6363636255264282, 0.2701209783554077
Class 1, anthurium: 0.970588207244873, 0.23110075294971466
Class 2, artichoke: 0.9677419066429138, 0.2559536099433899
Class 3, azalea: 0.5757575631141663, 0.23367458581924438
Class 4, ball moss: 0.9696969985961914, 0.22230002284049988
Class 11, black-eyed susan has NaN cm_score
Class 26, colt's foot has NaN cm_score
Class 32, desert-rose has NaN cm_score
Class 44, globe-flower has NaN cm_score
Class 47, hard-leaved pocket orchid has NaN cm_score
Class 72, pink-yellow dahlia has NaN cm_score
Class 79, ruby-lipped cattleya has NaN cm_score
Class 11, black-eyed susan has NaN accuracy
Class 26, colt's foot has NaN accuracy
Class 32, desert-rose has NaN accuracy
Class 44, globe-flower has NaN accuracy
Class 47, hard-leaved pocket orchid has NaN accuracy
Class 72, pink-yellow dahlia has NaN accuracy
Class 79, ruby-lipped cattleya has NaN accuracy
Processed batch 1/2
Processed batch 2/2
Loaded data from cache: cache\gpt-4o-mini

Generating Alternative Captions: 102it [00:00, 2400.04it/s]

Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\alpine_sea_holly.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\anthurium.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\artichoke.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\azalea.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\ball_moss.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\balloon_flower.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\barbeton_daisy.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bearded_iris.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bee_balm.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bird_of_paradise.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\bishop_of_llandaff.json
Loaded data from cache: cache\gpt-4o-mini-2024-07-18\Flower102\40\black-eyed_




Processed batch 5/60
Processed batch 6/60
Processed batch 7/60
Processed batch 8/60
Processed batch 9/60
Processed batch 10/60
Processed batch 11/60
Processed batch 12/60
Processed batch 13/60
Processed batch 14/60
Processed batch 15/60
Processed batch 16/60
Processed batch 17/60
Processed batch 18/60
Processed batch 19/60
Processed batch 20/60
Processed batch 21/60
Processed batch 22/60
Processed batch 23/60
Processed batch 24/60
Processed batch 25/60
Processed batch 26/60
Processed batch 27/60
Processed batch 28/60
Processed batch 29/60
Processed batch 30/60
Processed batch 31/60
Processed batch 32/60
Processed batch 33/60
Processed batch 34/60
Processed batch 35/60
Processed batch 36/60
Processed batch 37/60
Processed batch 38/60
Processed batch 39/60
Processed batch 40/60
Processed batch 41/60
Processed batch 42/60
Processed batch 43/60
Processed batch 44/60
Processed batch 45/60
Processed batch 46/60
Processed batch 47/60
Processed batch 48/60
Processed batch 49/60
Processed batch

Processing Images: 100%|██████████| 128/128 [01:41<00:00,  1.26it/s]


Class 0, alpine sea holly: 0.6279069781303406, 0.26257994771003723
Class 1, anthurium: 0.9904761910438538, 0.22898823022842407
Class 2, artichoke: 0.7435897588729858, 0.24526415765285492
Class 3, azalea: 0.7604166865348816, 0.23608458042144775
Class 4, ball moss: 0.0, 0.13882482051849365
Calibration Approach: Flower102
Spearman (Pseudo vs Real Accuracy): 0.48, Spearman (Real vs Real Accuracy): 0.94


In [10]:
from prettytable import PrettyTable

table = PrettyTable()
table.field_names = ["Dataset", "Spearman (Pseudo vs Real) - Discretization Method", "Spearman (Pseudo vs Real) - Default method", "Spearman (Real vs Real) - Discretization Method", "Spearman (Real vs Real) - Default method"]
table.add_row(["Stanford_dogs", "0.59", "**0.62**", "**1.00**", "0.98"])
table.add_row(["NWPU-RESISC45", "**0.61**", "0.58", "**1.00**", "0.98"])
table.add_row(["Flower102", "**0.48**", "0.35", "**1.00**", "0.96"])
table.add_row(["CUB_200_2011", "0.73", "**0.76**", "**1.00**", "0.99"])

print(table)

+---------------+---------------------------------------------------+--------------------------------------------+-------------------------------------------------+------------------------------------------+
|    Dataset    | Spearman (Pseudo vs Real) - Discretization Method | Spearman (Pseudo vs Real) - Default method | Spearman (Real vs Real) - Discretization Method | Spearman (Real vs Real) - Default method |
+---------------+---------------------------------------------------+--------------------------------------------+-------------------------------------------------+------------------------------------------+
| Stanford_dogs |                        0.59                       |                  **0.62**                  |                     **1.00**                    |                   0.98                   |
| NWPU-RESISC45 |                      **0.61**                     |                    0.58                    |                     **1.00**                    |    