In [7]:
# post_process_results.ipynb

# Import necessary libraries
import os
import json
import yaml
import pandas as pd
import numpy as np

# Function to map dataset names to display names
def get_display_name(dataset_name):
    """Translate a raw dataset identifier into its short display label.

    Args:
        dataset_name: Identifier to look up (e.g. ``'wds_cars'``).

    Returns:
        The label registered for ``dataset_name`` in the table below, or
        ``dataset_name`` itself when no entry exists for it.
    """
    known_labels = dict([
        ('CUB_200_2011', 'CUB'),
        ('Flower102', 'Flowers'),
        ('wds_vtab-flowers', 'VTAB-Flowers'),
        ('wds_vtab-pets', 'VTAB-Pets'),
        ('wds_vtab-resisc45', 'VTAB-Resisc45'),
        ('wds_vtab-cifar100', 'VTAB-CIFAR100'),
        # ('Stanford_dogs', 'Stanford Dogs'),
        ('wds_cars', 'Cars'),
        ('wds_fgvc_aircraft', 'Aircraft'),
        ('wds_food101', 'Food101'),
        ('wds_imagenetv2', 'ImageNetV2'),
        ('wds_objectnet', 'ObjectNet'),
        ('wds_sun397', 'SUN397'),
        # Register further identifiers here as needed
    ])
    try:
        return known_labels[dataset_name]
    except KeyError:
        # Unregistered identifiers pass through untouched
        return dataset_name

# Read the YAML experiment configuration from disk
config_path = 'configs/config.yaml'
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Pull the dataset names, the model names and the GPT model name out of the
# parsed configuration; each dataset/model entry carries its name under 'name'
datasets = [entry['name'] for entry in config['datasets']]
models = [entry['name'] for entry in config['models']]
gpt_model = config['gpt_model']['name']
# Directory where results are stored
results_dir = config['experiment_params']['results_dir']

# Scores copied out of every result JSON, as (column suffix, JSON key) pairs.
# NOTE: the first key really is spelled 'betweem' in the result files; it must
# stay that way for the lookup to match.
score_fields = [
    ('Corr(KNN)', 'corr_betweem_actual_accuracies_and_knn_probabilities'),
    ('Corr(Text Consistency)', 'corr_between_text_only_consistency_scores_and_accuracy'),
    ('Corr(Classification Margin)', 'corr_between_classification_margin_scores_and_accuracy'),
    ('(MAE)', 'mae'),
]

# Scores are gathered as {column label: {dataset label: value}} and the
# DataFrame is built once after the loop. Enlarging an empty frame one cell at
# a time through df.loc re-allocates it for every new row and column.
scores_by_column = {}
row_labels = []  # dataset display names, in order of first appearance

# Iterate over models and datasets to collect the scores
for model in models:
    model_name = model.replace('/', '_')
    model_col_prefix = get_display_name(model_name)
    for dataset in datasets:
        # Result files are named <model>_<gpt model>_<dataset>_results.json
        filename = os.path.join(results_dir, f"{model_name}_{gpt_model}_{dataset}_results.json")
        if not os.path.exists(filename):
            print(f"Result file {filename} not found.")
            continue
        with open(filename, 'r') as f:
            results = json.load(f)
        # Map dataset name to display name
        display_dataset_name = get_display_name(dataset)
        if display_dataset_name not in row_labels:
            row_labels.append(display_dataset_name)
        for suffix, key in score_fields:
            value = results.get(key)
            # A score absent from the file (or stored as null) becomes NaN so
            # the column stays numeric
            column = scores_by_column.setdefault(f'{model_col_prefix} {suffix}', {})
            column[display_dataset_name] = np.nan if value is None else value

# Rows: dataset display names; columns: one per (model, score) pair.
# Combinations whose result file was missing are left as NaN.
df = pd.DataFrame(scores_by_column, index=row_labels)

# Preferred presentation order of the dataset rows.
# A dataset listed here without any loaded results shows up as an all-NaN row.
specified_order = [
    'CUB',
    'Cars',
    'Aircraft',
    'Food101',
    'ImageNetV2',
    'ObjectNet',
    'SUN397',
    'VTAB-CIFAR100',
    'Flowers',
    'VTAB-Pets',
    'VTAB-Resisc45',
    'Stanford Dogs'
]
# reindex() keeps only the labels it is given, so a dataset whose display name
# is missing from the list above would silently vanish from the table. Such
# rows are appended after the preferred ones instead of being dropped.
unlisted_rows = [name for name in df.index if name not in specified_order]
df = df.reindex(specified_order + unlisted_rows)

# Round the scores to three decimals for presentation
df_rounded = df.round(3)

# Save the rounded table to a CSV file
output_csv = 'correlation_results.csv'
df_rounded.to_csv(output_csv)
print(f"Results saved to {output_csv}")

# Jupyter renders a bare expression only when it is the last statement of the
# cell, so the table is placed at the very end to actually be displayed.
df_rounded

Results saved to correlation_results.csv


In [8]:
# Render the rounded score table (rows: datasets, columns: per-model scores)
df_rounded

Unnamed: 0,openai_clip-vit-large-patch14 Corr(KNN),openai_clip-vit-large-patch14 Corr(Text Consistency),openai_clip-vit-large-patch14 Corr(Classification Margin),openai_clip-vit-large-patch14 (MAE),google_siglip-base-patch16-224 Corr(KNN),google_siglip-base-patch16-224 Corr(Text Consistency),google_siglip-base-patch16-224 Corr(Classification Margin),google_siglip-base-patch16-224 (MAE),facebook_flava-full Corr(KNN),facebook_flava-full Corr(Text Consistency),facebook_flava-full Corr(Classification Margin),facebook_flava-full (MAE)
CUB,0.393,0.416,0.423,0.269,0.496,0.422,0.475,0.181,0.439,0.482,0.464,0.266
Cars,0.452,0.407,0.329,0.2,0.372,0.277,0.274,0.107,0.54,0.552,0.585,0.266
Aircraft,0.403,0.405,0.418,0.254,0.514,0.546,0.632,0.229,0.261,0.337,0.329,0.151
Food101,0.444,0.403,0.421,0.217,0.193,0.127,0.109,0.173,-0.015,-0.076,-0.116,0.181
ImageNetV2,0.307,0.238,0.31,0.214,0.191,0.114,0.179,0.227,0.304,0.085,0.146,0.263
ObjectNet,,,,,,,,,,,,
SUN397,,,,,,,,,,,,
VTAB-CIFAR100,0.014,-0.144,-0.107,0.239,-0.212,-0.055,-0.124,0.261,0.128,0.085,0.153,0.201
Flowers,0.163,0.192,0.193,0.228,-0.006,-0.088,-0.036,0.289,0.372,0.33,0.276,0.289
VTAB-Pets,0.305,0.221,0.256,0.118,0.057,0.091,-0.016,0.264,-0.045,-0.072,-0.222,0.22


In [3]:
# Inspect `results`, the dict parsed from a result JSON inside the loading loop.
# NOTE(review): this cell's execution count (3) is lower than the loading
# cell's (7), so the output shown may be stale — re-run after the loading cell.
results

{'corr_between_actual_and_pseudo_accuracies': None,
 'corr_betweem_actual_accuracies_and_knn_probabilities': -0.04389375903155421,
 'mae': 0.3128984349878419,
 'mase_baseline': 0.2924679001372217,
 'overall_accuracy': 0.4361904761904762,
 'corr_between_text_only_consistency_scores_and_accuracy': 0.04467138917461374,
 'corr_between_classification_margin_scores_and_accuracy': 0.11721298132984932}