In [9]:
# post_process_results.ipynb

# Import necessary libraries
import os
import json
import yaml
import pandas as pd
import numpy as np

# Function to map dataset names to display names
def get_display_name(dataset_name):
    """Return the short display label for a dataset identifier.

    Args:
        dataset_name: Identifier to translate (e.g. ``'wds_cars'``).

    Returns:
        The display label from the table below, or ``dataset_name`` itself
        when the table has no entry for it.
    """
    display_names = dict([
        ('CUB_200_2011', 'CUB'),
        ('Flower102', 'Flowers'),
        ('wds_vtab-flowers', 'VTAB-Flowers'),
        ('wds_vtab-pets', 'VTAB-Pets'),
        ('wds_vtab-resisc45', 'VTAB-Resisc45'),
        ('wds_vtab-cifar100', 'VTAB-CIFAR100'),
        # ('Stanford_dogs', 'Stanford Dogs') is intentionally left disabled.
        ('wds_cars', 'Cars'),
        ('wds_fgvc_aircraft', 'Aircraft'),
        ('wds_food101', 'Food101'),
        ('wds_imagenetv2', 'ImageNetV2'),
        ('wds_objectnet', 'ObjectNet'),
        ('wds_sun397', 'SUN397'),
        # Extend with further (identifier, label) pairs as needed.
    ])
    # Unknown identifiers pass through untouched.
    if dataset_name in display_names:
        return display_names[dataset_name]
    return dataset_name

# Read the experiment configuration from YAML
config_path = 'configs/config.yaml'
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)

# Names of the datasets and models to report on, plus the GPT model name
dataset_entries = config['datasets']
datasets = [entry['name'] for entry in dataset_entries]
model_entries = config['models']
models = [entry['name'] for entry in model_entries]
gpt_model = config['gpt_model']['name']
# Start from an empty DataFrame (no rows or columns yet)
df = pd.DataFrame()

# Directory holding the per-run result files
experiment_params = config['experiment_params']
results_dir = experiment_params['results_dir']

# Collect the scores of every (model, dataset) pair into one table.
#
# Column suffix -> key in the result JSON. The "betweem" spelling is the key as
# it is stored in the result files, so it must be kept verbatim.
metric_keys = {
    'Corr(KNN)': 'corr_betweem_actual_accuracies_and_knn_probabilities',
    'Corr(Text Consistency)': 'corr_between_text_only_consistency_scores_and_accuracy',
    'Corr(Classification Margin)': 'corr_between_classification_margin_scores_and_accuracy',
    '(MAE)': 'mae',
}

# display dataset name -> {column name: score}; the DataFrame is built from this
# once after the loop instead of being enlarged one cell at a time.
scores_by_dataset = {}
for model in models:
    # Model names may contain '/', which the result filenames replace with '_'
    model_name = model.replace('/', '_')
    model_col_prefix = get_display_name(model_name)
    for dataset in datasets:
        # Construct the filename for the result JSON
        filename = os.path.join(results_dir, f"{model_name}_{gpt_model}_{dataset}_results.json")
        # Missing runs are reported and skipped rather than treated as errors
        if not os.path.exists(filename):
            print(f"Result file {filename} not found.")
            continue
        with open(filename, 'r', encoding='utf-8') as f:
            results = json.load(f)
        row = scores_by_dataset.setdefault(get_display_name(dataset), {})
        for suffix, key in metric_keys.items():
            # .get() leaves a missing metric as None instead of raising
            row[f'{model_col_prefix} {suffix}'] = results.get(key)

# One row per dataset display name, one column per "<model> <metric>"
df = pd.DataFrame.from_dict(scores_by_dataset, orient='index')

# Reorder the DataFrame to match your specified dataset order
specified_order = [
    'CUB',
    'Cars',
    'Aircraft',
    'Food101',
    'ImageNetV2',
    'ObjectNet',
    'SUN397',
    'VTAB-CIFAR100',
    'Flowers',
    'VTAB-Pets',
    'VTAB-Resisc45',
    'Stanford Dogs'
]
# reindex keeps only the rows listed above (and adds an all-NaN row for every
# listed dataset that has no results), so report what is about to be dropped
# instead of losing it silently.
unlisted = [name for name in df.index if name not in specified_order]
if unlisted:
    print(f"Datasets with results but not in specified_order (dropped): {unlisted}")
df = df.reindex(specified_order)

# Round the values for presentation. (A bare `df_rounded` expression here would
# not be rendered because it is not the last statement of the cell.)
df_rounded = df.round(3)

# Save the DataFrame to a CSV file (optional)
output_csv = 'correlation_results.csv'
df_rounded.to_csv(output_csv)
print(f"Results saved to {output_csv}")

Results saved to correlation_results.csv


In [10]:
# Render the rounded score table (df rounded to 3 decimals) as this cell's output.
df_rounded

Unnamed: 0,openai_clip-vit-large-patch14 Corr(KNN),openai_clip-vit-large-patch14 Corr(Text Consistency),openai_clip-vit-large-patch14 Corr(Classification Margin),openai_clip-vit-large-patch14 (MAE),google_siglip-base-patch16-224 Corr(KNN),google_siglip-base-patch16-224 Corr(Text Consistency),google_siglip-base-patch16-224 Corr(Classification Margin),google_siglip-base-patch16-224 (MAE),facebook_flava-full Corr(KNN),facebook_flava-full Corr(Text Consistency),facebook_flava-full Corr(Classification Margin),facebook_flava-full (MAE)
CUB,0.445,0.405,0.498,0.292,0.533,0.468,0.505,0.187,0.467,0.495,0.495,0.293
Cars,0.456,0.455,0.398,0.211,0.32,0.3,0.339,0.078,0.531,0.544,0.594,0.374
Aircraft,0.395,0.456,0.443,0.48,0.58,0.658,0.658,0.437,0.224,0.262,0.233,0.48
Food101,0.358,0.396,0.427,0.075,0.17,0.247,0.205,0.074,0.022,0.019,-0.05,0.172
ImageNetV2,0.182,0.167,0.187,0.251,0.139,0.123,0.178,0.216,0.195,0.075,0.139,0.297
ObjectNet,,,,,,,,,,,,
SUN397,,,,,,,,,,,,
VTAB-CIFAR100,0.054,-0.077,-0.035,0.168,-0.012,0.018,0.015,0.187,0.306,0.224,0.269,0.217
Flowers,0.245,0.256,0.255,0.226,0.143,0.052,0.111,0.157,0.344,0.418,0.353,0.356
VTAB-Pets,0.19,0.216,0.257,0.094,0.041,0.164,0.121,0.079,0.237,0.092,-0.036,0.29


In [11]:
# Inspect one raw result JSON. `results` is left over from the collection loop:
# it holds the last result file that was found and loaded, i.e. a single
# model/dataset pair. On a fresh kernel where no result file was found, the
# name is undefined and this cell raises NameError.
results

{'corr_betweem_actual_accuracies_and_knn_probabilities': 0.06305386348098134,
 'mae': 0.3770448895555218,
 'mase_baseline': 0.2924679001372217,
 'overall_accuracy': 0.4361904761904762,
 'corr_between_text_only_consistency_scores_and_accuracy': 0.12406522981681073,
 'corr_between_classification_margin_scores_and_accuracy': 0.20760995175399394}