In [None]:
import pandas as pd
import pickle as pkl
import seaborn as sns
import numpy as np

from matplotlib import pyplot as plt
from scipy.stats import pearsonr, ttest_rel, f_oneway
from scipy.cluster import hierarchy
from os import listdir, path
from utils import create_attribute_dict, create_model_association_df, cohens_d, compute_normalized_frobenius_product

In [None]:
# Locations of the input data (relative paths)
MODEL_IMPRESSIONS_PATH = 'first_impression_similarities'  # directory scanned for per-model similarity pickles
ATTRIBUTE_PATH = 'prompts'  # directory containing the attribute prompt text files
OMI_PATH = 'omi/attribute_means.csv'  # CSV of OMI attribute ratings

In [None]:
# Load the OMI attribute ratings; the first CSV column becomes the index
omi_ratings = pd.read_csv(OMI_PATH, index_col=0)

# Every column of the ratings table is one OMI attribute
omi_attributes = omi_ratings.columns.to_list()

# Build the attribute-by-attribute Pearson r matrix one row at a time.
# Only the r statistic is kept; the p-value returned by pearsonr is discarded.
omi_correlations = []
for row_attribute in omi_attributes:
    row_ratings = omi_ratings[row_attribute]
    omi_correlations.append([
        pearsonr(omi_ratings[col_attribute], row_ratings)[0]
        for col_attribute in omi_attributes
    ])

# Wrap the human-human correlation matrix in a dataframe labelled by attribute on both axes
omi_correlation_df = pd.DataFrame(omi_correlations, index=omi_attributes, columns=omi_attributes)
omi_correlation_df.index.name = 'Attribute'

In [None]:
# Prompt files for the two poles of each attribute
positive_prompts_file = path.join(ATTRIBUTE_PATH, 'attributes.txt')
opposite_prompts_file = path.join(ATTRIBUTE_PATH, 'attributes_opposites.txt')

# Attribute -> positive polar prompt
attribute_dict = create_attribute_dict(positive_prompts_file)

# Attribute -> opposite prompt (the prompt for the opposing pole of the attribute)
opposite_dict = create_attribute_dict(opposite_prompts_file)

In [None]:
# Get a sorted list of the pickled similarity files in the model impressions directory.
# os.listdir returns entries in arbitrary order, so sorting keeps the model order
# reproducible across machines. This matters because later cells build per-dataset
# lists in this order and compare them position-by-position with paired tests.
model_pickles = sorted(i for i in listdir(MODEL_IMPRESSIONS_PATH) if i.split('.')[-1] == 'pkl')

# Get a list of the model names from the pickle file names
model_names = [i.split('_first_impression_similarities.pkl')[0] for i in model_pickles]

# Create a dictionary mapping model names to the file names of their similarity pickles
models_dict = dict(zip(model_names, model_pickles))

In [None]:
# Per-model results, keyed by model name:
#   model_correlation_dfs -> attribute-by-attribute correlation dataframe
#   frob_products         -> normalized Frobenius inner product with the human correlation matrix
model_correlation_dfs, frob_products = {}, {}

for model_name, model_file in models_dict.items():

    # Load the pickled similarity data for this model.
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    pickle_path = path.join(MODEL_IMPRESSIONS_PATH, model_file)
    with open(pickle_path, 'rb') as f:
        model_similarity_dict = pkl.load(f)

    # Tabulate the similarity data
    model_similarity_df = pd.DataFrame(model_similarity_dict)

    # Per-attribute association scores: the difference between each image's cosine
    # similarity to the positive prompt and to the opposite prompt
    model_association_df = create_model_association_df(
        model_similarity_df,
        attribute_dict,
        opposite_dict,
        baseline='difference',
    )

    # Build the attribute-by-attribute Pearson r matrix one row at a time
    # (only r is kept; the p-value returned by pearsonr is discarded)
    model_correlations = []
    for row_attribute in omi_attributes:
        row_associations = model_association_df[row_attribute]
        model_correlations.append([
            pearsonr(model_association_df[col_attribute], row_associations)[0]
            for col_attribute in omi_attributes
        ])

    # Wrap the model-model correlation matrix in a dataframe labelled by attribute on both axes
    model_correlation_df = pd.DataFrame(model_correlations, index=omi_attributes, columns=omi_attributes)
    model_correlation_df.index.name = 'Attribute'

    # Normalized Frobenius inner product between the human-human and model-model correlation matrices
    human_matrix = omi_correlation_df.to_numpy()
    model_matrix = model_correlation_df.to_numpy()
    normalized_frob_inner_product = compute_normalized_frobenius_product(human_matrix, model_matrix)

    # Record both results for this model
    model_correlation_dfs[model_name] = model_correlation_df
    frob_products[model_name] = normalized_frob_inner_product

In [None]:
# Bucket each model's Frobenius product by the dataset-size tag found in its name
models2b, models400m, models80m = [], [], []

for model_name, frob_product in frob_products.items():
    # g-14 and H-14 models are left out of the 2B group
    is_excluded_2b = 'g-14' in model_name or 'H-14' in model_name

    if '2B' in model_name and not is_excluded_2b:
        models2b.append(frob_product)
    elif '400M' in model_name:
        models400m.append(frob_product)
    elif '80M' in model_name:
        models80m.append(frob_product)


In [None]:
# Five-number summaries (min, 25th/50th/75th percentiles, max) for the 80m, 400m, and 2b groups

min_80m, max_80m = min(models80m), max(models80m)
quartile_80m, median_80m, quartile3_80m = np.percentile(models80m, [25, 50, 75])

min_400m, max_400m = min(models400m), max(models400m)
quartile_400m, median_400m, quartile3_400m = np.percentile(models400m, [25, 50, 75])

min_2b, max_2b = min(models2b), max(models2b)
quartile_2b, median_2b, quartile3_2b = np.percentile(models2b, [25, 50, 75])

# Print one summary line per group, in the order 80m, 400m, 2b
for lower_whisker, lower_quartile, median, upper_quartile, upper_whisker in (
    (min_80m, quartile_80m, median_80m, quartile3_80m, max_80m),
    (min_400m, quartile_400m, median_400m, quartile3_400m, max_400m),
    (min_2b, quartile_2b, median_2b, quartile3_2b, max_2b),
):
    print(f'lower whisker={lower_whisker}, lower quartile={lower_quartile}, median={median}, upper quartile={upper_quartile}, upper whisker={upper_whisker}')

In [None]:
# Boxplot of the normalized inner products by dataset size, approximating the visualization in the paper

# Shared text styling
tick_font = {'fontname': 'Times New Roman', 'fontsize': 12}
label_font = {'fontname': 'Times New Roman', 'fontsize': 14}

# Outliers are hidden (showfliers=False); boxes are drawn in the order 2b, 400m, 80m
sns.boxplot(data=[models2b, models400m, models80m], showfliers=False)

plt.xticks(
    [0, 1, 2],
    ['Scaling-2b', 'Scaling-400m', 'Scaling-80m'],
    rotation=15,
    ha='center',
    **tick_font,
)
plt.yticks(**tick_font)
plt.ylabel('Normalized Inner Product', **label_font)
plt.xlabel('Dataset', **label_font)
plt.title('Similarity of Model and Human Correlations by Dataset Size', **label_font)
plt.show()

In [None]:
# One-way ANOVA of the normalized inner products across the three dataset sizes.
# The test is run once and its result reused for the corrected p-value below.
anova_result = f_oneway(models2b, models400m, models80m)
print(anova_result)

# Correct the p-value for multiple comparisons by multiplying it by the number of
# comparisons (3, as in the original analysis). The product is capped at 1.0 because
# a p-value cannot exceed 1.
N_COMPARISONS = 3
print(min(anova_result[1] * N_COMPARISONS, 1.0))

In [None]:
# Paired t-tests of the normalized inner products between each pair of dataset sizes.
# ttest_rel pairs observations by position, so the two lists in each comparison
# must have the same length and be aligned model-for-model.
for comparison_label, first_group, second_group in (
    ('2b-400m', models2b, models400m),
    ('2b-80m', models2b, models80m),
    ('400m-80m', models400m, models80m),
):
    print(comparison_label)
    print(ttest_rel(first_group, second_group))

In [None]:
# Cohen's d effect size for each of the pairwise dataset-size comparisons
for comparison_label, first_group, second_group in (
    ('2b-400m', models2b, models400m),
    ('2b-80m', models2b, models80m),
    ('400m-80m', models400m, models80m),
):
    print(comparison_label)
    print(cohens_d(first_group, second_group))

In [None]:
# Pick the model whose correlation matrix is used in the figures that follow
selected_model_name = 'scaling_ViT-L-14__Model-L-14_Data-2B_Samples-13B_lr-1e-3_bs-86k'
corr_df = model_correlation_dfs[selected_model_name]

In [None]:
# Plot the human and model attribute-correlation dendrograms as side-by-side subfigures
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(4.2, 10))
fig.suptitle('Human and Model Attribute Correlation Clustering', fontsize=16, fontname='Times New Roman', y=0.93)

# Human dendrogram: Ward linkage on the rows of the human-human correlation matrix.
# The matrix is clustered directly (not its .corr(), which would be a correlation of
# correlations) so that the human tree is built the same way as the model tree below
# and as the human clustermap. Labels come from the human matrix's own columns.
human_linkage = hierarchy.linkage(omi_correlation_df, method='ward')
hierarchy.dendrogram(human_linkage, labels=list(omi_correlation_df.columns), leaf_font_size=12,
                     leaf_rotation=0, ax=ax1, orientation='right')
ax1.set_title('Human (OMI Dataset)', fontsize=15, fontname='Times New Roman')
plt.setp(ax1.get_yticklabels(), fontname='Times New Roman', size=14)
plt.setp(ax1.get_xticklabels(), visible=False)
ax1.set_xticks([])

# Model dendrogram: Ward linkage on the rows of the model-model correlation matrix
model_linkage = hierarchy.linkage(corr_df, method='ward')
hierarchy.dendrogram(model_linkage, labels=list(corr_df.columns), leaf_font_size=12,
                     leaf_rotation=0, ax=ax2, orientation='left')
ax2.set_title('CLIP-ViT-L-14', fontsize=15, fontname='Times New Roman')
plt.setp(ax2.get_yticklabels(), fontname='Times New Roman', size=14)
plt.setp(ax2.get_xticklabels(), visible=False)
ax2.set_xticks([])

plt.show()

In [None]:
# Heatmap with dendrograms of the model-model correlation data for CLIP-ViT-L-14

# Ward linkage on the correlation matrix, used for both the row and column dendrograms
model_linkage = hierarchy.linkage(corr_df, method='ward')

g = sns.clustermap(
    corr_df,
    cmap='coolwarm',
    row_cluster=True,
    col_cluster=True,
    figsize=(10, 10),
    cbar_kws={'label': 'Pearson\'s r'},
    vmin=-1,
    vmax=1,
    row_linkage=model_linkage,
    col_linkage=model_linkage,
    xticklabels=corr_df.columns,
    yticklabels=corr_df.columns,
    cbar_pos=(0.0, 0.87, 0.03, 0.1),
    dendrogram_ratio=(.1, .1),
    edgecolor='black',
    linewidths=.5,
)

# Apply the font and a per-element size to the heatmap and colorbar text
for text_artists, font_size in (
    (g.ax_heatmap.yaxis.get_majorticklabels(), 14),
    (g.ax_heatmap.xaxis.get_majorticklabels(), 14),
    (g.ax_heatmap.yaxis.get_label(), 16),
    (g.ax_cbar.yaxis.get_majorticklabels(), 12),
    (g.ax_cbar.yaxis.get_label(), 12),
):
    plt.setp(text_artists, fontname='Times New Roman', size=font_size)

# Render the figure
plt.show()

In [None]:
# Heatmap with dendrograms of the human-human correlation data from OMI

# Ward linkage on the correlation matrix, used for both the row and column dendrograms
human_linkage = hierarchy.linkage(omi_correlation_df, method='ward')

g = sns.clustermap(
    omi_correlation_df,
    cmap='coolwarm',
    row_cluster=True,
    col_cluster=True,
    figsize=(10, 10),
    cbar_kws={'label': 'Pearson\'s r'},
    vmin=-1,
    vmax=1,
    row_linkage=human_linkage,
    col_linkage=human_linkage,
    xticklabels=omi_correlation_df.columns,
    yticklabels=omi_correlation_df.columns,
    cbar_pos=(0.0, 0.87, 0.03, 0.1),
    dendrogram_ratio=(.1, .1),
    edgecolor='black',
    linewidths=.5,
)

# Apply the font and a per-element size to the heatmap and colorbar text
for text_artists, font_size in (
    (g.ax_heatmap.yaxis.get_majorticklabels(), 14),
    (g.ax_heatmap.xaxis.get_majorticklabels(), 14),
    (g.ax_heatmap.yaxis.get_label(), 16),
    (g.ax_cbar.yaxis.get_majorticklabels(), 12),
    (g.ax_cbar.yaxis.get_label(), 12),
):
    plt.setp(text_artists, fontname='Times New Roman', size=font_size)

# Render the figure
plt.show()