In [None]:
import pandas as pd
import pickle as pkl
import numpy as np
import seaborn as sns

from matplotlib import pyplot as plt
from os import listdir, path
from scipy.stats import pearsonr, spearmanr

In [None]:
# Prompt naming the positive pole of each attribute; the values are used as
# column labels when reading the per-model similarity tables.
attribute_map = dict(
    Valence='a photo of something good',
    Arousal='a photo of something arousing',
    Beauty='a photo of something beautiful',
)

# Prompt naming the opposing pole, keyed identically to attribute_map so the
# two can be looked up side by side.
opposite_map = dict(
    Valence='a photo of something bad',
    Arousal='a photo of something dull',
    Beauty='a photo of something ugly',
)

In [None]:
# See https://osf.io/6pnd7 for the OASIS data
oasis_data = pd.read_csv('OASIS.csv')

# Similarity files whose names start with 'openai'. os.listdir returns entries
# in arbitrary order, so the names are sorted to keep every downstream table in
# the same row order from one run (or machine) to the next.
oai_model_similarities = sorted(
    sim_name
    for sim_name in listdir('oasis_similarities')
    if sim_name.startswith('openai')
)

In [None]:
# Beauty ratings for the OASIS images; the CSV is published at
# https://github.com/aenneb/OASIS-beauty
beauty_data = pd.read_csv('beauty_means.csv')
# Preview the first five rows.
beauty_data.head(n=5)

In [None]:
def _as_jpg_name(theme):
    """Return ``theme`` stripped of surrounding whitespace with one '.jpg' suffix.

    The suffix is only added when it is missing, so applying this twice gives
    the same result as applying it once (the cell can be re-run safely).
    """
    name = theme.strip()
    return name if name.endswith('.jpg') else name + '.jpg'


# Turn each theme into an image file name; these names become the row index
# that the similarity tables are later aligned against.
oasis_data['Theme'] = oasis_data['Theme'].apply(_as_jpg_name)

# Copy the beauty ratings by position. On a fresh run both frames carry the
# default RangeIndex, so this matches the label-aligned assignment it replaces;
# unlike that assignment it does not turn into all-NaN columns when the cell is
# re-run after the index has been switched to 'Theme', and it raises on a row
# count mismatch instead of silently padding with NaN.
# NOTE(review): assumes beauty_means.csv lists the images in the same order as
# OASIS.csv -- TODO confirm.
oasis_data['Beauty_mean'] = beauty_data['beauty_mean'].to_numpy()
oasis_data['Beauty_men'] = beauty_data['men_beauty_mean'].to_numpy()
oasis_data['Beauty_women'] = beauty_data['women_beauty_mean'].to_numpy()

# Index rows by image file name (the 'Theme' column is kept as well).
oasis_data.index = oasis_data['Theme']
oasis_data.head()

In [None]:
# Spearman correlation between each model's prompt-difference score and the
# human ratings: one list per attribute, one entry per similarity file that
# was loaded successfully.
sim_dict = {
    'Valence': [],
    'Arousal': [],
    'Beauty': [],
}

# Row labels are collected only for files that actually loaded, so the index
# always has the same length as the lists in sim_dict even when a file is
# skipped.
model_labels = []

for sim_file in oai_model_similarities:
    # NOTE: unpickling can execute arbitrary code; only load similarity files
    # from a trusted source.
    try:
        with open(f'oasis_similarities/{sim_file}', 'rb') as f:
            data = pd.DataFrame(pkl.load(f))
    except Exception as err:
        # Best effort: an unreadable file is skipped, but visibly, and without
        # swallowing KeyboardInterrupt/SystemExit as a bare except would.
        print(f'Skipping {sim_file}: {err!r}')
        continue

    # Sort data according to index of oasis_data
    data = data.loc[oasis_data.index]

    for key in attribute_map.keys():
        # Score = similarity to the attribute prompt minus similarity to its
        # opposite prompt.
        diff = data[attribute_map[key]] - data[opposite_map[key]]
        binary = diff.to_list()
        ground_truth = oasis_data[f'{key}_mean'].to_list()
        spearman = spearmanr(binary, ground_truth)

        # Element 0 of the result is the correlation coefficient.
        sim_dict[key].append(round(spearman[0], 2))

    # Label is the second '_'-separated field of the file name.
    model_labels.append('CLIP ' + sim_file.split('_')[1])

sim_df = pd.DataFrame(sim_dict, index=model_labels)

print(sim_df)