In [None]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

from scipy.stats import spearmanr, ttest_rel
from typing import Tuple

In [None]:
# Locations of the two input CSV files, relative to the notebook's working directory
OMI_PATH = "omi/attribute_means.csv"            # OMI attribute ratings table
HUMAN_IRR_PATH = "peterson_irr/human_irr.csv"   # human IRR table

In [None]:
# Load the human IRR table and build an {attribute: human_irr} lookup from its
# 'attribute' and 'human_irr' columns (for a repeated attribute, the last row wins)
human_irr = pd.read_csv(HUMAN_IRR_PATH)
human_irr_dict = dict(zip(human_irr['attribute'].tolist(), human_irr['human_irr'].tolist()))

In [None]:
# Load the OMI attribute ratings; the first CSV column is used as the row index
omi_ratings = pd.read_csv(OMI_PATH, index_col=0)

# Every remaining column is one attribute; keep their names as a plain list
omi_attributes = list(omi_ratings.columns)

# Preview the first 5 rows of the ratings table
omi_ratings.head()

In [None]:
# Load the ViT embedding matrix; it is used below as the regressors (X) when
# fitting one OLS model per OMI attribute
VIT_VECS_PATH = './text_to_image/vit_vecs.npy'
vecs = np.load(VIT_VECS_PATH)

In [None]:
def f1_score(y_true: np.ndarray,
             y_pred: np.ndarray) -> Tuple[float, float, float]:
    """
    Calculate the F1 score of binary predictions, together with precision and recall.

    Parameters
    ----------
    y_true : array-like of 0/1 ground-truth labels.
    y_pred : array-like of 0/1 predicted labels, same length as ``y_true``.

    Returns
    -------
    (f1, precision, recall) as Python floats.

    A metric whose denominator is zero (no predicted positives, no actual
    positives, or precision + recall == 0) is reported as 0.0 instead of NaN,
    so callers can safely round, format and rank the results.
    """

    # Cast both inputs to int so the products below count matches
    y_true = np.asarray(y_true).astype(int)
    y_pred = np.asarray(y_pred).astype(int)

    # Confusion-matrix counts (true negatives are not needed for F1)
    tp = int(np.sum(y_true * y_pred))
    fp = int(np.sum((1 - y_true) * y_pred))
    fn = int(np.sum(y_true * (1 - y_pred)))

    # Guard every division: 0/0 would otherwise yield NaN plus a RuntimeWarning
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision + recall > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    # Return F1 score, precision, and recall
    return f1, precision, recall

In [None]:
f1_dict = {}
param_dict = {}

# The regressors are identical for every attribute, so build the design matrix once.
# It is passed to OLS as-is (no constant column is added here).
X = np.array(vecs)

# Labels for the generated images: the first 25 rows count as positives, the next 25 as negatives
ground_truth = np.concatenate([np.ones(25, dtype=int), np.zeros(25, dtype=int)])

# For each OMI attribute: fit a weight vector, project the SDXL image embeddings onto it,
# and score how well the sign of the projection separates the two groups of images
for attribute in omi_attributes:

    y = omi_ratings[attribute]
    model = sm.OLS(y, X).fit()

    params = np.array(model.params)
    param_dict[attribute] = params

    # Projection onto the weight vector, scaled by the weight vector's norm
    tti_vecs = np.load(f'./text_to_image/{attribute}_sdxl_vit_vecs.npy')
    projection = np.dot(tti_vecs, params) / np.linalg.norm(params)

    # A strictly positive projection is predicted as the positive class
    pred = (projection > 0).astype(int)
    f1, precision, recall = f1_score(ground_truth, pred)
    f1_dict[attribute] = f1

# Rank attributes from highest to lowest F1
sorted_f1_attributes = sorted(f1_dict.items(), key=lambda item: item[1], reverse=True)

In [None]:
tti_models = ['sdxl', 'sd2', 'runwaysd']
tti_f1_dict = {tti_model: {} for tti_model in tti_models}


def _gray_cell(score) -> str:
    """Format one score as a LaTeX table cell: \\gray{<integer percent>}<score rounded to 2 dp>."""
    rounded = round(score, 2)
    # Round again before int(): 0.57 * 100 == 56.99999999999999, which int() alone truncates to 56
    percent = int(round(float(rounded) * 100))
    return f'\\gray{{{percent}}}{rounded}'


# Labels for the generated images: the first 25 rows count as positives, the next 25 as negatives
ground_truth = np.array([1] * 25 + [0] * 25)

full_rows = []   # attribute & (F1 & precision & recall) per TTI model
f1_rows = []     # attribute & F1 per TTI model

# Compute F1 scores and build one LaTeX table row per attribute, in the order of sorted_f1_attributes
for entry in sorted_f1_attributes:

    attribute = entry[0]
    params = param_dict[attribute]

    full_cells = [f'{attribute}']
    f1_cells = [f'{attribute}']

    # Iterate over each TTI model to calculate F1 scores
    for tti_model in tti_models:
        tti_vecs = np.load(f'./text_to_image/{attribute}_{tti_model}_vit_vecs.npy')
        projection = np.dot(tti_vecs, params)
        projection = projection / np.linalg.norm(params)

        # A strictly positive projection is predicted as the positive class
        pred = np.array([1 if value > 0 else 0 for value in projection])

        f1, precision, recall = f1_score(ground_truth, pred)
        full_cells.append(' & '.join(_gray_cell(score) for score in (f1, precision, recall)))
        f1_cells.append(_gray_cell(f1))

        tti_f1_dict[tti_model][attribute] = f1

    # Each row ends with the LaTeX row terminator ' \\ ' followed by a newline
    full_rows.append(' & '.join(full_cells) + ' \\\\ \n')
    f1_rows.append(' & '.join(f1_cells) + ' \\\\ \n')

latex_table_string = ''.join(full_rows)
# NOTE(review): the F1-only table is built but never written to disk in this cell
f1_only_table_string = ''.join(f1_rows)

# Write the LaTeX table to a file
with open('./text_to_image/f1_scores.txt', 'w') as f:
    f.write(latex_table_string)


In [None]:
# Compute Spearman correlation between human IRR and each TTI model's F1 scores.
# The loop variable gets its own name: reusing `tti_f1_dict` as the loop target rebinds
# the outer dict to one model's inner dict, which breaks any re-run of this cell.
for tti_model, model_f1_scores in tti_f1_dict.items():

    # Both lists follow the same attribute order; human IRR is divided by 100
    human_irr_list = [human_irr_dict[attribute]/100 for attribute in model_f1_scores]
    model_f1_list = list(model_f1_scores.values())

    spearman = spearmanr(human_irr_list, model_f1_list)

    print(f'{tti_model} Spearman: {spearman}')

In [None]:
effect_size_dict = {}

# Keep only the first 25 rows of each file (the positive-prompt vectors)
black_vecs = np.load('./text_to_image/black_sdxl_vit_vecs.npy')[:25]
white_vecs = np.load('./text_to_image/white_sdxl_vit_vecs.npy')[:25]

# For every attribute, compare the two groups' projections onto its weight vector
for att, weights in param_dict.items():

    weight_norm = np.linalg.norm(weights)
    proj_black = np.dot(black_vecs, weights) / weight_norm
    proj_white = np.dot(white_vecs, weights) / weight_norm

    # Difference of group means (white minus black), scaled by the sample standard
    # deviation (ddof=1) of both groups' projections taken together
    mean_gap = np.mean(proj_white) - np.mean(proj_black)
    spread = np.std(np.concatenate([proj_black, proj_white]), ddof=1)
    effect_size_dict[att] = mean_gap / spread

# Largest effect size first; this ordering drives the plot built below
sorted_attributes = sorted(effect_size_dict.items(), key=lambda pair: pair[1], reverse=True)

In [None]:
draw_code_list = []
boxplot_lines = []

black_vecs = np.load('./text_to_image/black_sdxl_vit_vecs.npy')[:25]
white_vecs = np.load('./text_to_image/white_sdxl_vit_vecs.npy')[:25]


def _boxplot_line(q1, median, q3, fill: str) -> str:
    """Return one pgfplots `boxplot prepared` line; whiskers are pinned to the quartiles."""
    return (
        f'\\addplot+[boxplot prepared={{lower whisker={q1}, lower quartile={q1}, median={median}, '
        f'upper quartile={q3}, upper whisker={q3}}}, color=black, fill={fill}, solid] coordinates {{}};'
    )


# Generate LaTeX code for visualizing effect sizes with boxplots
for idx, (att_name, att_effect) in enumerate(sorted_attributes):

    weights = param_dict[att_name]
    weight_norm = np.linalg.norm(weights)

    proj_black = np.dot(black_vecs, weights) / weight_norm
    black_median, black_q1, black_q3 = np.percentile(proj_black, [50, 25, 75])

    proj_white = np.dot(white_vecs, weights) / weight_norm
    white_median, white_q1, white_q3 = np.percentile(proj_white, [50, 25, 75])

    # The white box is emitted before the black box for each attribute
    boxplot_lines.append(_boxplot_line(white_q1, white_median, white_q3, 'blue!65'))
    boxplot_lines.append(_boxplot_line(black_q1, black_median, black_q3, 'red!65'))

    # Paired t-test between the two groups of projections.
    # NOTE(review): ttest_rel assumes row i of black_vecs is matched with row i of white_vecs — confirm
    t_test = ttest_rel(proj_black, proj_white).pvalue
    sig_asterisks = '\\textit{n.s.}'
    if t_test < 0.001:
        sig_asterisks = '***'
    elif t_test < 0.01:
        sig_asterisks = '**'
    elif t_test < 0.05:
        sig_asterisks = '*'

    # Get the effect size for the attribute and round to two decimal places
    effect_size = str(round(att_effect, 2))

    # Each attribute occupies two x positions: 1-2, 3-4, 5-6, ...
    draw_idx = idx * 2 + 1

    # Write LaTeX code for drawing a brace to the effect size and significance level.
    # Backslashes are doubled so that no invalid escape sequence (such as '\d') is used.
    # NOTE(review): the third \draw ends in "-10,)" with a trailing comma; kept as-is — confirm it is intended
    drawing_code = f"""
    \\draw [thick] (axis cs:{draw_idx},-10) -- (axis cs:{draw_idx},{white_q1});
    \\draw [thick] (axis cs:{draw_idx+1},-10) -- (axis cs:{draw_idx+1},{black_q1});
    \\draw [thick] (axis cs:{draw_idx},-10) -- (axis cs:{draw_idx+1},-10,);
    \\node at (axis cs:{draw_idx}.6,-11) {{\\tiny {sig_asterisks}}};
    \\node at (axis cs:{draw_idx}.6,-12) {{\\tiny {effect_size}}};
    """

    draw_code_list.append(drawing_code)

draw_codes = '\n'.join(draw_code_list)
# All boxplot lines (newline-terminated), a blank line, then the brace-drawing code
latex_vis_string = ''.join(line + '\n' for line in boxplot_lines) + '\n' + draw_codes

# Write the LaTeX code to a file
with open('./text_to_image/effect_size_vis.txt', 'w') as f:
    f.write(latex_vis_string)

In [None]:
# Build the comma-separated xtick positions: two positions per attribute, each
# suffixed with '.7' by the join separator (so the final entry carries no '.7')
n_positions = 2 * len(sorted_attributes)
xticks = '.7, '.join(map(str, range(1, n_positions + 1)))
print(xticks)

In [None]:
# Build the xtick labels: attribute names in plot order, with an empty label
# between consecutive names
attribute_names = [name for name, _effect in sorted_attributes]
xtick_labels = ', , '.join(attribute_names)
print(xtick_labels)

In [None]:
# List every attribute with its effect size, in the order they were computed
for attribute in effect_size_dict:
    print(f'{attribute}: {effect_size_dict[attribute]}')