In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, ttest_rel
import os

  from pandas.core import (


In [2]:
# Dictionary categories analysed for each dataset. Each list is iterated by the
# analysis cells below, and (except for the Essays cell) its length is used as
# the Bonferroni divisor, so every category must appear exactly once.

# Essays (Big Five personality) categories.
personality_columns = [
    "nrc.negative",
    "emo_pos",
    "Affect",
    "nrc.positive",
    "affiliation",
    "emotion",
    "nrc.joy",
    "Social",
    "emo_sad",
    "nrc.anticipation",
    "nrc.anger",
    "pronoun",
    "i",
    "emo_anger",
    "swear",
    "BigWords",
    "emo_neg",
    "nrc.disgust",
    "nrc.sadness",
    "nrc.trust",
]

# Facebook (moral foundations) categories.
facebook_columns = [
    "we",
    "socrefs",
    "pronoun",
    "friend",
    "mfd.authority.virtue",
    "mfd.care.virtue",
    "Affect",
    "i",
    "mfd.authority.vice",
    "affiliation",
    "prosocial",
    "family",
    "relig",
    # NOTE(review): the two "mfd.purity.*" entries below are disabled while the
    # "mfd.sanctity.*" entries are active -- presumably a renamed category; confirm.
    # "mfd.purity.vice",
    # "mfd.purity.virtue",
    "mfd.sanctity.virtue",
    "mfd.sanctity.vice",
    "you",
    "Social",
]

# WASSA (empathy) categories.
# "pronoun" used to be listed twice; the duplicate inflated len(wassa_columns)
# (the Bonferroni divisor in the WASSA cell) and produced duplicate summary rows.
wassa_columns = [
    "Affect",
    "differ",
    "we",
    "shehe",
    "pronoun",
    "emo_neg",
    "tentat",
    "empathy.low_empathy",
    "cogproc",
    "empathy.low_distress",
]

# Political (party comparison) categories.
political_columns = [
    "mfd.authority.virtue",
    "mfd.authority.vice",
    "mfd.loyalty.virtue",
    "mfd.loyalty.vice",
    "mfd.fairness.virtue",
    "mfd.fairness.vice",
    "emo_anx",
    "adverb",
    "conj",
    "emo_neg",
    "emo_anger",
    "we",
    "relig",
    "swear",
    "i",
    "cogproc",
    "emo_pos",
    "certitude",
]

# Gender comparison categories.
gender_columns = [
    "article",
    "social",
    "emo_anx",
    "pronoun",
    "i",
    "emo_pos",
    "emo_neg",
    "affect",
    "tentat",
    "motion",
    "swear",
    "quant",
    "number",
    "space",
    "cogproc",
]

# Age correlation categories.
age_columns = [
    "we",
    "cogproc",
    "prep",
    "article",
    "social",
    "focusfuture",
    "focuspast",
    "emo_neg",
    "emo_pos",
    "i",
]

## Essays

In [3]:
# Essays: load the original texts and the six LLM rewrites. Each CSV is located
# by a substring of its file name; the first matching os.listdir() entry is read.
parent_path = "essays/with_dictionaries/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "anon_full",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["anon_full"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [4]:
# Sanity check: print every expected category that the original Essays frame
# lacks (no output means every category is present).
available_columns = original_df.columns
for col in personality_columns:
    if col in available_columns:
        continue
    print(col)

In [5]:
from scipy.stats import pearsonr

# Essays: correlate each trait column ("z" + trait name) with every dictionary
# category, for the original texts and every rewrite, then print one LaTeX table
# row per category that is significant in the original texts.
for col_of_interest in ["AGR", "OPN", "CON", "NEU", "EXT"]:
    trait_column = f"z{col_of_interest}"

    categories, llms, prompts = [], [], []
    pearson_rs, p_values = [], []

    # NOTE(review): the divisor is a hard-coded 10, while personality_columns
    # currently has 20 entries and the sibling cells divide by len(<columns>) --
    # confirm which correction is intended.
    threshold_based_on_bonferroni = 0.05 / 10

    for key, value in all_data_dictionaries.items():
        for column in personality_columns:
            # Keep only rows where both the trait and the category are present.
            complete_rows = value.dropna(subset=[trait_column, column])
            pearson_r, p_value = pearsonr(
                complete_rows[trait_column], complete_rows[column]
            )

            categories.append(column)
            # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
            # "prompt" column receives key[1] (e.g. "gpt"); the lookups below
            # use the same swapped convention.
            llms.append(key[0])
            prompts.append(key[1])
            pearson_rs.append(pearson_r)
            p_values.append(p_value)

    summary_df = pd.DataFrame(
        {
            "category": categories,
            "llm": llms,
            "prompt": prompts,
            "pearson_r": pearson_rs,
            "p_value": p_values,
        }
    )
    summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni

    print("# column of interest:", col_of_interest)

    from_original = summary_df["llm"] == "original"
    for category in summary_df[summary_df["significant"] & from_original][
        "category"
    ].unique():
        in_category = summary_df["category"] == category

        # First cell: correlation in the original texts (bold when significant).
        baseline = summary_df[in_category & from_original]
        r_text = f"{baseline['pearson_r'].values[0]:.2f}"
        if baseline["significant"].values[0]:
            r_text = f"\\textbf{{{r_text}}}"
        latex_row = f" & {category} & " + r_text

        # Remaining cells: one per (prompt, model) rewrite, in fixed table order.
        for prompt in ["rephrase", "syntax_grammar"]:
            for llm in ["gemini", "gpt", "llama"]:
                variant = summary_df[
                    in_category
                    & (summary_df["llm"] == prompt)
                    & (summary_df["prompt"] == llm)
                ]
                r_text = f"{variant['pearson_r'].values[0]:.2f}"
                if variant["significant"].values[0]:
                    r_text = f"\\textbf{{{r_text}}}"
                latex_row += " & " + r_text

        print(latex_row + "\\\\")

# column of interest: AGR
 & nrc.negative & \textbf{-0.10} & -0.04 & -0.05 & -0.06 & \textbf{-0.08} & \textbf{-0.07} & \textbf{-0.07}\\
 & emo_pos & \textbf{0.06} & 0.04 & \textbf{0.06} & 0.05 & \textbf{0.07} & 0.05 & \textbf{0.07}\\
 & affiliation & \textbf{0.08} & \textbf{0.08} & \textbf{0.09} & \textbf{0.07} & \textbf{0.08} & \textbf{0.08} & \textbf{0.07}\\
 & nrc.anticipation & \textbf{0.07} & 0.04 & \textbf{0.08} & 0.03 & 0.04 & \textbf{0.06} & \textbf{0.07}\\
 & emo_anger & \textbf{-0.09} & \textbf{-0.08} & \textbf{-0.11} & \textbf{-0.08} & \textbf{-0.10} & \textbf{-0.08} & \textbf{-0.10}\\
 & swear & \textbf{-0.12} & -0.02 & \textbf{-0.06} & -0.03 & \textbf{-0.08} & \textbf{-0.08} & \textbf{-0.08}\\
 & nrc.disgust & \textbf{-0.08} & -0.06 & -0.05 & -0.05 & -0.05 & \textbf{-0.06} & -0.05\\
# column of interest: OPN
 & nrc.negative & \textbf{0.06} & 0.02 & -0.01 & -0.02 & 0.04 & 0.01 & 0.01\\
 & i & \textbf{-0.13} & \textbf{-0.07} & \textbf{-0.11} & \textbf{-0.07} & \textbf{-0.14}

## WASSA

In [6]:
# WASSA: load the original texts and the six LLM rewrites. Each CSV is located
# by a substring of its file name; the first matching os.listdir() entry is read.
parent_path = "wassa_individual/with_dictionaries/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "clean_wassa",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["clean_wassa"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [7]:
# Sanity check: print every expected category that the original WASSA frame
# lacks (no output means every category is present).
available_columns = original_df.columns
for col in wassa_columns:
    if col in available_columns:
        continue
    print(col)

In [8]:
from scipy.stats import pearsonr

# WASSA: correlate each outcome column ("z." + outcome name) with every
# dictionary category, for the original texts and every rewrite, then print one
# LaTeX table row per category that is significant in the original texts.
for col_of_interest in [
    "iri.perspective",
    "iri.distress",
    "iri.fantasy",
    "iri.concern",
]:
    outcome_column = f"z.{col_of_interest}"

    categories, llms, prompts = [], [], []
    pearson_rs, p_values = [], []

    # Significance threshold: 0.05 divided by the number of listed categories.
    threshold_based_on_bonferroni = 0.05 / len(wassa_columns)

    for key, value in all_data_dictionaries.items():
        for column in wassa_columns:
            # Keep only rows where both the outcome and the category are present.
            complete_rows = value.dropna(subset=[outcome_column, column])
            pearson_r, p_value = pearsonr(
                complete_rows[outcome_column], complete_rows[column]
            )

            categories.append(column)
            # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
            # "prompt" column receives key[1] (e.g. "gpt"); the lookups below
            # use the same swapped convention.
            llms.append(key[0])
            prompts.append(key[1])
            pearson_rs.append(pearson_r)
            p_values.append(p_value)

    summary_df = pd.DataFrame(
        {
            "category": categories,
            "llm": llms,
            "prompt": prompts,
            "pearson_r": pearson_rs,
            "p_value": p_values,
        }
    )
    summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni

    print("# column of interest:", col_of_interest)

    from_original = summary_df["llm"] == "original"
    for category in summary_df[summary_df["significant"] & from_original][
        "category"
    ].unique():
        in_category = summary_df["category"] == category

        # First cell: correlation in the original texts (bold when significant).
        baseline = summary_df[in_category & from_original]
        r_text = f"{baseline['pearson_r'].values[0]:.2f}"
        if baseline["significant"].values[0]:
            r_text = f"\\textbf{{{r_text}}}"
        latex_row = f" & {category} & " + r_text

        # Remaining cells: one per (prompt, model) rewrite, in fixed table order.
        for prompt in ["rephrase", "syntax_grammar"]:
            for llm in ["gemini", "gpt", "llama"]:
                variant = summary_df[
                    in_category
                    & (summary_df["llm"] == prompt)
                    & (summary_df["prompt"] == llm)
                ]
                r_text = f"{variant['pearson_r'].values[0]:.2f}"
                if variant["significant"].values[0]:
                    r_text = f"\\textbf{{{r_text}}}"
                latex_row += " & " + r_text

        print(latex_row + "\\\\")

# column of interest: iri.perspective
 & we & \textbf{-0.22} & \textbf{-0.17} & \textbf{-0.17} & \textbf{-0.13} & \textbf{-0.19} & \textbf{-0.22} & \textbf{-0.18}\\
 & shehe & \textbf{0.09} & 0.03 & \textbf{0.10} & \textbf{0.08} & 0.04 & \textbf{0.09} & \textbf{0.09}\\
 & pronoun & \textbf{-0.22} & -0.06 & \textbf{-0.20} & -0.03 & \textbf{-0.14} & \textbf{-0.17} & \textbf{-0.07}\\
 & tentat & \textbf{-0.09} & -0.05 & \textbf{-0.11} & 0.00 & \textbf{-0.12} & \textbf{-0.12} & -0.05\\
 & cogproc & \textbf{-0.12} & \textbf{-0.14} & \textbf{-0.18} & \textbf{-0.10} & \textbf{-0.13} & \textbf{-0.14} & \textbf{-0.08}\\
# column of interest: iri.distress
 & Affect & \textbf{0.13} & 0.06 & 0.07 & \textbf{0.09} & 0.08 & 0.06 & \textbf{0.12}\\
 & differ & \textbf{0.09} & \textbf{0.11} & \textbf{0.16} & \textbf{0.09} & \textbf{0.12} & \textbf{0.10} & \textbf{0.08}\\
 & we & \textbf{0.08} & 0.02 & 0.07 & 0.05 & \textbf{0.11} & 0.07 & \textbf{0.08}\\
 & tentat & \textbf{0.11} & 0.06 & \textbf{0.07} &

## Facebook

In [9]:
# Facebook: load the original texts and the six LLM rewrites. Each CSV is located
# by a substring of its file name; the first matching os.listdir() entry is read.
parent_path = "facebook/with_dictionaries/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "dataset_clean",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["dataset_clean"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [10]:
# Sanity check: print every expected category that the original Facebook frame
# lacks (no output means every category is present).
available_columns = original_df.columns
for col in facebook_columns:
    if col in available_columns:
        continue
    print(col)

In [11]:
from scipy.stats import pearsonr

# Facebook: correlate each outcome column ("z." + outcome name) with every
# dictionary category, for the original texts and every rewrite, then print one
# LaTeX table row per category that is significant in the original texts.
for col_of_interest in [
    "care",
    "fairness",
    "loyalty",
    "authority",
    "purity",
]:
    outcome_column = f"z.{col_of_interest}"

    categories, llms, prompts = [], [], []
    pearson_rs, p_values = [], []

    # Significance threshold: 0.05 divided by the number of listed categories and
    # then by 5 -- presumably the five outcomes looped over here; confirm.
    threshold_based_on_bonferroni = 0.05 / len(facebook_columns) / 5

    for key, value in all_data_dictionaries.items():
        for column in facebook_columns:
            # Keep only rows where both the outcome and the category are present.
            complete_rows = value.dropna(subset=[outcome_column, column])
            pearson_r, p_value = pearsonr(
                complete_rows[outcome_column], complete_rows[column]
            )

            categories.append(column)
            # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
            # "prompt" column receives key[1] (e.g. "gpt"); the lookups below
            # use the same swapped convention.
            llms.append(key[0])
            prompts.append(key[1])
            pearson_rs.append(pearson_r)
            p_values.append(p_value)

    summary_df = pd.DataFrame(
        {
            "category": categories,
            "llm": llms,
            "prompt": prompts,
            "pearson_r": pearson_rs,
            "p_value": p_values,
        }
    )
    summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni

    print("# column of interest:", col_of_interest)

    from_original = summary_df["llm"] == "original"
    for category in summary_df[summary_df["significant"] & from_original][
        "category"
    ].unique():
        in_category = summary_df["category"] == category

        # First cell: correlation in the original texts (bold when significant).
        baseline = summary_df[in_category & from_original]
        r_text = f"{baseline['pearson_r'].values[0]:.2f}"
        if baseline["significant"].values[0]:
            r_text = f"\\textbf{{{r_text}}}"
        latex_row = f" & {category} & " + r_text

        # Remaining cells: one per (prompt, model) rewrite, in fixed table order.
        for prompt in ["rephrase", "syntax_grammar"]:
            for llm in ["gemini", "gpt", "llama"]:
                variant = summary_df[
                    in_category
                    & (summary_df["llm"] == prompt)
                    & (summary_df["prompt"] == llm)
                ]
                r_text = f"{variant['pearson_r'].values[0]:.2f}"
                if variant["significant"].values[0]:
                    r_text = f"\\textbf{{{r_text}}}"
                latex_row += " & " + r_text

        print(latex_row + "\\\\")

# column of interest: care
 & mfd.care.virtue & \textbf{0.09} & \textbf{0.09} & \textbf{0.10} & 0.02 & \textbf{0.07} & \textbf{0.09} & 0.08\\
 & mfd.authority.vice & \textbf{-0.06} & -0.03 & -0.03 & -0.04 & -0.04 & -0.03 & -0.06\\
 & affiliation & \textbf{0.07} & \textbf{0.09} & \textbf{0.10} & 0.06 & \textbf{0.08} & \textbf{0.07} & 0.07\\
# column of interest: fairness
 & mfd.authority.vice & \textbf{-0.06} & -0.04 & -0.04 & -0.03 & -0.05 & -0.04 & -0.04\\
 & relig & \textbf{-0.06} & -0.03 & -0.05 & -0.01 & -0.05 & \textbf{-0.06} & -0.02\\
 & mfd.sanctity.virtue & \textbf{-0.07} & -0.04 & -0.05 & -0.01 & -0.06 & \textbf{-0.06} & -0.02\\
# column of interest: loyalty
 & socrefs & \textbf{0.09} & \textbf{0.13} & \textbf{0.11} & \textbf{0.16} & \textbf{0.11} & \textbf{0.10} & 0.09\\
 & friend & \textbf{0.07} & \textbf{0.06} & 0.06 & 0.07 & \textbf{0.06} & 0.06 & 0.06\\
 & mfd.care.virtue & \textbf{0.06} & 0.04 & 0.04 & 0.07 & 0.05 & 0.04 & 0.06\\
 & Affect & \textbf{0.10} & \textbf{0.10}

## Political

In [12]:
# Political: load the original texts and the six LLM rewrites. Each CSV is located
# by a substring of its file name; the first matching os.listdir() entry is read.
parent_path = "political/with_dictionaries_both/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "clean_data",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["clean_data"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [13]:
# Sanity check: print every expected category that the original political frame
# lacks (no output means every category is present).
available_columns = original_df.columns
for col in political_columns:
    if col in available_columns:
        continue
    print(col)

In [14]:
# Political: for every data variant and dictionary category, compare rows with
# party == "D" against rows with party == "R" using an independent-samples
# t-test, and record both group means.
categories = []
llms = []
prompts = []
means_democrat = []
means_republican = []
stats = []
p_values = []

# Significance threshold: 0.05 divided by the number of listed categories.
threshold_based_on_bonferroni = 0.05 / len(political_columns)

for key, value in all_data_dictionaries.items():
    # Split once per frame instead of re-filtering for every category.
    democrat_rows = value[value["party"] == "D"]
    republican_rows = value[value["party"] == "R"]

    for column in political_columns:
        # Drop missing scores explicitly: ttest_ind propagates NaN by default,
        # whereas .mean() skips it, so without this a single missing value would
        # give a NaN p-value next to a valid-looking mean.
        democrat_scores = democrat_rows[column].dropna()
        republican_scores = republican_rows[column].dropna()

        categories.append(column)
        # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
        # "prompt" column receives key[1] (e.g. "gpt"); the table-printing cell
        # relies on this swapped convention.
        llms.append(key[0])
        prompts.append(key[1])
        means_democrat.append(democrat_scores.mean())
        means_republican.append(republican_scores.mean())
        t_stat, p_value = ttest_ind(democrat_scores, republican_scores)
        stats.append(t_stat)
        p_values.append(p_value)

summary_df = pd.DataFrame(
    {
        "category": categories,
        "llm": llms,
        "prompt": prompts,
        "mean_democrat": means_democrat,
        "mean_republican": means_republican,
        "t_stat": stats,
        "p_value": p_values,
    }
)
summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni
# Non-significant rows are kept on purpose: the table-printing cell looks up
# every variant of each category that is significant in the original texts.

In [15]:
# Print one LaTeX table row per category that is significant in the original
# texts: the two group means for the original, then for every (prompt, model)
# rewrite; a pair is bold when that variant's difference is significant.
from_original = summary_df["llm"] == "original"
for category in summary_df[summary_df["significant"] & from_original][
    "category"
].unique():
    in_category = summary_df["category"] == category

    # Collect the matching summary rows in table order: original first.
    # NOTE: the "llm" column holds the prompt name and "prompt" holds the model
    # name, hence the crossed comparisons.
    selections = [summary_df[in_category & from_original]]
    for prompt in ["rephrase", "syntax_grammar"]:
        for llm in ["gemini", "gpt", "llama"]:
            selections.append(
                summary_df[
                    in_category
                    & (summary_df["llm"] == prompt)
                    & (summary_df["prompt"] == llm)
                ]
            )

    latex_row = f"{category} & "
    for selection in selections:
        democrat_text = f"{selection['mean_democrat'].values[0]:.2f}"
        republican_text = f"{selection['mean_republican'].values[0]:.2f}"
        if selection["significant"].values[0]:
            democrat_text = f"\\textbf{{{democrat_text}}}"
            republican_text = f"\\textbf{{{republican_text}}}"
        latex_row += f"{democrat_text} & {republican_text} & "

    print(latex_row + "\\\\")

mfd.loyalty.virtue & \textbf{0.02} & \textbf{0.02} & \textbf{0.03} & \textbf{0.02} & \textbf{0.03} & \textbf{0.03} & 0.03 & 0.03 & \textbf{0.02} & \textbf{0.02} & \textbf{0.03} & \textbf{0.03} & \textbf{0.03} & \textbf{0.02} & \\
emo_anx & \textbf{0.07} & \textbf{0.06} & \textbf{0.08} & \textbf{0.05} & 0.07 & 0.07 & 0.06 & 0.06 & \textbf{0.07} & \textbf{0.06} & 0.08 & 0.07 & \textbf{0.07} & \textbf{0.06} & \\
adverb & \textbf{3.41} & \textbf{3.71} & 1.98 & 2.07 & \textbf{2.22} & \textbf{2.35} & \textbf{2.31} & \textbf{2.45} & \textbf{3.06} & \textbf{3.28} & \textbf{2.38} & \textbf{2.52} & \textbf{2.82} & \textbf{3.04} & \\
emo_neg & \textbf{0.31} & \textbf{0.27} & 0.31 & 0.28 & 0.26 & 0.25 & 0.25 & 0.25 & \textbf{0.30} & \textbf{0.26} & 0.29 & 0.27 & 0.30 & 0.27 & \\
i & \textbf{1.64} & \textbf{1.75} & 1.16 & 1.27 & 1.78 & 1.88 & 1.88 & 2.00 & \textbf{1.66} & \textbf{1.79} & 1.63 & 1.68 & 1.73 & 1.82 & \\
cogproc & \textbf{9.28} & \textbf{9.58} & \textbf{8.32} & \textbf{8.77} & \textbf

## Gender

In [16]:
# Gender: this analysis reads the same political CSVs. Each file is located by a
# substring of its name; the first matching os.listdir() entry is read.
parent_path = "political/with_dictionaries_both/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "clean_data",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["clean_data"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [17]:
# Sanity check: print every expected category that the original frame lacks
# (no output means every category is present).
available_columns = original_df.columns
for col in gender_columns:
    if col in available_columns:
        continue
    print(col)

In [18]:
# Gender: for every data variant and dictionary category, compare rows with
# gender == "M" against rows with gender == "F" using an independent-samples
# t-test, and record both group means.
categories = []
llms = []
prompts = []
means_males = []
means_females = []
stats = []
p_values = []

# Significance threshold: 0.05 divided by the number of listed categories.
threshold_based_on_bonferroni = 0.05 / len(gender_columns)

for key, value in all_data_dictionaries.items():
    # Split once per frame instead of re-filtering for every category.
    male_rows = value[value["gender"] == "M"]
    female_rows = value[value["gender"] == "F"]

    for column in gender_columns:
        # Drop missing scores explicitly: ttest_ind propagates NaN by default,
        # whereas .mean() skips it, so without this a single missing value would
        # give a NaN p-value next to a valid-looking mean.
        male_scores = male_rows[column].dropna()
        female_scores = female_rows[column].dropna()

        categories.append(column)
        # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
        # "prompt" column receives key[1] (e.g. "gpt"); the table-printing cell
        # relies on this swapped convention.
        llms.append(key[0])
        prompts.append(key[1])
        means_males.append(male_scores.mean())
        means_females.append(female_scores.mean())
        t_stat, p_value = ttest_ind(male_scores, female_scores)
        stats.append(t_stat)
        p_values.append(p_value)

summary_df = pd.DataFrame(
    {
        "category": categories,
        "llm": llms,
        "prompt": prompts,
        "mean_males": means_males,
        "mean_females": means_females,
        "t_stat": stats,
        "p_value": p_values,
    }
)
summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni
# Non-significant rows are kept on purpose: the table-printing cell looks up
# every variant of each category that is significant in the original texts.

In [19]:
# Print one LaTeX table row per category that is significant in the original
# texts: the two group means for the original, then for every (prompt, model)
# rewrite; a pair is bold when that variant's difference is significant.
from_original = summary_df["llm"] == "original"
for category in summary_df[summary_df["significant"] & from_original][
    "category"
].unique():
    in_category = summary_df["category"] == category

    # Collect the matching summary rows in table order: original first.
    # NOTE: the "llm" column holds the prompt name and "prompt" holds the model
    # name, hence the crossed comparisons.
    selections = [summary_df[in_category & from_original]]
    for prompt in ["rephrase", "syntax_grammar"]:
        for llm in ["gemini", "gpt", "llama"]:
            selections.append(
                summary_df[
                    in_category
                    & (summary_df["llm"] == prompt)
                    & (summary_df["prompt"] == llm)
                ]
            )

    latex_row = f"{category} & "
    for selection in selections:
        males_text = f"{selection['mean_males'].values[0]:.2f}"
        females_text = f"{selection['mean_females'].values[0]:.2f}"
        if selection["significant"].values[0]:
            males_text = f"\\textbf{{{males_text}}}"
            females_text = f"\\textbf{{{females_text}}}"
        latex_row += f"{males_text} & {females_text} & "

    print(latex_row + "\\\\")

article & \textbf{8.52} & \textbf{8.01} & \textbf{8.72} & \textbf{8.20} & \textbf{8.45} & \textbf{7.99} & \textbf{8.98} & \textbf{8.41} & \textbf{8.60} & \textbf{8.01} & \textbf{8.79} & \textbf{8.31} & \textbf{8.75} & \textbf{8.17} & \\
social & \textbf{7.51} & \textbf{8.26} & \textbf{6.27} & \textbf{7.01} & \textbf{6.14} & \textbf{6.69} & \textbf{7.52} & \textbf{8.06} & \textbf{7.18} & \textbf{8.01} & \textbf{5.97} & \textbf{6.55} & \textbf{7.43} & \textbf{8.16} & \\
emo_anx & \textbf{0.06} & \textbf{0.07} & \textbf{0.05} & \textbf{0.08} & \textbf{0.06} & \textbf{0.08} & \textbf{0.05} & \textbf{0.07} & \textbf{0.06} & \textbf{0.07} & \textbf{0.07} & \textbf{0.08} & \textbf{0.06} & \textbf{0.07} & \\
i & \textbf{1.65} & \textbf{1.76} & 1.20 & 1.23 & 1.79 & 1.88 & 1.91 & 1.97 & \textbf{1.66} & \textbf{1.80} & \textbf{1.62} & \textbf{1.70} & 1.75 & 1.81 & \\
emo_neg & \textbf{0.28} & \textbf{0.30} & 0.30 & 0.30 & 0.25 & 0.27 & 0.24 & 0.26 & 0.27 & 0.29 & \textbf{0.27} & \textbf{0.29} & 0

## Age

In [20]:
# Age: this analysis reads the same political CSVs. Each file is located by a
# substring of its name; the first matching os.listdir() entry is read.
parent_path = "political/with_dictionaries_both/"
all_files = os.listdir(parent_path)

frames_by_token = {}
for token in (
    "clean_data",
    "syntax_grammar_llama",
    "rephrase_llama",
    "syntax_grammar_gpt",
    "rephrase_gpt",
    "syntax_grammar_gemini",
    "rephrase_gemini",
):
    matching_names = [name for name in all_files if token in name]
    frames_by_token[token] = pd.read_csv(
        os.path.join(parent_path, matching_names[0])
    )

original_df = frames_by_token["clean_data"]
syntax_grammar_llama = frames_by_token["syntax_grammar_llama"]
rephrase_llama = frames_by_token["rephrase_llama"]
syntax_grammar_gpt = frames_by_token["syntax_grammar_gpt"]
rephrase_gpt = frames_by_token["rephrase_gpt"]
syntax_grammar_gemini = frames_by_token["syntax_grammar_gemini"]
rephrase_gemini = frames_by_token["rephrase_gemini"]

# Build a speakerid -> age lookup from the main political file and attach an
# "age" column to every frame. Speaker ids absent from the lookup map to NaN.
main_data_political = pd.read_csv("political/political_data.csv")
main_data_political_speaker_id_to_age = dict(
    zip(main_data_political["speakerid"], main_data_political["age"])
)

for frame in (
    original_df,
    syntax_grammar_llama,
    rephrase_llama,
    syntax_grammar_gpt,
    rephrase_gpt,
    syntax_grammar_gemini,
    rephrase_gemini,
):
    frame["age"] = frame["speakerid"].map(main_data_political_speaker_id_to_age)


# Keys are (rewrite prompt, model) tuples; the untouched texts use ("original", "-").
all_data_dictionaries = {
    ("original", "-"): original_df,
    ("syntax_grammar", "llama"): syntax_grammar_llama,
    ("rephrase", "llama"): rephrase_llama,
    ("syntax_grammar", "gpt"): syntax_grammar_gpt,
    ("rephrase", "gpt"): rephrase_gpt,
    ("syntax_grammar", "gemini"): syntax_grammar_gemini,
    ("rephrase", "gemini"): rephrase_gemini,
}

In [21]:
# Sanity check: print every expected category that the original frame lacks
# (no output means every category is present).
available_columns = original_df.columns
for col in age_columns:
    if col in available_columns:
        continue
    print(col)

In [24]:
from scipy.stats import pearsonr

# Age: correlate the "age" column with every dictionary category, for the
# original texts and every rewrite.
categories = []
llms = []
prompts = []
pearson_rs = []
p_values = []

# Significance threshold: 0.05 divided by the number of listed categories.
threshold_based_on_bonferroni = 0.05 / len(age_columns)

for key, value in all_data_dictionaries.items():
    for column in age_columns:
        # "age" is filled via .map() and is NaN for any speaker id missing from
        # the lookup; drop incomplete pairs (as the other Pearson cells do) so a
        # single missing value cannot turn the whole correlation into NaN.
        complete_rows = value.dropna(subset=["age", column])

        categories.append(column)
        # NOTE: the "llm" column receives key[0] (e.g. "rephrase") and the
        # "prompt" column receives key[1] (e.g. "gpt"); the table-printing cell
        # relies on this swapped convention.
        llms.append(key[0])
        prompts.append(key[1])
        pearson_r, p_value = pearsonr(
            complete_rows["age"],
            complete_rows[column],
        )
        pearson_rs.append(pearson_r)
        p_values.append(p_value)


summary_df = pd.DataFrame(
    {
        "category": categories,
        "llm": llms,
        "prompt": prompts,
        "pearson_r": pearson_rs,
        "p_value": p_values,
    }
)
summary_df["significant"] = summary_df["p_value"] < threshold_based_on_bonferroni

In [25]:
# Print one LaTeX table row per category that is significant in the original
# texts: the correlation for the original, then for every (prompt, model)
# rewrite; a value is bold when that variant's correlation is significant.
from_original = summary_df["llm"] == "original"
for category in summary_df[summary_df["significant"] & from_original][
    "category"
].unique():
    in_category = summary_df["category"] == category

    # First cell: correlation in the original texts.
    baseline = summary_df[in_category & from_original]
    r_text = f"{baseline['pearson_r'].values[0]:.2f}"
    if baseline["significant"].values[0]:
        r_text = f"\\textbf{{{r_text}}}"
    latex_row = f"{category} & " + r_text

    # Remaining cells in fixed table order.
    # NOTE: the "llm" column holds the prompt name and "prompt" holds the model
    # name, hence the crossed comparisons.
    for prompt in ["rephrase", "syntax_grammar"]:
        for llm in ["gemini", "gpt", "llama"]:
            variant = summary_df[
                in_category
                & (summary_df["llm"] == prompt)
                & (summary_df["prompt"] == llm)
            ]
            r_text = f"{variant['pearson_r'].values[0]:.2f}"
            if variant["significant"].values[0]:
                r_text = f"\\textbf{{{r_text}}}"
            latex_row += " & " + r_text

    print(latex_row + "\\\\")

focusfuture & \textbf{-0.09} & -0.05 & \textbf{-0.09} & \textbf{-0.10} & -0.07 & \textbf{-0.08} & \textbf{-0.09}\\
