In [71]:
import re
from collections import defaultdict

import joblib
import numpy as np
import pandas as pd
from langchain_core.messages import AIMessage
from langchain_ollama import ChatOllama
from sklearn.metrics import f1_score
from tqdm.auto import tqdm

In [64]:
# Chat model used for every prediction in this notebook. The prompts only ask
# for a one-word (or one-letter) answer, hence the small num_predict.
llm = ChatOllama(model="llama3.1", temperature=0, num_predict=5)

# Load the four CSV datasets; the paths are listed in the same order as the
# names on the left-hand side.
personality_df, facebook_df, demographic_df, wassa_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "essays/essays_anon_full.csv",
        "facebook/full_dataset_clean.csv",
        "political/clean_data.csv",
        "wassa/clean_wassa.csv",
    )
)

In [76]:
# Random-guess baseline for party prediction: draw "D" or "R" uniformly for
# every row of demographic_df and score the guesses with weighted F1.
possible_values = ["D", "R"]

# A seeded generator keeps the baseline identical across kernel restarts.
baseline_rng = np.random.default_rng(42)
random_selects = baseline_rng.choice(possible_values, size=demographic_df.shape[0])

print(
    "random baseline f1: ",
    f1_score(demographic_df["party"], random_selects, average="weighted"),
)

random baseline f1:  0.4902500486655


In [None]:
# Shrink every dataset to a small random subset (presumably to keep the
# per-row LLM calls below affordable).
# random_state pins the draw so the same rows are selected on every run, and
# min() avoids a ValueError when a frame has fewer rows than requested.
SAMPLE_SEED = 42
personality_df = personality_df.sample(
    n=min(100, len(personality_df)), random_state=SAMPLE_SEED
)
facebook_df = facebook_df.sample(
    n=min(100, len(facebook_df)), random_state=SAMPLE_SEED
)
demographic_df = demographic_df.sample(
    n=min(100, len(demographic_df)), random_state=SAMPLE_SEED
)
wassa_df = wassa_df.sample(n=min(50, len(wassa_df)), random_state=SAMPLE_SEED)

In [3]:
# Display the personality frame (pandas abbreviates long frames when rendering).
personality_df

Unnamed: 0,gender,cEXT,cNEU,cAGR,cCON,cOPN,sEXT,sNEU,sAGR,sCON,sOPN,zEXT,zNEU,zAGR,zCON,zOPN,#AUTHID,text
164,Female,n,y,n,n,y,22.000,50.000,34.000000,37.000000,50.0,-3.043693,1.722684,-1.208785,-0.591187,1.118603,1997_714973,I will now try to track my cluttered and rando...
362,Female,n,y,n,y,y,41.000,38.000,32.000000,42.000000,44.0,-0.362433,0.521287,-1.655192,0.175054,0.532741,1998_163952,He will be nineteen this Saturday. I think I a...
2201,Female,y,y,y,y,y,29.000,27.000,35.000000,37.000000,45.0,0.427746,0.486658,0.126914,0.868764,1.208117,2004_298,So. I am overwhelmed. I wouldn't say that I'm ...
1229,Female,y,y,n,n,n,3.875,4.125,3.111111,3.222222,3.3,0.623297,1.444437,-1.142632,-0.432014,-0.790909,2000_672399,I just woke up and I'm a little confused as I ...
689,Female,n,n,y,n,y,35.000,34.000,49.000000,39.000000,42.0,-1.183799,-0.087609,0.929513,-0.224488,0.174959,1999_498508,I can't wait to drive back from dallas to aust...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,Male,y,n,y,y,n,48.000,31.000,45.000000,49.000000,32.0,0.698073,-0.319336,0.355608,1.591758,-1.237685,1998_511209,I want to get good grades in all my classes. M...
1148,Male,n,n,n,n,n,3.000,2.500,2.666667,2.888889,3.4,-0.348052,-0.616330,-1.842456,-0.924518,-0.631214,2000_559995,I feel really good today because it is my birt...
10,Female,y,y,n,y,y,48.000,36.000,41.000000,45.000000,44.0,0.698598,0.042114,-0.067130,0.463697,0.241172,1997_071933,So far I have been at LOCNAME for I guess 0 ...
59,Female,n,y,n,y,n,31.000,45.000,39.000000,50.000000,33.0,-1.748285,1.122480,-0.393317,1.123000,-1.367451,1997_454257,"September 0, 0000 I am not too sure what to ..."


In [None]:
# Word inserted into the prompt for each trait abbreviation. The matching
# ground-truth column in personality_df is "c" + abbreviation (e.g. "cEXT").
complete_col_string_dictionary = {
    "EXT": "extrovert",
    "NEU": "neurotic",
    "AGR": "agreeable",
    "CON": "conscientious",
    "OPN": "open",
}

# personality_results[trait] -> list of (raw model answer, ground-truth label).
personality_results = defaultdict(list)
for col, complete_col_string in complete_col_string_dictionary.items():
    col_str = f"c{col}"
    # The system prompt depends only on the trait, so it is built once per
    # trait instead of once per essay.
    system_prompt = f"You are a helpful assistant that predict whether the text given to you is written by an {complete_col_string} or not. Can you predict the personality of the following text? ONLY return YES or NO."
    for _index, row in tqdm(
        personality_df.iterrows(), total=personality_df.shape[0], leave=False
    ):
        ai_msg = llm.invoke([("system", system_prompt), ("human", row["text"])])
        personality_results[col].append((ai_msg.content, row[col_str]))

# Persist the collected (answer, label) pairs to disk.
joblib.dump(personality_results, "personality_results.joblib")

In [None]:
# Phrase inserted into the prompt for each label column of wassa_df.
complete_col_string_dictionary = {
    "c.iri.perspective": "perspective-taking",
    "c.iri.distress": "personal distress",
    "c.iri.fantasy": "fantasy",
    "c.iri.concern": "empathetic concern",
}

# wassa_results[column] -> list of (raw model answer, ground-truth label).
wassa_results = defaultdict(list)
for col, complete_col_string in complete_col_string_dictionary.items():
    # The system prompt depends only on the column, so it is built once per
    # column instead of once per row.
    system_prompt = f"The IRI is a 28-item self-report questionnaire that measures empathy across four dimensions: perspective-taking (PT), fantasy (FS), empathetic concern (EC), and personal distress (PD). You are a helpful assistant that predict whether the text given to you is written by a person who would have high levels of {complete_col_string} or not. Can you predict that for the following text? ONLY return YES or NO."
    for _index, row in tqdm(
        wassa_df.iterrows(), total=wassa_df.shape[0], leave=False
    ):
        ai_msg = llm.invoke([("system", system_prompt), ("human", row["text"])])
        wassa_results[col].append((ai_msg.content, row[col]))

# Persist the collected (answer, label) pairs to disk.
joblib.dump(wassa_results, "wassa_results.joblib")

In [11]:
# Preview the first three rows of facebook_df.
facebook_df.head(3)

Unnamed: 0,Care,Fairness,Loyalty,Authority,Purity,age,gender,z.care,z.fairness,z.loyalty,z.authority,z.purity,c.care,c.fairness,c.loyalty,c.authority,c.purity,subject_id,text
1784,1.833333,2.666667,1.166667,1.833333,0.333333,31,Female,-1.908364,-1.069538,-1.331858,-0.644126,-1.279359,n,n,n,n,n,1431503,It is done. Now alcohol.; deep breaths.....; P...
972,4.0,3.166667,2.0,1.0,1.333333,53,Male,0.630885,-0.408393,-0.440891,-1.466191,-0.484143,y,n,n,n,n,1418784,With Lisa Stephen to see Grisly hand and JD Mc...
505,2.0,4.166667,0.5,0.666667,0.5,26,Male,-1.713037,0.913898,-2.044632,-1.795017,-1.146823,n,y,n,n,n,1402941,"I can finally drive my car, just passed my dri..."


In [None]:
# Word inserted into the prompt for each label column of facebook_df.
complete_col_string_dictionary = {
    "c.care": "care",
    "c.fairness": "fairness",
    "c.loyalty": "loyalty",
    "c.authority": "authority",
    "c.purity": "purity",
}

# facebook_results[column] -> list of (raw model answer, ground-truth label).
facebook_results = defaultdict(list)
for col, complete_col_string in complete_col_string_dictionary.items():
    # The system prompt depends only on the column, so it is built once per
    # column instead of once per row.
    system_prompt = f"The Moral Foundations Questionnaire (MFQ) is a measure of the five foundations of morality. You are a helpful assistant that predict whether the text given to you is written by a person who would have high levels of {complete_col_string} or not. Can you predict that for the following text? ONLY return YES or NO."
    for _index, row in tqdm(
        facebook_df.iterrows(), total=facebook_df.shape[0], leave=False
    ):
        ai_msg = llm.invoke([("system", system_prompt), ("human", row["text"])])
        facebook_results[col].append((ai_msg.content, row[col]))

# Persist the collected (answer, label) pairs to disk.
joblib.dump(facebook_results, "facebook_results.joblib")

In [15]:
# party_results["party"] -> list of (raw model answer, true party) pairs.
party_results = defaultdict(list)

# The instruction is identical for every row, so it is defined once.
party_system_prompt = "You are a helpful assistant that predict whether the text given to you is written by a person who would be a Democrat or not. Can you predict the political affiliation of the following text? ONLY return YES or NO."

for _index, row in tqdm(
    demographic_df.iterrows(), total=demographic_df.shape[0], leave=False
):
    ai_msg = llm.invoke([("system", party_system_prompt), ("human", row["text"])])
    # Store under a fixed key rather than a loop variable left over from
    # another cell, so this cell also runs on a fresh kernel.
    party_results["party"].append((ai_msg.content, row["party"]))


joblib.dump(party_results, "party_results.joblib")

  0%|          | 0/100 [00:00<?, ?it/s]

['party_results.joblib']

In [16]:
# gender_results["gender"] -> list of (raw model answer, true gender) pairs.
gender_results = defaultdict(list)

# The instruction is identical for every row, so it is defined once.
gender_system_prompt = "You are a helpful assistant that predict whether the text given to you is written by a person who would be male or not. Can you predict the the gender of the following text? ONLY return YES or NO."

for _index, row in tqdm(
    demographic_df.iterrows(), total=demographic_df.shape[0], leave=False
):
    ai_msg = llm.invoke([("system", gender_system_prompt), ("human", row["text"])])
    # Store under a fixed key rather than a loop variable left over from
    # another cell, so this cell also runs on a fresh kernel.
    gender_results["gender"].append((ai_msg.content, row["gender"]))


joblib.dump(gender_results, "gender_results.joblib")

  0%|          | 0/100 [00:00<?, ?it/s]

['gender_results.joblib']

In [17]:
# cohort_resutls["cohort"] -> list of (raw model answer, true cohort) pairs.
# (The misspelled variable and file names are kept because other cells use them.)
cohort_resutls = defaultdict(list)

# The instruction is identical for every row, so it is defined once.
cohort_system_prompt = "You are a helpful assistant that predict whether the text given to you is written by (a) a person between 27-40 years old, (b) a person between 41-55 years also, (c) a person between 56-70 years old, or (d) a person more than 70 years old. Can you predict the age cohort of the following text? ONLY return the letter (a, b, c, or d)."

for _index, row in tqdm(
    demographic_df.iterrows(), total=demographic_df.shape[0], leave=False
):
    ai_msg = llm.invoke([("system", cohort_system_prompt), ("human", row["text"])])
    # Store under a fixed key rather than a loop variable left over from
    # another cell, so this cell also runs on a fresh kernel.
    cohort_resutls["cohort"].append((ai_msg.content, row["cohort"]))


joblib.dump(cohort_resutls, "cohort_resutls.joblib")

  0%|          | 0/100 [00:00<?, ?it/s]

['cohort_resutls.joblib']

In [18]:
# Number of keys held by each results dict, in the order:
# personality, wassa, facebook, party, cohort, gender.
tuple(
    len(results)
    for results in (
        personality_results,
        wassa_results,
        facebook_results,
        party_results,
        cohort_resutls,
        gender_results,
    )
)

(5, 4, 5, 1, 1, 1)

In [20]:
from sklearn.metrics import f1_score

In [58]:
# Binary F1 per personality trait, followed by the mean over traits.
all_values = []
for key, pairs in personality_results.items():
    print(key)
    # Any answer containing "NO" is a negative prediction; everything else
    # counts as positive.
    # NOTE(review): the check is case-sensitive, so a reply such as "No" would
    # be scored as YES — confirm the model only ever answers in upper case.
    actual_values = [int("NO" not in answer) for answer, _label in pairs]
    # Ground-truth labels: "n" is the negative class, anything else positive.
    ground_truth = [int(label != "n") for _answer, label in pairs]
    # Compute the score once and reuse it for both printing and averaging.
    score = f1_score(ground_truth, actual_values)
    print(score)
    all_values.append(score)
    print("-----------------")
print("Average F1 Score:", np.mean(all_values))

EXT
0.3661971830985915
-----------------
NEU
0.6950354609929078
-----------------
AGR
0.6153846153846153
-----------------
CON
0.5688073394495413
-----------------
OPN
0.6
-----------------
Average F1 Score: 0.5690849197851312


In [59]:
# Binary F1 per wassa label column, followed by the mean over columns.
all_values = []
for key, pairs in wassa_results.items():
    print(key)
    # Any answer containing "NO" is a negative prediction; everything else
    # counts as positive.
    # NOTE(review): the check is case-sensitive, so a reply such as "No" would
    # be scored as YES — confirm the model only ever answers in upper case.
    actual_values = [int("NO" not in answer) for answer, _label in pairs]
    # Ground-truth labels: "n" is the negative class, anything else positive.
    ground_truth = [int(label != "n") for _answer, label in pairs]
    # Compute the score once and reuse it for both printing and averaging.
    score = f1_score(ground_truth, actual_values)
    print(score)
    all_values.append(score)
    print("-----------------")
print("Average F1 Score:", np.mean(all_values))

c.iri.perspective
0.5384615384615384
-----------------
c.iri.distress
0.5818181818181819
-----------------
c.iri.fantasy
0.0625
-----------------
c.iri.concern
0.5625
-----------------
Average F1 Score: 0.4363199300699301


In [60]:
# Binary F1 per facebook label column, followed by the mean over columns.
all_values = []
for key, pairs in facebook_results.items():
    print(key)
    # Any answer containing "NO" is a negative prediction; everything else
    # counts as positive.
    # NOTE(review): the check is case-sensitive, so a reply such as "No" would
    # be scored as YES — confirm the model only ever answers in upper case.
    actual_values = [int("NO" not in answer) for answer, _label in pairs]
    # Ground-truth labels: "n" is the negative class, anything else positive.
    ground_truth = [int(label != "n") for _answer, label in pairs]
    # Compute the score once and reuse it for both printing and averaging.
    score = f1_score(ground_truth, actual_values)
    print(score)
    all_values.append(score)
    print("-----------------")
print("Average F1 Score:", np.mean(all_values))

c.care
0.5567010309278351
-----------------
c.fairness
0.3888888888888889
-----------------
c.loyalty
0.47191011235955055
-----------------
c.authority
0.2857142857142857
-----------------
c.purity
0.32352941176470584
-----------------
Average F1 Score: 0.40534874593105313


In [53]:
# Binary F1 for the "is this author a Democrat?" predictions.
# NOTE(review): the random-baseline cell reports weighted F1 while this uses
# f1_score's default averaging — confirm the two numbers are meant to be compared.
for key, pairs in party_results.items():
    # An answer containing "NO" means "not a Democrat" (0); anything else is 1.
    actual_values = [int("NO" not in answer) for answer, _label in pairs]
    # Party "R" is the negative class; every other value is treated as positive.
    ground_truth = [int(label != "R") for _answer, label in pairs]
    print(f1_score(ground_truth, actual_values))
    print("-----------------")

0.6021505376344085
-----------------


In [54]:
# Binary F1 for the "is this author male?" predictions.
for key, pairs in gender_results.items():
    # An answer containing "NO" means "not male" (0); anything else is 1.
    actual_values = [int("NO" not in answer) for answer, _label in pairs]
    # Only the exact label "F" is the negative class.
    # NOTE(review): personality_df spells gender as "Female"/"Male"; verify
    # that demographic_df["gender"] really uses the single-letter code "F".
    ground_truth = [int(label != "F") for _answer, label in pairs]
    print(f1_score(ground_truth, actual_values))
    print("-----------------")

0.49484536082474234
-----------------


In [57]:
# Class index for each answer letter and for each ground-truth cohort label.
# Both sides must share the same encoding for f1_score.
_LETTER_TO_CLASS = {"a": 0, "b": 1, "c": 2, "d": 3}
_COHORT_TO_CLASS = {"27-40": 0, "41-55": 1, "56-70": 2}
# Anything unrecognised falls into the last class (3) on both sides.
_FALLBACK_CLASS = 3


def _cohort_class_from_answer(answer):
    """Map a raw model answer to a class index 0-3.

    Uses the first standalone option letter (a-d, any case) found in the
    answer, so replies such as "(b)", "b)" or "(c) a person" are read as the
    option they name. A plain substring test would instead read every reply
    containing the letter "a" as option a. Returns the fallback class when no
    option letter is present.
    """
    match = re.search(r"\b([a-d])\b", answer.lower())
    if match is None:
        return _FALLBACK_CLASS
    return _LETTER_TO_CLASS[match.group(1)]


# Macro-averaged F1 over the four age cohorts.
for key, pairs in cohort_resutls.items():
    actual_values = [_cohort_class_from_answer(answer) for answer, _label in pairs]
    ground_truth = [
        _COHORT_TO_CLASS.get(label, _FALLBACK_CLASS) for _answer, label in pairs
    ]
    print(f1_score(ground_truth, actual_values, average="macro"))
    print("-----------------")

0.12110783349721403
-----------------
