<h2>Data cleaning & validation</h2>
<h3>1. Detect completion time outliers beyond 3 SDs from the mean</h3>


In [1]:
import pandas as pd

def detect_outlier_participants(data):
    """Split participants by whether their completion time is a 3-SD outlier.

    The survey duration is computed as ``EndDate - StartDate`` in seconds and
    compared against ``mean +/- 3 * standard deviation`` of all durations.

    Note: ``data`` is modified in place -- ``StartDate`` and ``EndDate`` are
    converted to datetimes and a ``Duration`` column is added -- so the caller's
    frame carries these columns afterwards.

    Args:
        data: DataFrame with parseable ``StartDate`` and ``EndDate`` columns.

    Returns:
        Tuple ``(below_3_std, above_3_std, participants_within_3_std)`` of row
        subsets of ``data``. Rows whose duration is missing fail every
        comparison and therefore appear in none of the three subsets.
    """
    data['StartDate'] = pd.to_datetime(data['StartDate'])
    data['EndDate'] = pd.to_datetime(data['EndDate'])

    # Survey duration in seconds
    data['Duration'] = (data['EndDate'] - data['StartDate']).dt.total_seconds()
    duration = data['Duration']

    # Mean and (sample) standard deviation of survey times
    mean_duration = duration.mean()
    std_duration = duration.std()

    # Outlier thresholds: 3 standard deviations either side of the mean
    lower_threshold = mean_duration - 3 * std_duration
    upper_threshold = mean_duration + 3 * std_duration

    # Participants below and above the thresholds
    below_3_std = data[duration < lower_threshold]
    above_3_std = data[duration > upper_threshold]

    # Participants kept for analysis (thresholds inclusive)
    participants_within_3_std = data[(duration >= lower_threshold) & (duration <= upper_threshold)]

    print(mean_duration)
    # Report the size of the frame that was actually passed in; reading a
    # notebook-level global here breaks the function for any other input.
    print(f'beloow: {len(data)}, after: {len(participants_within_3_std)}')

    return below_3_std, above_3_std, participants_within_3_std


# Raw participant export; the path is relative to this notebook's directory.
df = pd.read_csv('../../data/prolific_participants_dataset.csv')

# NOTE: detect_outlier_participants also adds a `Duration` column to `df` in place.
below_3_std, above_3_std, participants_within_3_std = detect_outlier_participants(df)

# Show every column when a DataFrame is displayed (applies to all later cells).
pd.set_option('display.max_columns', None)


1196.1438356164383
beloow: 438, after: 432


<h3>2. Detect participants failing multiple attention checks</h3>

In [2]:
def detect_participants_with_multiple_failed_attention_checks(df):
    """Return the participants who failed more than one attention check.

    A check counts as failed when the response differs from the expected
    answer; a missing response also differs and therefore counts as failed.

    Args:
        df: DataFrame containing the four attention-check columns listed in
            ``checks``. It is not modified.

    Returns:
        A copy-derived subset of ``df`` with an extra ``failed_checks`` column
        (number of failed checks per participant), restricted to rows where
        that number is greater than 1. Works on an empty frame as well.
    """
    # Expected (correct) answer for each attention-check column
    checks = {
        'pq_attention_check_1': 5,
        'pq_attention_check_2': 2,
        'p_3_agree_att_check': 2,
        'seriousness_check': 2
    }
    expected = pd.Series(checks)

    df = df.copy()
    # Vectorised comparison: `expected` is aligned to the columns by name, so
    # each column is compared with its own answer; summing the resulting
    # booleans across columns gives the per-participant failure count.
    df['failed_checks'] = df[list(checks)].ne(expected, axis='columns').sum(axis=1)
    failed_participants = df[df['failed_checks'] > 1]

    return failed_participants

# Participants (within the 3-SD duration window) who failed more than one attention check.
participants = detect_participants_with_multiple_failed_attention_checks(participants_within_3_std)

<h3>3. Exclude participants failing attention checks and suspected <a href="https://www.qualtrics.com/support/survey-platform/survey-module/survey-checker/fraud-detection/#BotDetection">bots</a> (Q_RecaptchaScore &lt; 0.5)</h3>

In [3]:
def exclude_failed_attention_checks_and_bots(df):
    """Keep only participants who pass every attention check and the bot screen.

    A row is kept when all four attention-check answers equal their expected
    value and ``Q_RecaptchaScore`` is at least 0.5. Missing values fail the
    comparison, so such rows are excluded.

    Args:
        df: DataFrame with the attention-check columns and ``Q_RecaptchaScore``.

    Returns:
        A new DataFrame (copy) containing only the rows that pass.
    """
    # Expected answer per attention-check column
    expected_answers = {
        'pq_attention_check_1': 5,
        'pq_attention_check_2': 2,
        'p_3_agree_att_check': 2,
        'seriousness_check': 2,
    }

    # Start from the bot screen, then require each attention check in turn
    keep = df['Q_RecaptchaScore'] >= 0.5
    for column, answer in expected_answers.items():
        keep = keep & (df[column] == answer)

    return df[keep].copy()

# Count participants at or above the reCAPTCHA threshold (0.5) among those
# who survived the duration-outlier filter.
num_passed_captcha = (participants_within_3_std['Q_RecaptchaScore'] >= 0.5).sum()
print(f"Number of participants who passed CAPTCHA: {num_passed_captcha}")

# Scores below the threshold are treated as likely bots. A missing score
# satisfies neither comparison, so it is counted in neither number.
num_likely_bots = (participants_within_3_std['Q_RecaptchaScore'] < 0.5).sum()
print(f"Number of likely bots (reCAPTCHA score < 0.5): {num_likely_bots}")

# Analysis sample: all attention checks passed AND reCAPTCHA score >= 0.5.
df_filtered = exclude_failed_attention_checks_and_bots(participants_within_3_std)

Number of participants who passed CAPTCHA: 419
Number of likely bots (reCAPTCHA score < 0.5): 13


<h1>Analysis</h1>
<h3>1. Participants Demographics</h3>

In [4]:
import sys
sys.path.append("../..")  

from utils.mappings import MAPPINGS


def summarize_us_participants(df):
    """Build a one-row demographic summary (sample size, age, gender counts).

    Gender codes are translated to labels through ``MAPPINGS['gender']`` before
    counting; a label that never occurs is reported as 0.

    Args:
        df: DataFrame with ``age`` and ``gender`` columns.

    Returns:
        dict with keys ``Country``, ``N``, ``M`` (mean age), ``SD`` (age standard
        deviation), both rounded to 2 decimals, and one count per gender label.
    """
    ages = df['age']
    label_counts = df['gender'].map(MAPPINGS['gender']).value_counts()

    summary = {
        'Country': 'USA',
        'N': len(df),
        'M': round(ages.mean(), 2),
        'SD': round(ages.std(), 2),
    }

    # (output key, label as produced by the gender mapping); the last output
    # key is worded differently from the label it is filled from.
    gender_fields = [
        ('Female', 'Female'),
        ('Male', 'Male'),
        ('Non-binary / third gender', 'Non-binary / third gender'),
        ('Prefer not to answer', 'Prefer not to say'),
    ]
    for output_key, mapped_label in gender_fields:
        summary[output_key] = label_counts.get(mapped_label, 0)

    return summary

# Summarise both samples: `df` is the full loaded dataset (before any
# exclusion), `df_filtered` is the sample left after the outlier,
# attention-check and reCAPTCHA exclusions.
summary_all = summarize_us_participants(df)
summary_filtered = summarize_us_participants(df_filtered)

# One row per sample, with a leading `Group` label column.
us_summary_table = pd.DataFrame([
    {'Group': 'All participants', **summary_all},
    {'Group': 'Participants eligible for analyses after exclusions', **summary_filtered}
])

us_summary_table


Unnamed: 0,Group,Country,N,M,SD,Female,Male,Non-binary / third gender,Prefer not to answer
0,All participants,USA,438,37.52,10.34,204,219,12,1
1,Participants eligible for analyses after exclu...,USA,373,37.71,10.28,173,188,11,1


<h3>2. Calculate Personality Trait (Mini-IPIP) Scores</h3>

In [5]:
def calculate_mini_ipip_scores(dataframe):
    """Compute the five Mini-IPIP trait scores from the 20 item columns.

    Steps, all performed on a copy of ``dataframe``:
      1. Items whose column name contains ``reverse_score`` are reverse-keyed
         in place (``6 - response``), keeping the same column name.
      2. Any item value that is not within 1-5 (missing values included) is
         reported with a warning and set to NaN.
      3. Each ``<trait>_score`` is the row mean of that trait's four items
         (NaN items are skipped by the mean).

    Because step 1 overwrites the item columns, calling this function on its
    own output reverses those items a second time.

    Args:
        dataframe: DataFrame holding the ``pq_*`` item columns. Not modified.

    Returns:
        A new DataFrame with reverse-keyed items and five ``*_score`` columns.
    """
    traits = {
        "extraversion": ["pq_1", "pq_6_reverse_score", "pq_11", "pq_16_reverse_score"],
        "agreeableness": ["pq_2", "pq_7_reverse_score", "pq_12", "pq_17_reverse_score"],
        "conscientiousness": ["pq_3", "pq_8_reverse_score", "pq_13", "pq_18_reverse_score"],
        "neuroticism": ["pq_4", "pq_9_reverse_score", "pq_14", "pq_19_reverse_score"],
        "openness": ["pq_5", "pq_10_reverse_score", "pq_15_reverse_score", "pq_20_reverse_score"]
    }

    df = dataframe.copy()

    # Reverse scoring: 6 - response maps the 1-5 scale onto itself, flipped
    for questions in traits.values():
        for q in questions:
            if "reverse_score" in q:
                df[q] = 6 - df[q]

    # Mean score per trait (1-5 scale)
    for trait, questions in traits.items():
        for q in questions:
            in_range = df[q].between(1, 5)
            if not in_range.all():
                print(f"Warning: Found values outside 1-5 range in {q}")
                # Series.where keeps in-range values and replaces the rest
                # with NaN, so this cell does not depend on numpy having been
                # imported by some other cell.
                df[q] = df[q].where(in_range)

        df[f"{trait}_score"] = df[questions].mean(axis=1)

    return df

def drop_invalid_trait_scores(df):
    """Return ``df`` without the listed pre-existing trait score columns.

    Columns from the list that are not present are silently ignored. The input
    frame itself is left untouched; a new frame is returned.
    """
    stale_score_columns = (
        "intellectImagination_score",
        "extraversion_score",
        "agreeableness_score",
        "conscientiousness_score",
        "neuroticism_score",
    )
    return df.drop(columns=list(stale_score_columns), errors='ignore')

def validate_scores(df):
    """Check that every Big Five ``<trait>_score`` column is complete and in 1-5.

    Two problems are reported separately, each via ``print``:
      * an observed score outside the 1-5 range ("Error: ..."), and
      * a missing score ("Warning: ...").

    Args:
        df: DataFrame containing the five ``*_score`` columns.

    Returns:
        True when no column has an out-of-range or missing score, else False.
    """
    traits = ["extraversion", "agreeableness", "conscientiousness",
              "neuroticism", "openness"]

    valid = True
    for trait in traits:
        scores = df[f"{trait}_score"]
        missing = scores.isna()

        # Range-check observed values only: NaN never satisfies .between(),
        # so including it would report a missing score as "outside range" too.
        if not scores[~missing].between(1, 5).all():
            print(f"Error: {trait} scores outside valid range (1-5)")
            valid = False

        if missing.any():
            print(f"Warning: Found missing values in {trait} scores")
            valid = False

    return valid

# Calculate new scores: drop pre-existing trait score columns, then recompute.
# NOTE(review): the result is assigned back to `df_filtered`, whose
# `*_reverse_score` items are now reverse-keyed in place. Re-running this cell
# without re-running the cells above would reverse those items a second time.
df_filtered = calculate_mini_ipip_scores(drop_invalid_trait_scores(df_filtered.copy()))

# Validate scores: stop the notebook if any trait score is missing or out of range.
if not validate_scores(df_filtered):
    raise ValueError("Score validation failed! Please check the input data for inconsistencies.")

# NOTE(review): this displays every column of the frame, including participant
# identifiers such as PROLIFIC_PID -- consider clearing the output before sharing.
df_filtered

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,Q_RecaptchaScore,Q_RelevantIDDuplicate,Q_RelevantIDDuplicateScore,Q_RelevantIDFraudScore,Q_RelevantIDLastStartDate,Intro,gender,age,races_1,races_2,races_3,races_4,races_5,races_6,races_7,employment_status,marital_status,children,household_income,education_level,state,country,political_orientaton,political_orientaton_4_TEXT,republican_strength,democrat_strength,party_alignment,political_Ideology_1,voted,vote_for,shopping_freq,monthly_spend,brands_type_pref_1,brands_type_pref_2,brands_type_pref_3,brands_type_pref_4,brands_type_pref_5,brands_type_pref_6,vacation_travel_freq,brand_importance,discount_importance,buy_decision_style,device_used_to_buy_1,device_used_to_buy_2,device_used_to_buy_3,device_used_to_buy_5,social_m_influence,eco_friendly_imp,pq_1,pq_2,pq_3,pq_4,pq_5,pq_6_reverse_score,pq_7_reverse_score,pq_8_reverse_score,pq_9_reverse_score,pq_attention_check_1,pq_10_reverse_score,pq_11,pq_12,pq_13,pq_attention_check_2,pq_14,pq_15_reverse_score,pq_16_reverse_score,pq_17_reverse_score,pq_18_reverse_score,pq_19_reverse_score,pq_20_reverse_score,p_1_openness_item_1,p_1_openness_item_2,p_1_openness_item_3,p_1_openness_item_4,p_1_openness_item_5,p_1_openness_item_6,p_1_consc_item_1,p_1_consc_item_2,p_1_consc_item_3,p_1_consc_item_4,p_1_consc_item_5,p_1_consc_item_6,p_1_extr_item_1,p_1_extr_item_2,p_1_extr_item_3,p_1_extr_item_4,p_1_extr_item_5,p_1_extr_item_6,p_1_agree_item_1,p_1_agree_item_2,p_1_agree_item_3,p_1_agree_item_4,p_1_agree_item_5,p_1_agree_item_6,p_1_neuro_item_1,p_1_neuro_item_2,p_1_neuro_item_3,p_1_neuro_item_4,p_1_neuro_item_5,p_1_neuro_item_6,p_2_openness_item_1,p_2_openness_item_2,p_2_openness_item_3,p_2_openness_item_4,p_2_openness_item_5,p_2_openness_item_6,p_2_consc_item_1,p_2_consc_item_2,p_2_consc_item_3,p_2_consc_item_4,p_2_consc_item_5,p_2_consc_item_6,p_2_extr_item_1,p_2_extr_item_2,p_2_extr_item_3,p_2_extr_item
_4,p_2_extr_item_5,p_2_extr_item_6,p_2_agree_item_1,p_2_agree_item_2,p_2_agree_item_3,p_2_agree_item_4,p_2_agree_item_5,p_2_agree_item_6,p_2_neuro_item_1,p_2_neuro_item_2,p_2_neuro_item_3,p_2_neuro_item_4,p_2_neuro_item_5,p_2_neuro_item_6,p_3_openness_item_1,p_3_openness_item_2,p_3_openness_item_3,p_3_openness_item_4,p_3_openness_item_5,p_3_openness_item_6,p_3_consc_item_1,p_3_consc_item_2,p_3_consc_item_3,p_3_consc_item_4,p_3_consc_item_5,p_3_consc_item_6,p_3_extr_item_1,p_3_extr_item_2,p_3_extr_item_3,p_3_extr_item_4,p_3_extr_item_5,p_3_extr_item_6,p_3_agree_item_1,p_3_agree_item_2,p_3_agree_item_3,p_3_agree_item_4,p_3_agree_item_5,p_3_agree_item_6,p_3_agree_att_check,p_3_neuro_item_1,p_3_neuro_item_2,p_3_neuro_item_3,p_3_neuro_item_4,p_3_neuro_item_5,p_3_neuro_item_6,post_shop_survey_q_1,post_shop_survey_q_2,post_shop_survey_q_3,post_shop_survey_q_4_1,post_shop_survey_q_4_2,post_shop_survey_q_4_3,post_shop_survey_q_4_4,post_shop_survey_q_4_5,post_shop_survey_q_4_6,post_shop_survey_q_4_6_TEXT,post_shop_q1,post_shop_q2,seriousness_check,Q_TotalDuration,DeviceIdentifier,source,UserAgent,PROLIFIC_PID,STUDY_ID,SESSION_ID,EarlyExit,CompleteExit,Duration,extraversion_score,agreeableness_score,conscientiousness_score,neuroticism_score,openness_score
0,2024-12-11 11:55:06,2024-12-11 12:05:02,0,100,595,1,2024-12-11 12:05:03,R_1RsxKBs5qCxppyg,anonymous,EN,1.0,,0,0,,2,3.0,25.0,1.0,,,,,,,1.0,5.0,0.0,1.0,2.0,50.0,,2.0,,,2.0,,1.0,1.0,2.0,3.0,2.0,,1.0,1.0,,,,1.0,2.0,4.0,2.0,1.0,,1.0,,1.0,2.0,1.0,3.0,3.0,3.0,4.0,4.0,4.0,2.0,2.0,5.0,5.0,1.0,2.0,3.0,2.0,2.0,5.0,1.0,4.0,2.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,4.0,3.0,3.0,2.0,2.0,4.0,4.0,3.0,3.0,3.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,2.0,1.0,2.0,4.0,4.0,3.0,3.0,2.0,2.0,2.0,1.0,2.0,2.0,2.0,1.0,4.0,4.0,3.0,4.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,3.0,4.0,3.0,2.0,2.0,3.0,4.0,1.0,,,1.0,,1.0,Practicality,2.0,5.0,2.0,595,,prolific_final,Mozilla/5.0 (Linux; Android 10; K) AppleWebKit...,60196ee67d099006f74c5bb3,675867a866926f4f99073407,67597d8c55c8dde7afa6c10a,,1.0,596.0,1.75,3.25,2.50,2.75,4.50
1,2024-12-11 11:50:07,2024-12-11 12:05:04,0,100,896,1,2024-12-11 12:05:05,R_6EfWDRRqI2QjBIw,anonymous,EN,0.6,,0,0,,2,2.0,53.0,1.0,,,,,,,4.0,1.0,2.0,3.0,2.0,34.0,,1.0,,1.0,,,6.0,1.0,1.0,2.0,2.0,,1.0,,,,,4.0,1.0,4.0,2.0,1.0,,1.0,,1.0,2.0,1.0,5.0,2.0,4.0,5.0,4.0,4.0,5.0,2.0,5.0,2.0,4.0,5.0,4.0,2.0,4.0,2.0,1.0,5.0,4.0,2.0,5.0,4.0,4.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,4.0,4.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,2.0,4.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,5.0,1.0,,1.0,1.0,,,,3.0,3.0,2.0,896,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:1...,664e46d7a85eca66b546f596,675867a866926f4f99073407,67597c65bfabd14e9d7328e3,,1.0,897.0,2.50,4.75,3.75,3.00,3.50
2,2024-12-11 11:57:42,2024-12-11 12:08:14,0,100,632,1,2024-12-11 12:08:15,R_3gi1rpD86szmDA0,anonymous,EN,0.9,,0,45,,2,1.0,24.0,,1.0,,,,,,1.0,1.0,2.0,2.0,6.0,33.0,,2.0,,,2.0,,4.0,1.0,1.0,2.0,3.0,,,,,1.0,,4.0,4.0,4.0,2.0,1.0,,,,5.0,4.0,4.0,4.0,3.0,4.0,4.0,3.0,3.0,3.0,2.0,5.0,2.0,5.0,5.0,3.0,2.0,3.0,2.0,2.0,1.0,2.0,2.0,1.0,4.0,4.0,4.0,4.0,4.0,5.0,4.0,5.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,4.0,2.0,2.0,4.0,3.0,5.0,3.0,4.0,4.0,3.0,4.0,5.0,5.0,4.0,5.0,4.0,5.0,4.0,4.0,4.0,3.0,5.0,5.0,4.0,4.0,4.0,3.0,3.0,4.0,4.0,5.0,4.0,3.0,5.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,5.0,4.0,4.0,5.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,5.0,4.0,2.0,4.0,4.0,4.0,5.0,4.0,4.0,4.0,3.0,4.0,1.0,,1.0,1.0,1.0,,,4.0,3.0,2.0,632,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,665acb8be8d04df6693caf4a,675867a866926f4f99073407,67597e2038e326389e732951,,1.0,632.0,3.50,3.25,2.75,2.75,2.25
3,2024-12-11 11:52:51,2024-12-11 12:08:26,0,100,934,1,2024-12-11 12:08:27,R_3J4R9lB4hKre9fl,anonymous,EN,0.7,,0,0,,2,2.0,23.0,1.0,,,,,,,1.0,5.0,0.0,1.0,1.0,26.0,,3.0,,,,2.0,2.0,1.0,2.0,2.0,2.0,,1.0,,,,,1.0,2.0,4.0,2.0,1.0,,1.0,1.0,4.0,4.0,4.0,5.0,2.0,3.0,5.0,2.0,4.0,4.0,3.0,5.0,5.0,1.0,5.0,4.0,2.0,3.0,5.0,2.0,3.0,3.0,4.0,5.0,3.0,3.0,2.0,3.0,2.0,1.0,5.0,5.0,4.0,4.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,4.0,3.0,4.0,2.0,1.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,2.0,3.0,2.0,1.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,3.0,3.0,3.0,2.0,4.0,4.0,4.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,5.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,1.0,,1.0,1.0,1.0,,,3.0,3.0,2.0,934,,prolific_final,Mozilla/5.0 (iPhone; CPU iPhone OS 18_1_1 like...,611171b294ac05e4a3ff358b,675867a866926f4f99073407,67597cfac04d321c32f5190e,,1.0,935.0,2.25,4.25,3.25,3.25,5.00
4,2024-12-11 11:51:57,2024-12-11 12:08:38,0,100,1000,1,2024-12-11 12:08:38,R_1TXGUtGN3HLFBNJ,anonymous,EN,0.8,,0,0,,2,2.0,32.0,1.0,1.0,1.0,,,,,2.0,5.0,3.0,1.0,3.0,14.0,,5.0,,,,2.0,3.0,2.0,,3.0,2.0,1.0,1.0,,,1.0,,1.0,4.0,3.0,1.0,1.0,1.0,,,2.0,1.0,4.0,4.0,1.0,4.0,4.0,5.0,4.0,3.0,5.0,5.0,3.0,4.0,4.0,3.0,2.0,3.0,4.0,4.0,4.0,3.0,3.0,4.0,3.0,3.0,2.0,3.0,2.0,2.0,4.0,3.0,2.0,3.0,2.0,2.0,3.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,3.0,3.0,4.0,4.0,3.0,3.0,2.0,2.0,2.0,2.0,4.0,3.0,4.0,2.0,3.0,3.0,4.0,3.0,3.0,2.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,3.0,3.0,3.0,2.0,2.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,4.0,3.0,3.0,2.0,2.0,3.0,4.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,3.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,2.0,3.0,3.0,2.0,2.0,2.0,1.0,,1.0,,,,,3.0,3.0,2.0,1000,,prolific_final,Mozilla/5.0 (Linux; Android 10; K) AppleWebKit...,66a8e4f1997807db5221e94f,675867a866926f4f99073407,67597cd1d68ff59c993155aa,,1.0,1001.0,4.25,4.00,2.50,3.75,3.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433,2024-12-12 21:35:55,2024-12-12 21:43:30,0,100,454,1,2024-12-12 21:43:31,R_3ab18asv3p68USs,anonymous,EN,1.0,,0,0,,2,1.0,25.0,1.0,,,,,,,1.0,5.0,0.0,7.0,3.0,21.0,,3.0,,,,1.0,4.0,2.0,,3.0,2.0,,1.0,,,,,1.0,2.0,3.0,2.0,,1.0,,1.0,1.0,1.0,2.0,2.0,2.0,1.0,5.0,2.0,1.0,5.0,4.0,5.0,4.0,1.0,3.0,4.0,2.0,1.0,5.0,2.0,2.0,5.0,4.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,5.0,5.0,5.0,5.0,5.0,5.0,2.0,2.0,2.0,,,1.0,,,,,4.0,5.0,2.0,454,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,60df7dd7029cff41ea8eed9b,675867a866926f4f99073407,675b55142275e60fc5450be7,,1.0,455.0,1.75,2.00,4.00,2.50,4.75
434,2024-12-12 21:29:43,2024-12-12 21:46:39,0,100,1015,1,2024-12-12 21:46:39,R_5fpfufbkb47NaWR,anonymous,EN,1.0,,0,20,,2,2.0,30.0,1.0,,,,,,,3.0,5.0,0.0,4.0,5.0,22.0,,2.0,,,1.0,,0.0,1.0,2.0,4.0,2.0,,1.0,,1.0,,1.0,2.0,2.0,3.0,1.0,1.0,,1.0,,2.0,5.0,3.0,5.0,3.0,5.0,5.0,3.0,5.0,1.0,5.0,5.0,2.0,5.0,5.0,4.0,2.0,4.0,1.0,2.0,5.0,2.0,5.0,5.0,2.0,3.0,1.0,2.0,2.0,2.0,4.0,4.0,3.0,4.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,2.0,2.0,5.0,4.0,4.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,5.0,5.0,4.0,4.0,2.0,3.0,3.0,2.0,2.0,1.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,1.0,1.0,1.0,1.0,1.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,2.0,4.0,4.0,5.0,4.0,4.0,5.0,4.0,4.0,5.0,,,,,,1.0,"transparency of functions, materials it was cr...",2.0,3.0,2.0,1015,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,666559e634fffd1ce19ba654,675867a866926f4f99073407,675b551ba440a027141b577c,,1.0,1016.0,3.25,5.00,2.50,4.75,3.25
435,2024-12-12 21:28:01,2024-12-12 22:00:54,0,100,1972,1,2024-12-12 22:00:55,R_6CVeGzO2Dfn3Hvs,anonymous,EN,1.0,,0,30,,2,1.0,53.0,,1.0,,,,,,1.0,1.0,2.0,5.0,6.0,33.0,,2.0,,,1.0,,5.0,1.0,2.0,4.0,3.0,,,,,1.0,,2.0,4.0,3.0,2.0,1.0,,,,4.0,5.0,4.0,4.0,5.0,4.0,4.0,1.0,1.0,5.0,2.0,5.0,4.0,2.0,4.0,2.0,2.0,1.0,4.0,3.0,4.0,1.0,3.0,4.0,4.0,4.0,5.0,4.0,4.0,4.0,4.0,1.0,5.0,5.0,5.0,5.0,4.0,3.0,4.0,4.0,3.0,3.0,4.0,5.0,4.0,4.0,4.0,5.0,4.0,2.0,2.0,3.0,4.0,3.0,4.0,5.0,5.0,4.0,5.0,4.0,4.0,5.0,5.0,5.0,5.0,4.0,4.0,5.0,5.0,5.0,4.0,5.0,5.0,4.0,5.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,4.0,5.0,4.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,5.0,5.0,5.0,5.0,5.0,3.0,4.0,5.0,5.0,5.0,2.0,5.0,5.0,5.0,4.0,4.0,5.0,4.0,3.0,4.0,1.0,1.0,,,,,,4.0,3.0,2.0,1972,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:1...,66d620579e7ea624a7a63e19,675867a866926f4f99073407,675b55510fa0335725e0ac1d,,1.0,1973.0,2.50,3.25,3.25,2.50,4.00
436,2024-12-12 21:30:04,2024-12-12 22:05:21,0,100,2117,1,2024-12-12 22:05:22,R_7Yg671143HnCPK1,anonymous,EN,0.9,,0,35,,2,2.0,34.0,,1.0,,,,,,1.0,1.0,2.0,5.0,5.0,48.0,,1.0,,2.0,,,1.0,1.0,1.0,3.0,2.0,,1.0,,1.0,1.0,,2.0,4.0,4.0,2.0,1.0,,1.0,,4.0,6.0,5.0,5.0,5.0,1.0,5.0,2.0,3.0,4.0,2.0,5.0,4.0,3.0,4.0,4.0,2.0,1.0,4.0,3.0,5.0,5.0,3.0,5.0,4.0,4.0,4.0,5.0,4.0,4.0,4.0,4.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,4.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,4.0,4.0,4.0,5.0,4.0,5.0,4.0,4.0,4.0,4.0,3.0,4.0,3.0,4.0,4.0,4.0,5.0,5.0,5.0,4.0,5.0,5.0,5.0,4.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0,2.0,5.0,5.0,5.0,5.0,4.0,5.0,5.0,5.0,5.0,,1.0,,1.0,,,,5.0,5.0,2.0,2117,,prolific_final,Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:1...,671829a403af514aabbac001,675867a866926f4f99073407,675b55bdfea5c9e3054828e0,,1.0,2117.0,3.25,4.25,4.50,1.75,4.50


<h3>3. Evaluate internal consistency (Cronbach's Alpha) of Mini-IPIP responses</h3>

In [6]:
import pingouin as pg

def calculate_mini_ipip_cronbach_alphas(df):
    """Compute Cronbach's alpha for each Mini-IPIP trait scale.

    Args:
        df: DataFrame containing the 20 ``pq_*`` item columns.

    Returns:
        dict mapping the trait label to the first element of the result of
        ``pg.cronbach_alpha`` for that trait's four item columns.
    """
    # Item columns that make up each trait scale
    scale_items = {
        "Extraversion": ['pq_1', 'pq_6_reverse_score', 'pq_11', 'pq_16_reverse_score'],
        "Agreeableness": ['pq_2', 'pq_7_reverse_score', 'pq_12', 'pq_17_reverse_score'],
        "Conscientiousness": ['pq_3', 'pq_8_reverse_score', 'pq_13', 'pq_18_reverse_score'],
        "Neuroticism": ['pq_4', 'pq_9_reverse_score', 'pq_14', 'pq_19_reverse_score'],
        "Openness": ['pq_5', 'pq_10_reverse_score', 'pq_15_reverse_score', 'pq_20_reverse_score']
    }

    # One alpha per scale, computed on just that scale's columns
    return {
        scale: pg.cronbach_alpha(data=df[columns])[0]
        for scale, columns in scale_items.items()
    }

# Calculate Cronbach's alpha for all Mini-IPIP traits on the analysis sample
# (by this point the reverse-keyed items in df_filtered are already reversed).
alphas = calculate_mini_ipip_cronbach_alphas(df_filtered)

# Report each alpha to three decimals.
for trait, alpha in alphas.items():
    print(f"Cronbach's alpha for {trait}: {alpha:.3f}")

Cronbach's alpha for Extraversion: 0.834
Cronbach's alpha for Agreeableness: 0.768
Cronbach's alpha for Conscientiousness: 0.751
Cronbach's alpha for Neuroticism: 0.798
Cronbach's alpha for Openness: 0.776


<h3>4. Evaluate internal consistency (Cronbach's Alpha) for Advertisement Effectiveness Scores</h3>

In [7]:
import pingouin as pg

def calculate_aes_cronbach_alphas(df):
    """Compute Cronbach's alpha for every product x trait AES item set.

    Each of the 3 products has, per Big Five trait, six item columns named
    ``p_<product>_<prefix>_item_<1..6>``.

    Args:
        df: DataFrame expected to contain the AES item columns.

    Returns:
        dict keyed ``"Product <n> - <Trait>"`` (15 entries, products in order,
        traits in the order listed below). The value is the first element of
        the ``pg.cronbach_alpha`` result, or ``None`` when any of the six item
        columns is absent from ``df``.
    """
    # Display label -> column-name prefix used for that trait's items
    trait_prefixes = {
        "Openness": "openness",
        "Conscientiousness": "consc",
        "Extraversion": "extr",
        "Agreeableness": "agree",
        "Neuroticism": "neuro",
    }

    trait_items = {
        f"Product {product} - {label}": [
            f"p_{product}_{prefix}_item_{item}" for item in range(1, 7)
        ]
        for product in (1, 2, 3)
        for label, prefix in trait_prefixes.items()
    }

    alphas = {}
    for trait, item_columns in trait_items.items():
        # Check for missing columns BEFORE subsetting: selecting a missing
        # column raises KeyError, which would bypass this fallback entirely.
        if not all(col in df.columns for col in item_columns):
            print("ALERT!!!")
            alphas[trait] = None  # Assign None if any column is missing
            continue

        # Calculate Cronbach's alpha using pingouin on this item set only
        alpha = pg.cronbach_alpha(data=df[item_columns])

        # Store the alpha value in the dictionary
        alphas[trait] = alpha[0]

    return alphas



# NOTE: this rebinds `alphas`, replacing the Mini-IPIP alphas from the previous cell.
alphas = calculate_aes_cronbach_alphas(df_filtered)

# A value of None marks an item set with missing columns.
for product, alpha in alphas.items():
    if alpha is not None:
        print(f"Cronbach's Alpha for {product}: {alpha:.3f}")
    else:
        print(f"Cronbach's Alpha for {product}: Data missing")


Cronbach's Alpha for Product 1 - Openness: 0.967
Cronbach's Alpha for Product 1 - Conscientiousness: 0.940
Cronbach's Alpha for Product 1 - Extraversion: 0.967
Cronbach's Alpha for Product 1 - Agreeableness: 0.963
Cronbach's Alpha for Product 1 - Neuroticism: 0.936
Cronbach's Alpha for Product 2 - Openness: 0.968
Cronbach's Alpha for Product 2 - Conscientiousness: 0.952
Cronbach's Alpha for Product 2 - Extraversion: 0.971
Cronbach's Alpha for Product 2 - Agreeableness: 0.963
Cronbach's Alpha for Product 2 - Neuroticism: 0.943
Cronbach's Alpha for Product 3 - Openness: 0.963
Cronbach's Alpha for Product 3 - Conscientiousness: 0.949
Cronbach's Alpha for Product 3 - Extraversion: 0.968
Cronbach's Alpha for Product 3 - Agreeableness: 0.956
Cronbach's Alpha for Product 3 - Neuroticism: 0.942


<h3>5. Aggregate Advertisement Effectiveness Scores (AES) to derive Dependent Variables: AES by trait and product</h3>

In [8]:
from sklearn.linear_model import LinearRegression
import numpy as np

def calculate_raw_and_residualized_aes(dataframe):
    """Add raw and residualized AES columns for each product and trait.

    For every product (1-3) and Big Five trait:
      * raw AES ``aes_<product>_<trait>`` is the row mean (NaN skipped) of the
        columns starting with ``p_<product>_<prefix>_item_``;
      * residualized AES ``aes_resd_<product>_<trait>`` is the residual of a
        linear regression of that raw AES on the raw AES of the product's
        other traits.

    A message is printed when a product/trait has no item columns, or when a
    trait has no other traits to be residualized against (in which case only
    the raw column is kept).

    Args:
        dataframe: DataFrame with the AES item columns. It is modified in
            place (new columns are added) and also returned.

    Returns:
        The same ``dataframe`` object with the added columns.
    """
    # Trait name used in output columns -> prefix used in item column names
    trait_prefixes = {
        "openness": "openness",
        "conscientiousness": "consc",
        "extraversion": "extr",
        "agreeableness": "agree",
        "neuroticism": "neuro"
    }

    for product in (1, 2, 3):
        # Raw AES series of the current product, keyed by trait
        raw_scores = {}

        # Step 1: raw AES = mean of the trait's item columns
        for trait, prefix in trait_prefixes.items():
            stem = f"p_{product}_{prefix}_item_"
            item_cols = [name for name in dataframe.columns if name.startswith(stem)]

            if not item_cols:
                print(f"No valid columns found for product {product}, trait {trait}!")
                continue

            raw_name = f"aes_{product}_{trait}"
            dataframe[raw_name] = dataframe[item_cols].mean(axis=1, skipna=True)
            raw_scores[trait] = dataframe[raw_name]

        # Step 2: residualize each raw AES on the other traits' raw AES
        for trait, outcome in raw_scores.items():
            other_scores = [
                raw_scores[other]
                for other in trait_prefixes
                if other != trait and other in raw_scores
            ]

            if not other_scores:
                # Nothing to regress on: keep the raw AES as it is
                dataframe[f"aes_{product}_{trait}"] = outcome
                print(f"Could not residualize AES for product {product}, trait {trait} due to missing predictors.")
                continue

            design = np.column_stack(other_scores)
            fitted = LinearRegression()
            fitted.fit(design, outcome)
            dataframe[f"aes_resd_{product}_{trait}"] = outcome - fitted.predict(design)

    return dataframe

# Pass a copy: the function adds its AES columns to the frame it is given.
df_filtered = calculate_raw_and_residualized_aes(df_filtered.copy())


<h3>6. Regression Analysis: using Big Five personality traits to predict respondents’ AES scores</h3>

In [9]:
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

def perform_regression_on_personality(df, aes_type='raw'):
    """Regress each AES outcome on the five Big Five trait scores.

    For every product (1-3) and every AES column of the chosen type, an OLS
    model with an intercept is fitted on standardized predictors and a
    standardized outcome, so the reported coefficients are standardized betas.

    Args:
        df: DataFrame with the five ``*_score`` columns and the AES columns.
        aes_type: ``'raw'`` for ``aes_<p>_<trait>`` columns or ``'resd'`` for
            ``aes_resd_<p>_<trait>`` columns.

    Returns:
        Tuple of three DataFrames (product 1, 2, 3). Rows are the trait score
        columns, columns are the AES outcomes, and each cell is the string
        ``"<beta> (<p-value>)"`` with 2 and 4 decimals respectively.

    Raises:
        ValueError: if ``aes_type`` is neither ``'raw'`` nor ``'resd'``.
    """
    # Personality trait columns (predictors)
    traits_cols = [
        'extraversion_score',
        'agreeableness_score',
        'conscientiousness_score',
        'neuroticism_score',
        'openness_score'
    ]

    if aes_type == 'raw':
        aes_prefix = 'aes'
    elif aes_type == 'resd':
        aes_prefix = 'aes_resd'
    else:
        # Fail with a clear message instead of an unbound-variable error later
        raise ValueError("aes_type must be 'raw' or 'resd'.")

    # AES columns grouped by product, in the same trait order for each product
    trait_order = ['extraversion', 'agreeableness', 'conscientiousness', 'neuroticism', 'openness']
    product_aes_cols = {
        f"p{product}": [f'{aes_prefix}_{product}_{trait}' for trait in trait_order]
        for product in (1, 2, 3)
    }

    # The predictors are identical for every outcome: standardize them and
    # add the intercept column once rather than inside the loops.
    X = sm.add_constant(StandardScaler().fit_transform(df[traits_cols]))

    results_dict = {}

    for product, aes_cols in product_aes_cols.items():
        # Results table for this product
        results = pd.DataFrame(index=traits_cols, columns=aes_cols)

        for aes_col in aes_cols:
            # Standardize the outcome
            y = StandardScaler().fit_transform(df[[aes_col]]).flatten()

            model = sm.OLS(y, X).fit()

            # Skip the intercept (first entry)
            coefficients = model.params[1:]
            p_values = model.pvalues[1:]

            # Coefficients that round to zero are printed as 0.00, never -0.00
            results[aes_col] = [
                f"{0.00 if round(coeff, 2) == 0 else coeff:.2f} ({pval:.4f})"
                for coeff, pval in zip(coefficients, p_values)
            ]

        results_dict[product] = results

    # Results for all three products
    return results_dict["p1"], results_dict["p2"], results_dict["p3"]


# Raw AES: one coefficient table per product.
raw_results_p1, raw_results_p2, raw_results_p3 = perform_regression_on_personality(df_filtered, aes_type='raw')

# `display` is not imported in this notebook -- presumably the IPython
# builtin available in notebook kernels; confirm if run as a plain script.
print("Human Participants: Regression Coefficient Matrix – P1: Cabin luggage")
display(raw_results_p1)
print("Human Participants: Regression Coefficient – P2: Packing Cubes")
display(raw_results_p2)
print("Human Participants: Regression Coefficient – P3:Water Bottle")
display(raw_results_p3)

# Residualized AES: same regressions on the aes_resd_* outcomes.
resd_results_p1, resd_results_p2, resd_results_p3 = perform_regression_on_personality(df_filtered, aes_type='resd')

print("Human Participants: Regression Coefficient Matrix – P1: Cabin luggage (Residualized AES)")
display(resd_results_p1)
print("Human Participants: Regression Coefficient – P2: Packing Cubes (Residualized AES)")
display(resd_results_p2)
print("Human Participants: Regression Coefficient – P3:Water Bottle (Residualized AES)")
display(resd_results_p3)


Human Participants: Regression Coefficient Matrix – P1: Cabin luggage


Unnamed: 0,aes_1_extraversion,aes_1_agreeableness,aes_1_conscientiousness,aes_1_neuroticism,aes_1_openness
extraversion_score,0.24 (0.0000),0.02 (0.6723),0.08 (0.1396),0.01 (0.8954),0.28 (0.0000)
agreeableness_score,0.08 (0.1520),0.29 (0.0000),0.12 (0.0307),0.17 (0.0031),0.10 (0.0719)
conscientiousness_score,0.08 (0.1612),0.04 (0.4097),0.08 (0.1621),0.09 (0.1206),0.12 (0.0213)
neuroticism_score,-0.08 (0.1458),-0.01 (0.8141),-0.12 (0.0313),-0.04 (0.5280),-0.05 (0.3301)
openness_score,-0.13 (0.0092),-0.12 (0.0223),-0.01 (0.8162),-0.03 (0.5951),-0.12 (0.0178)


Human Participants: Regression Coefficient – P2: Packing Cubes


Unnamed: 0,aes_2_extraversion,aes_2_agreeableness,aes_2_conscientiousness,aes_2_neuroticism,aes_2_openness
extraversion_score,0.26 (0.0000),0.01 (0.8211),0.14 (0.0125),0.03 (0.5468),0.24 (0.0000)
agreeableness_score,0.08 (0.1529),0.40 (0.0000),0.08 (0.1633),0.17 (0.0029),0.10 (0.0766)
conscientiousness_score,0.14 (0.0103),0.09 (0.0934),0.04 (0.5074),0.09 (0.1063),0.10 (0.0613)
neuroticism_score,-0.04 (0.4570),0.00 (0.9579),-0.06 (0.2976),-0.04 (0.5270),-0.06 (0.2979)
openness_score,-0.08 (0.1028),-0.05 (0.2776),0.07 (0.1924),0.02 (0.7435),-0.01 (0.8034)


Human Participants: Regression Coefficient – P3:Water Bottle


Unnamed: 0,aes_3_extraversion,aes_3_agreeableness,aes_3_conscientiousness,aes_3_neuroticism,aes_3_openness
extraversion_score,0.28 (0.0000),0.06 (0.2957),0.13 (0.0161),0.10 (0.0663),0.23 (0.0000)
agreeableness_score,0.02 (0.6853),0.30 (0.0000),0.12 (0.0361),0.20 (0.0003),0.06 (0.2674)
conscientiousness_score,0.11 (0.0396),0.06 (0.2450),0.10 (0.0563),0.04 (0.4996),0.16 (0.0025)
neuroticism_score,-0.01 (0.8704),-0.01 (0.8439),-0.04 (0.4473),-0.16 (0.0035),0.05 (0.3474)
openness_score,-0.06 (0.2344),-0.07 (0.1907),0.05 (0.3895),-0.04 (0.4156),-0.05 (0.3044)


Human Participants: Regression Coefficient Matrix – P1: Cabin luggage (Residualized AES)


Unnamed: 0,aes_resd_1_extraversion,aes_resd_1_agreeableness,aes_resd_1_conscientiousness,aes_resd_1_neuroticism,aes_resd_1_openness
extraversion_score,0.10 (0.0702),-0.13 (0.0184),0.02 (0.6794),-0.04 (0.4349),0.19 (0.0007)
agreeableness_score,-0.04 (0.5058),0.26 (0.0000),0.02 (0.6687),0.10 (0.0682),-0.03 (0.6449)
conscientiousness_score,-0.01 (0.8683),-0.02 (0.7351),0.02 (0.7806),0.06 (0.3253),0.09 (0.1062)
neuroticism_score,-0.06 (0.3035),0.03 (0.6298),-0.11 (0.0551),0.03 (0.6075),0.01 (0.8456)
openness_score,-0.06 (0.2288),-0.06 (0.2399),0.03 (0.5413),-0.01 (0.8650),-0.03 (0.5706)


Human Participants: Regression Coefficient – P2: Packing Cubes (Residualized AES)


Unnamed: 0,aes_resd_2_extraversion,aes_resd_2_agreeableness,aes_resd_2_conscientiousness,aes_resd_2_neuroticism,aes_resd_2_openness
extraversion_score,0.17 (0.0020),-0.15 (0.0052),0.13 (0.0245),-0.08 (0.1329),0.13 (0.0204)
agreeableness_score,-0.09 (0.1108),0.39 (0.0000),-0.07 (0.2108),0.09 (0.1257),-0.03 (0.6286)
conscientiousness_score,0.08 (0.1361),0.02 (0.7268),-0.03 (0.6069),0.05 (0.3303),0.01 (0.8487)
neuroticism_score,-0.01 (0.8453),0.04 (0.4995),-0.05 (0.4213),0.00 (0.9878),-0.04 (0.4855)
openness_score,-0.09 (0.0960),-0.05 (0.3453),0.08 (0.1161),0.00 (0.9641),0.04 (0.4109)


Human Participants: Regression Coefficient – P3:Water Bottle (Residualized AES)


Unnamed: 0,aes_resd_3_extraversion,aes_resd_3_agreeableness,aes_resd_3_conscientiousness,aes_resd_3_neuroticism,aes_resd_3_openness
extraversion_score,0.19 (0.0008),-0.07 (0.1996),0.03 (0.5425),0.01 (0.8137),0.07 (0.1904)
agreeableness_score,-0.09 (0.1241),0.26 (0.0000),0.00 (0.9521),0.10 (0.0707),-0.01 (0.8831)
conscientiousness_score,0.01 (0.8169),0.00 (0.9879),0.05 (0.3336),-0.03 (0.5588),0.11 (0.0490)
neuroticism_score,-0.02 (0.6594),0.04 (0.5273),0.00 (0.9999),-0.17 (0.0025),0.09 (0.1161)
openness_score,-0.03 (0.5197),-0.06 (0.2909),0.10 (0.0711),-0.04 (0.4814),-0.03 (0.6229)


<h3>7. Compute Pearson Correlations for Fisher's z test</h3>

In [10]:
import pandas as pd
import pingouin as pg

def compute_correlations_with_pvalues(df, aes_type='raw'):
    """Correlate every Big Five trait score with every AES column, per product.

    For each of the three products (keys ``"p1"``, ``"p2"``, ``"p3"``) each
    ``<trait>_score`` column is paired with that product's five AES columns
    and handed to ``pg.corr``; the ``"r"`` and ``"p-val"`` entries of the
    first row of its result are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the five ``<trait>_score`` columns and, for each product
        number ``n`` in 1..3, the columns ``<prefix>_<n>_<trait>``.
    aes_type : {'raw', 'resd'}, default 'raw'
        Selects the AES column prefix: ``'raw'`` -> ``aes``,
        ``'resd'`` -> ``aes_resd``.

    Returns
    -------
    results : dict
        Product key -> DataFrame (trait score columns as index, AES columns
        as columns) whose cells are strings of the form ``"<r> (p=<p>)"``
        with two and six decimals respectively.
    p_vals : dict
        Product key -> flat list of p-values, ordered trait by trait and,
        within a trait, AES column by AES column.

    Raises
    ------
    ValueError
        If ``aes_type`` is neither ``'raw'`` nor ``'resd'``.
    """
    # Reject unknown modes up front, before touching the data
    if aes_type not in ('raw', 'resd'):
        raise ValueError("aes_type must be 'raw' or 'resd'.")
    prefix = 'aes' if aes_type == 'raw' else 'aes_resd'

    # Big Five trait names; both the score columns and the AES columns derive from them
    trait_names = ['extraversion', 'agreeableness', 'conscientiousness', 'neuroticism', 'openness']
    score_cols = [f'{name}_score' for name in trait_names]

    results, p_vals = {}, {}

    for number in (1, 2, 3):
        key = f'p{number}'
        aes_cols = [f'{prefix}_{number}_{name}' for name in trait_names]

        table = pd.DataFrame(index=score_cols, columns=aes_cols)
        collected = []

        # Row-major walk: the order of `collected` is what callers receive in p_vals
        for score_col, aes_col in ((s, a) for s in score_cols for a in aes_cols):
            stats = pg.corr(df[score_col], df[aes_col])
            r_value = stats["r"].values[0]
            p_value = stats["p-val"].values[0]
            table.loc[score_col, aes_col] = f"{r_value:.2f} (p={p_value:.6f})"
            collected.append(p_value)

        results[key] = table
        p_vals[key] = collected

    return results, p_vals

# Run the correlation analysis on df_filtered using the raw AES columns;
# p_vals keeps the per-product p-value lists for later cells.
results, p_vals = compute_correlations_with_pvalues(df_filtered, aes_type='raw')

# Show one titled table per product, in product order
for product_number in (1, 2, 3):
    print(f"Pearson Correlations with p-values - Product {product_number}")
    display(results[f"p{product_number}"])

Pearson Correlations with p-values - Product 1


Unnamed: 0,aes_1_extraversion,aes_1_agreeableness,aes_1_conscientiousness,aes_1_neuroticism,aes_1_openness
extraversion_score,0.27 (p=0.000000),0.11 (p=0.036678),0.15 (p=0.003895),0.08 (p=0.145869),0.32 (p=0.000000)
agreeableness_score,0.13 (p=0.015299),0.27 (p=0.000000),0.15 (p=0.004564),0.17 (p=0.001145),0.16 (p=0.001403)
conscientiousness_score,0.14 (p=0.005999),0.07 (p=0.151425),0.14 (p=0.006310),0.11 (p=0.028949),0.18 (p=0.000338)
neuroticism_score,-0.13 (p=0.009492),-0.02 (p=0.755569),-0.16 (p=0.002130),-0.06 (p=0.230070),-0.13 (p=0.010839)
openness_score,-0.08 (p=0.141616),-0.04 (p=0.405630),0.04 (p=0.454949),0.02 (p=0.739400),-0.05 (p=0.297600)


Pearson Correlations with p-values - Product 2


Unnamed: 0,aes_2_extraversion,aes_2_agreeableness,aes_2_conscientiousness,aes_2_neuroticism,aes_2_openness
extraversion_score,0.30 (p=0.000000),0.15 (p=0.004083),0.19 (p=0.000273),0.11 (p=0.037107),0.29 (p=0.000000)
agreeableness_score,0.15 (p=0.003724),0.40 (p=0.000000),0.14 (p=0.006075),0.19 (p=0.000259),0.18 (p=0.000554)
conscientiousness_score,0.19 (p=0.000200),0.12 (p=0.020933),0.08 (p=0.117623),0.12 (p=0.021403),0.16 (p=0.002011)
neuroticism_score,-0.13 (p=0.015041),-0.02 (p=0.667374),-0.10 (p=0.056247),-0.07 (p=0.167941),-0.13 (p=0.011305)
openness_score,-0.03 (p=0.614649),0.05 (p=0.342520),0.11 (p=0.030155),0.07 (p=0.200023),0.05 (p=0.349856)


Pearson Correlations with p-values - Product 3


Unnamed: 0,aes_3_extraversion,aes_3_agreeableness,aes_3_conscientiousness,aes_3_neuroticism,aes_3_openness
extraversion_score,0.30 (p=0.000000),0.16 (p=0.002590),0.20 (p=0.000135),0.19 (p=0.000212),0.25 (p=0.000001)
agreeableness_score,0.11 (p=0.037742),0.31 (p=0.000000),0.18 (p=0.000576),0.22 (p=0.000017),0.13 (p=0.009089)
conscientiousness_score,0.15 (p=0.003172),0.10 (p=0.063146),0.15 (p=0.004896),0.12 (p=0.016668),0.18 (p=0.000520)
neuroticism_score,-0.09 (p=0.075096),-0.03 (p=0.562831),-0.10 (p=0.044500),-0.18 (p=0.000417),-0.04 (p=0.419446)
openness_score,-0.02 (p=0.744384),0.02 (p=0.743352),0.10 (p=0.065940),0.04 (p=0.497208),-0.01 (p=0.810244)


<h3>8. Store p-values for FDR correction</h3>

In [11]:
import json

# Collect the per-product p-value lists under the key names written to the JSON file
# (keys are reproduced exactly as-is, since other files may look them up by name).
pvals = {
    f"{product_key}_human_correlatins_analysis": p_vals[product_key]
    for product_key in ("p1", "p2", "p3")
}

# Write the p-values to disk as indented JSON
with open("../p_value_correction/human_correlations_pvals.json", "w") as fh:
    json.dump(pvals, fh, indent=4)