<h1>GPT-5</h1>

<h1>Imports</h1>

In [20]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, norm, chi2
import pingouin as pg
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

<h1>Evaluate Internal Consistency of Six 5-Point Likert Scale Items on Advertisement Effectiveness Using Cronbach's Alpha</h1>

In [21]:
def calculate_aes_cronbach_alphas(df):
    """Compute Cronbach's alpha for every product x trait block of six AES items.

    Item columns follow the pattern ``p_<product>_<prefix>_item_<1..6>`` for
    products 1-3 and the five trait prefixes listed below.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the item columns.

    Returns
    -------
    dict
        Maps ``"Product <n> - <Trait>"`` to the alpha value (first element of
        ``pg.cronbach_alpha``'s result), or to ``None`` when any of the block's
        item columns is absent from ``df``. Keys are ordered by product, then
        Openness, Conscientiousness, Extraversion, Agreeableness, Neuroticism.
    """
    import warnings

    # Display label -> column-name prefix used in the item columns.
    trait_prefixes = {
        "Openness": "openness",
        "Conscientiousness": "consc",
        "Extraversion": "extr",
        "Agreeableness": "agree",
        "Neuroticism": "neuro",
    }

    trait_items = {
        f"Product {product_num} - {trait_label}": [
            f"p_{product_num}_{prefix}_item_{item_num}" for item_num in range(1, 7)
        ]
        for product_num in (1, 2, 3)
        for trait_label, prefix in trait_prefixes.items()
    }

    alphas = {}

    for trait, item_columns in trait_items.items():
        # Check for missing columns BEFORE indexing the frame; indexing first
        # would raise a bare KeyError and make this check unreachable.
        missing = [col for col in item_columns if col not in df.columns]
        if missing:
            warnings.warn(
                f"Cannot compute Cronbach's alpha for {trait}: missing columns {missing}"
            )
            alphas[trait] = None  # Callers print "Data missing" for None
            continue

        # pg.cronbach_alpha returns a tuple; its first element is the alpha value
        alpha = pg.cronbach_alpha(data=df[item_columns])
        alphas[trait] = alpha[0]

    return alphas

# Load the synthetic participants and report one Cronbach's alpha per product/trait block.
df_with_personality_scores = pd.read_csv('../../data/synthetic_participants_gpt_5_latest.csv')
alphas = calculate_aes_cronbach_alphas(df_with_personality_scores)

for product, alpha in alphas.items():
    # A None alpha means the block's item columns were not all present.
    shown = f"{alpha:.3f}" if alpha is not None else "Data missing"
    print(f"Cronbach's Alpha for {product}: {shown}")



Cronbach's Alpha for Product 1 - Openness: 0.950
Cronbach's Alpha for Product 1 - Conscientiousness: 0.883
Cronbach's Alpha for Product 1 - Extraversion: 0.959
Cronbach's Alpha for Product 1 - Agreeableness: 0.961
Cronbach's Alpha for Product 1 - Neuroticism: 0.878
Cronbach's Alpha for Product 2 - Openness: 0.965
Cronbach's Alpha for Product 2 - Conscientiousness: 0.880
Cronbach's Alpha for Product 2 - Extraversion: 0.949
Cronbach's Alpha for Product 2 - Agreeableness: 0.981
Cronbach's Alpha for Product 2 - Neuroticism: 0.906
Cronbach's Alpha for Product 3 - Openness: 0.973
Cronbach's Alpha for Product 3 - Conscientiousness: 0.940
Cronbach's Alpha for Product 3 - Extraversion: 0.963
Cronbach's Alpha for Product 3 - Agreeableness: 0.981
Cronbach's Alpha for Product 3 - Neuroticism: 0.902


<h1>Aggregate Advertisement Effectiveness Scores (AES) Responses to Derive Dependent Variables: AES by Trait and Product</h1>

In [22]:
from sklearn.linear_model import LinearRegression
import numpy as np

def calculate_raw_and_residualized_aes(dataframe):
    """Add raw and residualized AES columns for every product/trait pair.

    For each product (1-3) and trait:

    * ``raw_aes_<product>_<trait>`` is the row-wise mean of all columns whose
      name starts with ``p_<product>_<prefix>_item_`` (missing items skipped).
    * ``aes_<product>_<trait>`` is the residual of that raw score after a
      linear regression on the raw scores of the other traits of the same
      product.

    Rows where the target or any predictor raw score is NaN are excluded from
    the fit and receive a NaN residual (LinearRegression rejects NaN input).
    With complete data the result is identical to fitting on all rows.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with the item columns. It is modified IN PLACE: the new columns
        are added to this very object.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object that was passed in.
    """
    product_numbers = [1, 2, 3]
    # Trait name used in output column names -> prefix used in item column names.
    traits = {
        "openness": "openness",
        "conscientiousness": "consc",
        "extraversion": "extr",
        "agreeableness": "agree",
        "neuroticism": "neuro"
    }

    for product_num in product_numbers:
        # Raw AES series per trait for the current product (insertion order = `traits` order)
        raw_aes_columns = {}

        # Step 1: Calculate Raw AES
        for trait_name, trait_prefix in traits.items():
            item_prefix = f"p_{product_num}_{trait_prefix}_item_"
            target_columns = [col for col in dataframe.columns if col.startswith(item_prefix)]

            if not target_columns:
                print(f"No valid columns found for product {product_num}, trait {trait_name}!")
                continue

            raw_column = f"raw_aes_{product_num}_{trait_name}"
            dataframe[raw_column] = dataframe[target_columns].mean(axis=1, skipna=True)
            raw_aes_columns[trait_name] = dataframe[raw_column]

        # Step 2: Calculate Residualized AES
        for target_trait, raw_aes_target in raw_aes_columns.items():
            residual_column = f"aes_{product_num}_{target_trait}"

            # Raw AES scores of the other available traits act as predictors
            predictors = [
                raw_aes_columns[other_trait]
                for other_trait in raw_aes_columns
                if other_trait != target_trait
            ]

            if not predictors:
                # If no predictors are available, retain raw AES
                dataframe[residual_column] = raw_aes_target
                print(f"Could not residualize AES for product {product_num}, trait {target_trait} due to missing predictors.")
                continue

            predictors_matrix = np.column_stack(predictors)

            # Only rows with a value for the target and every predictor can be fitted.
            complete_rows = (
                raw_aes_target.notna().to_numpy()
                & ~np.isnan(predictors_matrix).any(axis=1)
            )

            residuals = pd.Series(np.nan, index=raw_aes_target.index, dtype=float)
            if complete_rows.any():
                regression_model = LinearRegression()
                regression_model.fit(predictors_matrix[complete_rows], raw_aes_target[complete_rows])
                fitted = regression_model.predict(predictors_matrix[complete_rows])
                residuals.loc[complete_rows] = (raw_aes_target[complete_rows] - fitted).to_numpy()
            else:
                print(f"Could not residualize AES for product {product_num}, trait {target_trait}: no complete rows.")

            # Save residualized AES
            dataframe[residual_column] = residuals

    return dataframe

# NOTE: calculate_raw_and_residualized_aes adds its columns to the frame it receives and
# returns that same object, so both names on the next line refer to one DataFrame.
df_aes_with_aes_residualized = calculate_raw_and_residualized_aes(df_with_personality_scores)

df_aes_with_aes_residualized

#store the dataset into a new csv file that we can use to construct synthetic twins
# df_aes_with_aes_residualized.to_csv('../data/synth_twins_gpt_4o_dataset.csv')


Unnamed: 0,p_1_agree_item_1,p_1_agree_item_2,p_1_agree_item_3,p_1_agree_item_4,p_1_agree_item_5,p_1_agree_item_6,p_1_consc_item_1,p_1_consc_item_2,p_1_consc_item_3,p_1_consc_item_4,...,raw_aes_3_openness,raw_aes_3_conscientiousness,raw_aes_3_extraversion,raw_aes_3_agreeableness,raw_aes_3_neuroticism,aes_3_openness,aes_3_conscientiousness,aes_3_extraversion,aes_3_agreeableness,aes_3_neuroticism
0,4,4,4,4,4,4,4,4,4,4,...,4.000000,4.000000,3.166667,4.000000,4.000000,0.355473,-0.026191,-0.299219,-0.053342,-0.053293
1,3,3,2,3,3,3,3,4,2,3,...,3.166667,4.000000,2.000000,3.000000,4.000000,0.179114,0.079268,-0.863000,-0.432667,-0.001742
2,3,3,3,4,3,3,4,4,3,4,...,4.666667,4.000000,4.000000,3.333333,4.333333,0.785371,-0.233451,0.109462,-0.933966,0.143509
3,2,4,2,4,4,3,4,5,3,5,...,4.833333,4.666667,5.000000,3.166667,5.000000,0.265316,-0.096283,0.870244,-1.624757,0.228938
4,3,4,2,4,4,3,4,4,2,4,...,4.000000,4.000000,2.833333,3.666667,4.000000,0.554769,-0.001519,-0.572352,-0.312034,-0.053584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,4,5,4,5,4,4,5,5,4,5,...,4.333333,4.333333,4.000000,4.833333,4.333333,0.004182,-0.063644,0.122269,0.335460,0.018060
369,5,5,5,5,5,5,4,5,4,5,...,5.000000,5.000000,4.000000,5.000000,4.833333,0.309312,0.111657,-0.326908,-0.143830,0.004257
370,5,5,5,5,5,5,4,4,4,4,...,4.000000,5.000000,4.000000,5.000000,4.500000,-0.587103,0.451971,0.387058,0.027300,-0.279927
371,4,4,4,4,4,4,4,5,4,4,...,4.000000,4.000000,3.000000,3.500000,4.000000,0.536245,0.025952,-0.375586,-0.516021,-0.076659


<h1>Regression Analysis: Scores on the Big Five Traits as Predictors of Respondents' Ratings of the Advertisements' Effectiveness</h1>

In [23]:
%%time

import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

def perform_regression_on_personality(df, aes_cols, traits_cols):
    """Regress each AES column on all trait columns and tabulate the coefficients.

    Both the predictors and the outcome are z-standardized, so the reported
    coefficients are standardized betas. An intercept is added to the design
    matrix but is not reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``aes_cols`` and ``traits_cols``.
    aes_cols : list of str
        Outcome columns; one OLS model is fitted per column.
    traits_cols : list of str
        Predictor columns, used jointly in every model.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``traits_cols`` with one column per entry of ``aes_cols``;
        each cell is the string ``"<beta> (p=<p-value>)"``.
    """
    results = pd.DataFrame(index=traits_cols, columns=aes_cols)

    # The design matrix is the same for every outcome: standardize the
    # predictors and prepend the intercept column once, outside the loop.
    X_scaled = sm.add_constant(StandardScaler().fit_transform(df[traits_cols]))

    for aes_col in aes_cols:
        # Standardize the outcome with its own scaler
        y = StandardScaler().fit_transform(df[[aes_col]]).flatten()

        # Fit OLS model
        model = sm.OLS(y, X_scaled).fit()

        # Extract beta coefficients and p-values (position 0 is the constant)
        coefficients = model.params[1:]
        p_values = model.pvalues[1:]

        results[aes_col] = [f"{beta:.2f} (p={p:.4f})" for beta, p in zip(coefficients, p_values)]

    return results
    
# Define AES and personality trait columns (same trait order for every product)
regression_trait_order = ['extraversion', 'agreeableness', 'conscientiousness', 'neuroticism', 'openness']

aes_cols_product1 = [f'aes_1_{trait}' for trait in regression_trait_order]
aes_cols_product2 = [f'aes_2_{trait}' for trait in regression_trait_order]
aes_cols_product3 = [f'aes_3_{trait}' for trait in regression_trait_order]

personality_cols = [f'{trait}_score' for trait in regression_trait_order]

# Run the analysis for each product, keyed by product number
regression_results = {
    product_num: perform_regression_on_personality(df_aes_with_aes_residualized, product_aes_cols, personality_cols)
    for product_num, product_aes_cols in (
        (1, aes_cols_product1),
        (2, aes_cols_product2),
        (3, aes_cols_product3),
    )
}

regression_results

CPU times: user 19.1 ms, sys: 1.83 ms, total: 21 ms
Wall time: 20.1 ms


{1:                         aes_1_extraversion aes_1_agreeableness  \
 extraversion_score         0.41 (p=0.0000)    -0.12 (p=0.0200)   
 agreeableness_score        0.03 (p=0.4911)     0.31 (p=0.0000)   
 conscientiousness_score   -0.20 (p=0.0001)     0.09 (p=0.0887)   
 neuroticism_score         -0.01 (p=0.7934)     0.21 (p=0.0001)   
 openness_score            -0.35 (p=0.0000)    -0.12 (p=0.0153)   
 
                         aes_1_conscientiousness aes_1_neuroticism  \
 extraversion_score             -0.05 (p=0.3729)   0.11 (p=0.0392)   
 agreeableness_score            -0.08 (p=0.1123)   0.16 (p=0.0044)   
 conscientiousness_score         0.30 (p=0.0000)   0.11 (p=0.0336)   
 neuroticism_score              -0.17 (p=0.0010)   0.24 (p=0.0000)   
 openness_score                 -0.01 (p=0.8549)  -0.04 (p=0.4028)   
 
                            aes_1_openness  
 extraversion_score       -0.20 (p=0.0000)  
 agreeableness_score      -0.19 (p=0.0000)  
 conscientiousness_score  -0.24 (p=0

In [24]:
import pandas as pd

# Tag each source with its group, then stack humans on top of the synthetic twins.
# .assign returns a new frame, so the upstream df_aes_with_aes_residualized is not
# modified by this cell and the cell can be re-run safely.
df_humans = pd.read_csv('../../data/filtered_participants_dataset.csv').assign(Group='Humans')
df_synths = df_aes_with_aes_residualized.assign(Group='Synthetic Twins')

# Combine both DataFrames (done once, after the Group labels exist)
df_combined = pd.concat([df_humans, df_synths], axis=0).reset_index(drop=True)

# df_combined
df_combined[aes_cols_product1 + aes_cols_product2 + aes_cols_product3]

Unnamed: 0,aes_1_extraversion,aes_1_agreeableness,aes_1_conscientiousness,aes_1_neuroticism,aes_1_openness,aes_2_extraversion,aes_2_agreeableness,aes_2_conscientiousness,aes_2_neuroticism,aes_2_openness,aes_3_extraversion,aes_3_agreeableness,aes_3_conscientiousness,aes_3_neuroticism,aes_3_openness
0,-0.589320,0.547696,-0.590881,-0.071894,-0.721854,-0.377274,-0.311016,-0.612154,0.093235,-0.698012,-0.939937,-0.883497,0.071693,0.203355,-0.169196
1,-0.581986,-1.097038,0.395912,0.141924,0.416278,-0.386329,-0.935341,0.348401,0.070223,0.870100,-0.435835,-0.519799,0.325971,0.304744,-0.072157
2,0.153302,0.193889,0.422019,-0.694482,0.490547,0.758704,-0.461896,0.783933,-0.205457,0.103993,0.234217,0.235637,0.014028,0.051043,0.239061
3,1.345917,-0.450205,0.999941,-0.431397,-1.119932,0.473976,0.727811,0.133040,-0.083276,-0.916147,0.228827,-0.168965,0.778575,-0.242742,0.169427
4,0.661316,0.624082,-0.127629,-1.174486,-0.606445,-0.095666,-0.507207,0.156978,-1.216210,0.735899,0.938994,-0.155936,-0.600658,-0.230803,-0.617504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741,-0.000858,0.131356,0.237674,-0.082201,-0.255310,0.512643,-0.006197,-0.283563,0.310982,-0.207914,0.122269,0.335460,-0.063644,0.018060,0.004182
742,-0.142328,0.175878,-0.131352,0.130994,0.384263,0.351577,0.100405,-0.070931,0.159525,-0.217061,-0.326908,-0.143830,0.111657,0.004257,0.309312
743,0.474463,0.669795,-0.072137,-0.109702,-0.258435,0.511697,0.231352,-0.207367,0.140250,-0.091504,0.387058,0.027300,0.451971,-0.279927,-0.587103
744,0.294994,-0.246736,0.094873,-0.191768,-0.008109,-0.481126,-0.844734,0.050159,0.038576,0.447382,-0.375586,-0.516021,0.025952,-0.076659,0.536245


In [25]:
import numpy as np
from scipy.stats import linregress, t
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def _fit_line_with_confidence_band(x, y, confidence):
    """Least-squares line of y on x plus a pointwise confidence band for the fitted mean.

    Returns None when the fit is undefined (fewer than 3 points, or all x equal).
    Otherwise returns ``(x_sorted, fit_sorted, lower_sorted, upper_sorted, p_value)``,
    all arrays ordered by ascending x; ``p_value`` is the one reported by
    ``scipy.stats.linregress`` for the slope.
    """
    n = len(x)
    mean_x = np.mean(x)
    sum_sq_x = np.sum((x - mean_x) ** 2)
    if n < 3 or sum_sq_x == 0:
        return None

    slope, intercept, _r_value, p_value, _std_err = linregress(x, y)
    y_pred = intercept + slope * x

    # Standard error of the fitted mean at each x, using n - 2 residual degrees of freedom
    t_val = t.ppf((1 + confidence) / 2., df=n - 2)
    residual_se = np.sqrt(np.sum((y - y_pred) ** 2) / (n - 2))
    se_line = residual_se * np.sqrt(1 / n + (x - mean_x) ** 2 / sum_sq_x)

    order = np.argsort(x)
    return (
        x[order],
        y_pred[order],
        (y_pred - t_val * se_line)[order],
        (y_pred + t_val * se_line)[order],
        p_value,
    )


def create_scatter_plots_with_fit_grouped(df_combined, aes_cols, traits_cols, product_name,
                                          confidence=0.95, alpha=0.020397):
    """Build a 5 x 2 grid of trait-score vs. AES scatter plots (Humans | Synthetic Twins).

    Each panel shows the raw points, the least-squares line, its confidence band,
    and a Significant / Non-Significant label for the slope.

    Parameters
    ----------
    df_combined : pandas.DataFrame
        Must contain a ``"Group"`` column with the values ``"Humans"`` and
        ``"Synthetic Twins"``, the ``<trait>_score`` columns and ``aes_cols``.
    aes_cols : list of str
        AES columns; the one ending in a trait name is paired with that trait.
    traits_cols : list of str
        Trait score columns that may be plotted; traits whose ``<trait>_score``
        is not listed here are skipped.
    product_name : str
        Used in the figure title.
    confidence : float
        Confidence level of the band around the regression line.
    alpha : float
        alpha is the decision cutoff (our FDR-adjusted threshold).

    Returns
    -------
    plotly.graph_objects.Figure
    """
    traits_order = ['openness','conscientiousness','extraversion','agreeableness','neuroticism']
    groups = ["Humans", "Synthetic Twins"]
    fig = make_subplots(
        rows=len(traits_order), cols=2,
        subplot_titles=groups * len(traits_order),
        vertical_spacing=0.12, horizontal_spacing=0.12
    )

    def format_p(p, threshold=alpha):
        # Below the cutoff only "< cutoff" is shown; the leading zero is dropped either way.
        return f"< {threshold:.6f}".replace("0.", ".") if p < threshold else f"= {p:.6f}".replace("0.", ".")

    for i, trait in enumerate(traits_order):
        trait_col = f"{trait}_score"
        aes_col = next((c for c in aes_cols if c.endswith(trait)), None)
        if aes_col is None or trait_col not in traits_cols:
            continue

        for j, group in enumerate(groups):
            data = df_combined[df_combined["Group"] == group][[trait_col, aes_col]].dropna()
            if data.empty:
                continue

            x = data[trait_col].values
            y = data[aes_col].values
            row, col = i + 1, j + 1

            # Scatter
            fig.add_trace(go.Scatter(
                x=x, y=y, mode='markers',
                marker=dict(color='black', opacity=0.3),
                showlegend=False
            ), row=row, col=col)

            # Axes: x carries the trait score, y the effectiveness score of the trait-tailored ad
            fig.update_xaxes(title_text=f"{trait.capitalize()} Score", row=row, col=col)
            fig.update_yaxes(title_text=f"AES: {trait.capitalize()}-Tailored Ad", row=row, col=col)

            fit = _fit_line_with_confidence_band(x, y, confidence)
            if fit is None:
                # Too few points or no spread in x: show the points only.
                continue
            x_sorted, y_sorted, ci_lower_sorted, ci_upper_sorted, p_value = fit

            # Regression line
            fig.add_trace(go.Scatter(
                x=x_sorted, y=y_sorted, mode='lines',
                line=dict(color='blue'),
                showlegend=False
            ), row=row, col=col)

            # Confidence band (upper edge left-to-right, lower edge right-to-left)
            fig.add_trace(go.Scatter(
                x=np.concatenate([x_sorted, x_sorted[::-1]]),
                y=np.concatenate([ci_upper_sorted, ci_lower_sorted[::-1]]),
                fill='toself', fillcolor='rgba(0,0,0,0.15)',
                line=dict(color='rgba(255,255,255,0)'),
                hoverinfo="skip", showlegend=False
            ), row=row, col=col)

            # Annotation: bold Significant / Non-Significant + p-value
            sig = p_value < alpha
            label = "<b>Significant</b>" if sig else "<b>Non-Significant</b>"
            color = "green" if sig else "red"
            text = f"{label} (p {format_p(p_value)})"

            fig.add_annotation(
                row=row, col=col, xref="x domain", yref="y domain",
                x=0.02, y=0.98, showarrow=False, align="left",
                text=text, font=dict(size=14, color=color),
                bgcolor="rgba(255,255,255,0.9)"
            )

    fig.update_layout(
        height=300 * len(traits_order),
        width=1400,
        title_text=f"{product_name} – AES Scores: Humans vs Synthetic Twins (GPT-5-latest)",
        template="simple_white",
        showlegend=False
    )
    return fig


In [26]:
# Raw (non-residualized) AES columns per product, in the same trait order as the residualized lists
raw_trait_order = ('extraversion', 'agreeableness', 'conscientiousness', 'neuroticism', 'openness')
raw_aes_cols_product1 = [f'raw_aes_1_{trait}' for trait in raw_trait_order]
raw_aes_cols_product2 = [f'raw_aes_2_{trait}' for trait in raw_trait_order]
raw_aes_cols_product3 = [f'raw_aes_3_{trait}' for trait in raw_trait_order]

# One figure per product, built and shown in product order
fig1 = create_scatter_plots_with_fit_grouped(
    df_combined, raw_aes_cols_product1, personality_cols, "Cabin Luggage"
)
fig1.show()

fig2 = create_scatter_plots_with_fit_grouped(
    df_combined, raw_aes_cols_product2, personality_cols, "Packing Cubes"
)
fig2.show()

fig3 = create_scatter_plots_with_fit_grouped(
    df_combined, raw_aes_cols_product3, personality_cols, "Water Bottle"
)
fig3.show()

<h1>Box Plots</h1>

In [27]:
import pandas as pd
import plotly.express as px
import scipy.stats as stats

def compute_95ci(data):
    """Return the (lower, upper) bounds of a t-based 95% CI for the mean of ``data``.

    The half-width is the standard error of the mean times the 97.5th percentile
    of Student's t with ``len(data) - 1`` degrees of freedom. Both bounds are
    rounded to two decimals.
    """
    center = data.mean()
    degrees_of_freedom = len(data) - 1
    half_width = stats.t.ppf(0.975, df=degrees_of_freedom) * stats.sem(data)
    lower, upper = center - half_width, center + half_width
    return round(lower, 2), round(upper, 2)

def plot_trait_boxplots_by_product(col_prefix, product_name):
    """Show one Humans-vs-Synthetic-Twins box plot per trait for a single product.

    Reads the module-level ``df_combined``. For each trait the column
    ``f"{col_prefix}{trait}"`` is plotted by ``Group`` and, for every group with
    at least two values, the 95% CI from ``compute_95ci`` is written above the box.

    Parameters
    ----------
    col_prefix : str
        Column prefix that precedes the trait name, e.g. ``"raw_aes_1_"``.
    product_name : str
        Product label used in the plot titles.
    """
    group_order = ["Humans", "Synthetic Twins"]

    for trait in ("extraversion", "agreeableness", "conscientiousness", "neuroticism", "openness"):
        source_col = f"{col_prefix}{trait}"
        trait_label = trait.capitalize()

        # Keep only rows with a score and give the value column a generic name
        df_plot = (
            df_combined[["Group", source_col]]
            .dropna()
            .rename(columns={source_col: "Score"})
        )

        fig = px.box(
            df_plot,
            x="Group",
            y="Score",
            color="Group",
            points="all",
            title=f"{trait_label} Trait -> {product_name}",
            labels={"Score": f"{trait_label} AES Score", "Group": "Group"},
            category_orders={"Group": ["Humans", "Synthetic Twins"]},
        )

        # Add 95% CI annotations slightly above each group's highest point
        for group in group_order:
            scores = df_plot.loc[df_plot["Group"] == group, "Score"]
            if len(scores) < 2:
                continue
            ci_low, ci_high = compute_95ci(scores)
            fig.add_annotation(
                x=group,
                y=scores.max() + 0.3,
                text=f"95% CI [{ci_low}, {ci_high}]",
                showarrow=False,
                font=dict(size=11),
                xanchor="center",
            )

        # Control jitter and marker style
        fig.update_traces(jitter=0.4, marker=dict(opacity=0.5, size=6))
        fig.update_layout(showlegend=False)
        fig.show()


<h2> Cabin luggage: Box Plot</h2>

In [28]:
# One box plot per trait, read from the `raw_aes_1_<trait>` columns of df_combined.
plot_trait_boxplots_by_product("raw_aes_1_", "Cabin Luggage")


<h2> Packing Cubes: Box Plot</h2>

In [29]:
# One box plot per trait, read from the `raw_aes_2_<trait>` columns of df_combined.
plot_trait_boxplots_by_product("raw_aes_2_", "Packing Cubes")

<h2>Water Bottle: Box Plot</h2>

In [30]:
# One box plot per trait, read from the `raw_aes_3_<trait>` columns of df_combined.
plot_trait_boxplots_by_product("raw_aes_3_", "Water Bottle")

<h1>Confidence Intervals</h1>

In [31]:
import pandas as pd
import numpy as np
import pingouin as pg
from scipy.stats import pearsonr

# Define products and traits: product label -> prefix of its raw AES columns
products = {
    product_label: f"raw_aes_{product_num}_"
    for product_num, product_label in enumerate(
        ("Cabin Luggage", "Packing Cubes", "Water Bottle"), start=1
    )
}

traits = "openness conscientiousness extraversion agreeableness neuroticism".split()

# Function to compute bootstrapped CI
def boot_ci(data, seed=42):
    """Return the mean of ``data`` and a bootstrapped 95% CI for that mean.

    Parameters
    ----------
    data : pandas.Series
        Values to summarize; missing values are dropped first.
    seed : int or None
        Seed forwarded to ``pg.compute_bootci`` so the interval is the same on
        every run. Pass ``None`` for an unseeded bootstrap.

    Returns
    -------
    tuple
        ``(mean, (ci_lower, ci_upper))``, or ``(None, None)`` when fewer than
        two non-missing values remain.
    """
    data = data.dropna()
    if len(data) < 2:
        return None, None
    ci = pg.compute_bootci(data, func='mean', n_boot=1000, confidence=0.95, seed=seed)
    return data.mean(), (ci[0], ci[1])

# Output rows + p-values collectors
table_rows = []
pvals_trait = []   # raw p-values for each trait t-test
pvals_agg = []     # raw p-values for aggregate Pearson correlation

for product_name, prefix in products.items():
    human_means = []
    synth_means = []
    same_cat_count = 0
    product_rows = []  # this product's rows, so the aggregate stats can be filled in afterwards

    for trait in traits:
        col_name = f"{prefix}{trait}"
        trait_label = trait.capitalize()

        # Subset data
        humans = df_combined[df_combined["Group"] == "Humans"][col_name]
        synths = df_combined[df_combined["Group"] == "Synthetic Twins"][col_name]
        humans_valid = humans.dropna()
        synths_valid = synths.dropna()

        # Compute CIs and means (None when a group has fewer than two values)
        h_mean, h_ci = boot_ci(humans)
        s_mean, s_ci = boot_ci(synths)

        # Store means for correlation later
        human_means.append(h_mean)
        synth_means.append(s_mean)

        # Categories: the group mean rounded to the nearest integer
        h_cat = int(round(h_mean)) if h_mean is not None else None
        s_cat = int(round(s_mean)) if s_mean is not None else None
        # Two missing categories must not count as agreement (None == None is True).
        categories_known = h_cat is not None and s_cat is not None
        same_cat = "✅" if categories_known and h_cat == s_cat else "❌"
        if same_cat == "✅":
            same_cat_count += 1

        # T-test (independent samples, humans vs. synthetic twins)
        if humans_valid.shape[0] > 1 and synths_valid.shape[0] > 1:
            pval = pg.ttest(humans_valid, synths_valid, paired=False)['p-val'].values[0]
            pvals_trait.append(pval)
            pval_str = f"{pval:.3f}"
        else:
            pvals_trait.append(np.nan)
            pval_str = "N/A"

        # Format means and CIs
        h_str = f"{h_mean:.2f} [{h_ci[0]:.2f}, {h_ci[1]:.2f}]" if h_mean is not None else "N/A"
        s_str = f"{s_mean:.2f} [{s_ci[0]:.2f}, {s_ci[1]:.2f}]" if s_mean is not None else "N/A"

        # Append row (aggregate columns are filled in once the product is complete)
        row = {
            "Product": product_name,
            "Trait": trait_label,
            "Human Mean [95% CI]": h_str,
            "Synthetic Mean [95% CI]": s_str,
            "Human Cat": h_cat,
            "Synthetic Cat": s_cat,
            "p-value": pval_str,
            "Same Category": same_cat,
            "Aggregate r": "", "Agg. p": "", "% Same Category": ""
        }
        table_rows.append(row)
        product_rows.append(row)

    # Aggregate correlation per product: human vs. synthetic trait means
    if None not in human_means and None not in synth_means:
        r, p = pearsonr(human_means, synth_means)
        r = round(r, 2)
        p_agg = round(p, 3)
        pvals_agg.append(p)  # raw p-value

        same_percent = round((same_cat_count / len(traits)) * 100, 1)
        for row in product_rows:
            row["Aggregate r"] = r
            row["Agg. p"] = p_agg
            row["% Same Category"] = same_percent
    else:
        pvals_agg.append(np.nan)

# Create DataFrame
final_df = pd.DataFrame(table_rows)

# Display results
from IPython.display import display
display(final_df)


Unnamed: 0,Product,Trait,Human Mean [95% CI],Synthetic Mean [95% CI],Human Cat,Synthetic Cat,p-value,Same Category,Aggregate r,Agg. p,% Same Category
0,Cabin Luggage,Openness,"2.96 [2.85, 3.06]","3.65 [3.58, 3.73]",3,4,0.0,❌,0.86,0.064,60.0
1,Cabin Luggage,Conscientiousness,"3.60 [3.51, 3.69]","3.89 [3.84, 3.93]",4,4,0.0,✅,0.86,0.064,60.0
2,Cabin Luggage,Extraversion,"2.91 [2.80, 3.04]","3.22 [3.15, 3.29]",3,3,0.0,✅,0.86,0.064,60.0
3,Cabin Luggage,Agreeableness,"3.31 [3.19, 3.41]","3.95 [3.87, 4.02]",3,4,0.0,❌,0.86,0.064,60.0
4,Cabin Luggage,Neuroticism,"3.82 [3.74, 3.90]","4.12 [4.07, 4.16]",4,4,0.0,✅,0.86,0.064,60.0
5,Packing Cubes,Openness,"2.96 [2.84, 3.06]","3.39 [3.31, 3.46]",3,3,0.0,✅,0.91,0.03,80.0
6,Packing Cubes,Conscientiousness,"3.61 [3.51, 3.70]","4.04 [4.00, 4.08]",4,4,0.0,✅,0.91,0.03,80.0
7,Packing Cubes,Extraversion,"2.84 [2.71, 2.95]","2.88 [2.81, 2.96]",3,3,0.543,✅,0.91,0.03,80.0
8,Packing Cubes,Agreeableness,"3.42 [3.32, 3.54]","4.14 [4.06, 4.23]",3,4,0.0,❌,0.91,0.03,80.0
9,Packing Cubes,Neuroticism,"3.79 [3.69, 3.87]","4.09 [4.03, 4.13]",4,4,0.0,✅,0.91,0.03,80.0


In [32]:
import json

# Store the raw per-trait t-test p-values under a single key
pvals = {"gpt-5-latest-ci-test": pvals_trait}

# save pvals
with open("../p_value_correction/gpt_5_latest_ci.json", "w") as f:
    f.write(json.dumps(pvals, indent=4))

In [33]:
from scipy.stats import binomtest

# One-sided exact binomial test: is the number of aligned cases larger than
# expected if each case aligned with probability `chance_level`?
# NOTE(review): n = 15 is presumably 3 products x 5 traits, and `aligned` is
# entered by hand — confirm both against the table they were taken from.
n = 15
chance_level = 0.05
aligned = 10

result = binomtest(aligned, n, p=chance_level, alternative="greater")

# result.statistic is the observed proportion (aligned / n), not the count,
# so the count is printed from `aligned` and the proportion as a percentage.
alignment_summary = f"Aligned: {aligned}/{n} ({result.statistic:.1%}), p-value: {result.pvalue:.5f}"
print(alignment_summary)

% aligned: 0.6666666666666666/15, p-value: 0.00000


<h1>Synth Twins (GPT-5): Regression Coefficient Matrix for Product 1 (Cabin luggage)</h1>

In [34]:
# Display the coefficient table stored under key 1 of `regression_results`.
regression_results[1]

Unnamed: 0,aes_1_extraversion,aes_1_agreeableness,aes_1_conscientiousness,aes_1_neuroticism,aes_1_openness
extraversion_score,0.41 (p=0.0000),-0.12 (p=0.0200),-0.05 (p=0.3729),0.11 (p=0.0392),-0.20 (p=0.0000)
agreeableness_score,0.03 (p=0.4911),0.31 (p=0.0000),-0.08 (p=0.1123),0.16 (p=0.0044),-0.19 (p=0.0000)
conscientiousness_score,-0.20 (p=0.0001),0.09 (p=0.0887),0.30 (p=0.0000),0.11 (p=0.0336),-0.24 (p=0.0000)
neuroticism_score,-0.01 (p=0.7934),0.21 (p=0.0001),-0.17 (p=0.0010),0.24 (p=0.0000),-0.12 (p=0.0006)
openness_score,-0.35 (p=0.0000),-0.12 (p=0.0153),-0.01 (p=0.8549),-0.04 (p=0.4028),0.75 (p=0.0000)


<h1>Synth Twins (GPT-5): Regression Coefficient Matrix for Product 2 (Compressible storage bag set)</h1>

In [35]:
# Display the coefficient table stored under key 2 of `regression_results`.
regression_results[2]

Unnamed: 0,aes_2_extraversion,aes_2_agreeableness,aes_2_conscientiousness,aes_2_neuroticism,aes_2_openness
extraversion_score,0.41 (p=0.0000),-0.15 (p=0.0032),0.03 (p=0.5934),0.08 (p=0.1577),-0.15 (p=0.0001)
agreeableness_score,0.02 (p=0.7425),0.30 (p=0.0000),0.03 (p=0.5881),0.04 (p=0.5081),-0.18 (p=0.0000)
conscientiousness_score,-0.06 (p=0.2291),0.07 (p=0.1622),0.27 (p=0.0000),-0.03 (p=0.5735),-0.20 (p=0.0000)
neuroticism_score,-0.05 (p=0.3150),0.20 (p=0.0001),-0.18 (p=0.0008),0.22 (p=0.0001),-0.13 (p=0.0007)
openness_score,-0.35 (p=0.0000),-0.18 (p=0.0003),-0.05 (p=0.2877),0.09 (p=0.0932),0.71 (p=0.0000)


<h1>Synth Twins (GPT-5): Regression Coefficient Matrix for Product 3 (A water bottle)</h1>

In [36]:
# Display the coefficient table stored under key 3 of `regression_results`.
regression_results[3]

Unnamed: 0,aes_3_extraversion,aes_3_agreeableness,aes_3_conscientiousness,aes_3_neuroticism,aes_3_openness
extraversion_score,0.72 (p=0.0000),-0.19 (p=0.0004),0.02 (p=0.7830),0.04 (p=0.4546),-0.28 (p=0.0000)
agreeableness_score,0.07 (p=0.0642),0.22 (p=0.0001),-0.03 (p=0.6262),0.13 (p=0.0144),-0.20 (p=0.0000)
conscientiousness_score,0.05 (p=0.1861),0.08 (p=0.1421),-0.09 (p=0.0966),0.23 (p=0.0000),-0.22 (p=0.0000)
neuroticism_score,-0.02 (p=0.5291),0.23 (p=0.0000),-0.20 (p=0.0005),0.24 (p=0.0000),-0.13 (p=0.0022)
openness_score,-0.21 (p=0.0000),-0.12 (p=0.0211),-0.02 (p=0.7729),0.06 (p=0.2398),0.61 (p=0.0000)


In [37]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

def correlations_with_p_and_n(df, aes_cols, traits_cols):
    """Pairwise Pearson correlations between trait columns and AES columns.

    Each trait/AES pair is computed on the rows where both values are present.
    Pairs with fewer than three such rows get NaN for r and p.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``traits_cols`` and ``aes_cols``.
    aes_cols : list of str
        Columns of the result tables.
    traits_cols : list of str
        Rows of the result tables.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(r_df, p_df, n_df)``: correlation coefficients, p-values and the
        number of complete rows used, all indexed by ``traits_cols`` with
        ``aes_cols`` as columns. ``n_df`` holds integers.
    """
    r_df = pd.DataFrame(np.nan, index=traits_cols, columns=aes_cols, dtype=float)
    p_df = pd.DataFrame(np.nan, index=traits_cols, columns=aes_cols, dtype=float)
    # Start from zeros: an empty frame cannot hold an integer dtype because its
    # cells are missing values, so the counts would not come back as ints.
    n_df = pd.DataFrame(0, index=traits_cols, columns=aes_cols, dtype=int)

    for trait in traits_cols:
        for aes in aes_cols:
            pair = df[[trait, aes]].dropna()
            n = len(pair)
            n_df.loc[trait, aes] = n
            if n >= 3:
                r, p = pearsonr(pair[trait], pair[aes])
                r_df.loc[trait, aes] = r
                p_df.loc[trait, aes] = p

    return r_df, p_df, n_df

# Correlation, p-value and sample-size tables for products 1, 2 and 3 (in that order)
(r1, p1, n1), (r2, p2, n2), (r3, p3, n3) = (
    correlations_with_p_and_n(df_aes_with_aes_residualized, product_aes_cols, personality_cols)
    for product_aes_cols in (aes_cols_product1, aes_cols_product2, aes_cols_product3)
)

def pretty_table(r_df, p_df):
    """Merge a correlation table and its p-value table into one table of strings.

    Cells are matched by position. A cell reads ``"<r> (p=<p>)"`` with r to two
    and p to six decimals, or ``"nan"`` when either value is missing. The result
    carries the index and columns of ``r_df``.
    """
    def render(row_pos, col_pos):
        r_value = r_df.iloc[row_pos, col_pos]
        if pd.isna(r_value):
            return "nan"
        p_value = p_df.iloc[row_pos, col_pos]
        if pd.isna(p_value):
            return "nan"
        return f"{r_value:.2f} (p={p_value:.6f})"

    n_rows, n_cols = r_df.shape
    cells = [[render(row_pos, col_pos) for col_pos in range(n_cols)] for row_pos in range(n_rows)]
    return pd.DataFrame(cells, index=r_df.index, columns=r_df.columns)

def json_ready_pvals(p_df: pd.DataFrame):
    """Flatten a p-value table row by row into a list that serializes to valid JSON.

    Finite values become plain Python floats; NaN and +/-inf become ``None``.
    """
    cleaned = []
    for value in p_df.to_numpy(dtype=float).ravel():
        usable = (not pd.isna(value)) and np.isfinite(value)
        cleaned.append(float(value) if usable else None)
    return cleaned

# --- Pretty tables ---
for product_num, (r_table, p_table) in enumerate(((r1, p1), (r2, p2), (r3, p3)), start=1):
    print(f"Pearson Correlations with p-values - Product {product_num}")
    display(pretty_table(r_table, p_table))

# --- Print arrays of p-values ---
print("\nP-values as arrays:")
for product_num, p_table in enumerate((p1, p2, p3), start=1):
    print(f"Product {product_num}:", p_table.values.flatten())


import json

# One flat, JSON-safe list of p-values per product
pvals = {
    f"p{product_num}_gpt_5_latest_correlations": json_ready_pvals(p_table)
    for product_num, p_table in enumerate((p1, p2, p3), start=1)
}

# save pvals
with open("../p_value_correction/gpt_5_latest_correlations_pvals.json", "w") as f:
    json.dump(pvals, f, indent=4)




Pearson Correlations with p-values - Product 1


Unnamed: 0,aes_1_extraversion,aes_1_agreeableness,aes_1_conscientiousness,aes_1_neuroticism,aes_1_openness
extraversion_score,0.35 (p=0.000000),-0.06 (p=0.230819),-0.01 (p=0.886575),0.13 (p=0.012160),-0.17 (p=0.000841)
agreeableness_score,0.06 (p=0.215046),0.25 (p=0.000002),-0.08 (p=0.107110),0.20 (p=0.000140),-0.09 (p=0.087712)
conscientiousness_score,-0.13 (p=0.009235),0.02 (p=0.694110),0.35 (p=0.000000),0.05 (p=0.334460),-0.24 (p=0.000004)
neuroticism_score,0.02 (p=0.675783),0.21 (p=0.000035),-0.28 (p=0.000000),0.19 (p=0.000226),-0.07 (p=0.197165)
openness_score,-0.28 (p=0.000000),-0.08 (p=0.120875),-0.02 (p=0.640617),-0.01 (p=0.848834),0.69 (p=0.000000)


Pearson Correlations with p-values - Product 2


Unnamed: 0,aes_2_extraversion,aes_2_agreeableness,aes_2_conscientiousness,aes_2_neuroticism,aes_2_openness
extraversion_score,0.37 (p=0.000000),-0.11 (p=0.038729),0.10 (p=0.066532),0.06 (p=0.242051),-0.11 (p=0.028799)
agreeableness_score,0.06 (p=0.273355),0.21 (p=0.000047),0.04 (p=0.443373),0.09 (p=0.090624),-0.07 (p=0.208719)
conscientiousness_score,0.02 (p=0.759077),0.00 (p=0.984069),0.35 (p=0.000000),-0.10 (p=0.053821),-0.19 (p=0.000229)
neuroticism_score,-0.07 (p=0.193729),0.22 (p=0.000012),-0.28 (p=0.000000),0.21 (p=0.000046),-0.10 (p=0.062106)
openness_score,-0.28 (p=0.000000),-0.15 (p=0.004783),-0.03 (p=0.570905),0.09 (p=0.081082),0.65 (p=0.000000)


Pearson Correlations with p-values - Product 3


Unnamed: 0,aes_3_extraversion,aes_3_agreeableness,aes_3_conscientiousness,aes_3_neuroticism,aes_3_openness
extraversion_score,0.72 (p=0.000000),-0.16 (p=0.001652),0.03 (p=0.620486),0.08 (p=0.121653),-0.27 (p=0.000000)
agreeableness_score,0.26 (p=0.000001),0.14 (p=0.008363),-0.04 (p=0.462495),0.18 (p=0.000336),-0.16 (p=0.002026)
conscientiousness_score,0.16 (p=0.002346),-0.01 (p=0.794864),-0.02 (p=0.716652),0.15 (p=0.003583),-0.22 (p=0.000012)
neuroticism_score,-0.14 (p=0.005347),0.24 (p=0.000002),-0.17 (p=0.001334),0.15 (p=0.003856),-0.06 (p=0.287167)
openness_score,-0.09 (p=0.070293),-0.11 (p=0.036830),-0.00 (p=0.954223),0.08 (p=0.133029),0.53 (p=0.000000)



P-values as arrays:
Product 1: [5.13028951e-12 2.30819352e-01 8.86575059e-01 1.21603601e-02
 8.41405546e-04 2.15045550e-01 1.56131059e-06 1.07110145e-01
 1.40405445e-04 8.77119611e-02 9.23479481e-03 6.94110300e-01
 4.16746879e-12 3.34459707e-01 3.74178905e-06 6.75783039e-01
 3.51477710e-05 4.58094202e-08 2.26494181e-04 1.97165224e-01
 2.80821382e-08 1.20874502e-01 6.40617372e-01 8.48834427e-01
 3.87764486e-53]
Product 2: [2.28822537e-13 3.87294381e-02 6.65323363e-02 2.42050562e-01
 2.87986582e-02 2.73355441e-01 4.71386717e-05 4.43372849e-01
 9.06239179e-02 2.08718831e-01 7.59076839e-01 9.84069068e-01
 6.96644934e-12 5.38210396e-02 2.28971630e-04 1.93729250e-01
 1.21758238e-05 3.73239691e-08 4.58122037e-05 6.21061972e-02
 2.31766690e-08 4.78321678e-03 5.70905322e-01 8.10815744e-02
 1.03363260e-46]
Product 3: [3.17699006e-61 1.65183957e-03 6.20485640e-01 1.21652691e-01
 1.51729940e-07 5.71670728e-07 8.36317650e-03 4.62495351e-01
 3.36201757e-04 2.02606644e-03 2.34620806e-03 7.94864494e-

<h1>Compute r_difference</h1>

In [38]:
import numpy as np
import pandas as pd
from scipy.stats import t

def compute_r_difference_synths(synths_results, product_num, N):
    """
    Compute r-differences (matched minus mismatched coefficient) for synth twins only.

    For each personality trait, the coefficient in the trait-matched AES column is
    compared with the coefficient in a fixed mismatched AES column of the same
    product. Both coefficients are Fisher z-transformed, their difference is divided
    by sqrt(1/(N-3) + 1/(N-3)), and a two-sided p-value is read from a t distribution
    with N - 2 degrees of freedom.

    NOTE(review): this standard error is the one for two independent correlations,
    while both coefficients here are read from the same results table — confirm
    that this is the intended test.

    Args:
    - synths_results (pd.DataFrame): Coefficient table for one product, indexed by
      trait (e.g. "openness_score") with columns named "aes_{product_num}_<trait>".
      A cell may be a number or a string whose first whitespace-separated token is
      the coefficient (e.g. "0.35 (p=0.000000)").
    - product_num (int): Product number used to build the AES column names (1, 2, or 3).
    - N (int): Sample size; must be greater than 3 for the standard error to be defined.

    Returns:
    - pd.DataFrame: One row per trait with columns "r_difference" and "t_value"
      (strings formatted to two decimals) and "p_value" (float). Traits whose AES
      columns are missing from synths_results keep NaN entries and a message is printed.
    """
    # Degrees of freedom of the reference t distribution
    dof = N - 2

    def leading_coefficient(cell):
        """Return the first whitespace-separated token of a cell as a float."""
        # str() lets plain numeric cells through as well as "0.35 (p=...)" strings.
        return float(str(cell).split()[0])

    def fisher_r_to_z(r):
        """Apply Fisher's transformation to r."""
        # arctanh(r) == 0.5 * log((1 + r) / (1 - r)); unlike the hand-written ratio
        # it treats r = 1 and r = -1 alike (+/-inf) instead of raising at r = 1.
        return np.arctanh(r)

    def compute_t_value(r_matched, r_mismatched, N):
        """Compute t-value and p-value for r-difference."""
        z_matched = fisher_r_to_z(r_matched)
        z_mismatched = fisher_r_to_z(r_mismatched)

        SE = np.sqrt((1 / (N - 3)) + (1 / (N - 3)))
        t_value = (z_matched - z_mismatched) / SE
        # Survival function rather than 1 - cdf: for large |t| the cdf rounds to 1.0
        # and the subtraction would return exactly 0.0 instead of a tiny p-value.
        p_value = 2 * t.sf(abs(t_value), dof)

        return t_value, p_value

    # Dynamically construct the matched-mismatched AES columns for the given product
    matched_mismatched_pairs = {
        "extraversion_score": (f"aes_{product_num}_extraversion", f"aes_{product_num}_neuroticism"),
        "agreeableness_score": (f"aes_{product_num}_agreeableness", f"aes_{product_num}_openness"),
        "conscientiousness_score": (f"aes_{product_num}_conscientiousness", f"aes_{product_num}_extraversion"),
        "neuroticism_score": (f"aes_{product_num}_neuroticism", f"aes_{product_num}_conscientiousness"),
        "openness_score": (f"aes_{product_num}_openness", f"aes_{product_num}_agreeableness"),
    }

    # Initialize results DataFrame
    results = pd.DataFrame(index=matched_mismatched_pairs.keys(), columns=["r_difference", "t_value", "p_value"])

    # Compute r-difference for each trait
    for trait, (matched_col, mismatched_col) in matched_mismatched_pairs.items():
        if matched_col in synths_results.columns and mismatched_col in synths_results.columns:
            # Extract beta coefficient (ignoring p-values in parentheses)
            r_matched = leading_coefficient(synths_results.loc[trait, matched_col])
            r_mismatched = leading_coefficient(synths_results.loc[trait, mismatched_col])

            # Compute r-difference
            r_diff = r_matched - r_mismatched

            # Compute t-test values
            t_val, p_val = compute_t_value(r_matched, r_mismatched, N)

            # Store results (r and t as display strings, p as a number)
            results.loc[trait, "r_difference"] = f'{r_diff:.2f}'
            results.loc[trait, "t_value"] = f'{t_val:.2f}'
            results.loc[trait, "p_value"] = p_val
        else:
            print(f"Missing data for {trait} in product {product_num}. Check if columns exist in synths_results.")

    return results



# Compute r-differences for all three products
# NOTE(review): N=373 is hard-coded for every product — confirm it matches the sample size.
(
    r_difference_synths_product1,
    r_difference_synths_product2,
    r_difference_synths_product3,
) = (
    compute_r_difference_synths(regression_results[product], product_num=product, N=373)
    for product in (1, 2, 3)
)

# Display results
for table_title, r_difference_table in (
    ("R-Difference Table for Synths Twins (Product 1)", r_difference_synths_product1),
    ("R-Difference Table for Synths Twins (Product 2)", r_difference_synths_product2),
    ("R-Difference Table for Synths Twins (Product 3)", r_difference_synths_product3),
):
    print(table_title)
    display(r_difference_table)


R-Difference Table for Synths Twins (Product 1)


Unnamed: 0,r_difference,t_value,p_value
extraversion_score,0.3,4.42,1.3e-05
agreeableness_score,0.5,6.98,0.0
conscientiousness_score,0.5,6.97,0.0
neuroticism_score,0.41,5.66,0.0
openness_score,0.87,14.87,0.0


R-Difference Table for Synths Twins (Product 2)


Unnamed: 0,r_difference,t_value,p_value
extraversion_score,0.33,4.83,2e-06
agreeableness_score,0.48,6.69,0.0
conscientiousness_score,0.33,4.58,6e-06
neuroticism_score,0.4,5.52,0.0
openness_score,0.89,14.54,0.0


R-Difference Table for Synths Twins (Product 3)


Unnamed: 0,r_difference,t_value,p_value
extraversion_score,0.68,11.8,0.0
agreeableness_score,0.42,5.8,0.0
conscientiousness_score,-0.14,-1.91,0.057151
neuroticism_score,0.44,6.09,0.0
openness_score,0.73,11.28,0.0
