In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import norm

def fishers_z_test(r1, r2, n1, n2):
    """
    Perform Fisher's Z-test to compare two correlation coefficients.

    r1, r2: Correlation coefficients to compare (each strictly between -1 and 1)
    n1, n2: Sample sizes of the respective groups (each greater than 3)
    Returns: z-score and two-tailed p-value
    Raises: ValueError if a correlation is outside (-1, 1) or a sample size is <= 3
    """
    # Reject inputs for which the transformation or its standard error is undefined;
    # otherwise they surface as NaN/inf results or a bare ZeroDivisionError.
    if np.any(np.abs(r1) >= 1) or np.any(np.abs(r2) >= 1):
        raise ValueError("Correlation coefficients must lie strictly between -1 and 1.")
    if np.any(np.asarray(n1) <= 3) or np.any(np.asarray(n2) <= 3):
        raise ValueError("Sample sizes must be greater than 3.")

    # Fisher's Z transformation: arctanh(r) == 0.5 * log((1 + r) / (1 - r))
    z1 = np.arctanh(r1)
    z2 = np.arctanh(r2)

    # Standard error of the difference between the two transformed correlations
    se = np.sqrt(1/(n1 - 3) + 1/(n2 - 3))

    # Z-test statistic
    z = (z1 - z2) / se

    # Two-tailed p-value. The survival function is used instead of 1 - cdf because
    # cdf(|z|) rounds to exactly 1.0 for large |z|, which would report p == 0.0.
    p_value = 2 * norm.sf(abs(z))

    return z, p_value

# Sample sizes handed to the Fisher z-tests as n1 (human) and n2 (synthetic)
n_human = n_synthetic = 373

# Correlations keyed as {product: {trait: r}} for the human sample
correlations_human = {
    "Cabin Luggage": {
        "Extraversion": 0.1015,
        "Agreeableness": 0.2585,
        "Conscientiousness": 0.0157,
        "Neuroticism": 0.0293,
        "Openness": -0.0302,
    },
    "Packing Cubes": {
        "Extraversion": 0.1717,
        "Agreeableness": 0.3926,
        "Conscientiousness": -0.0288,
        "Neuroticism": -0.0009,
        "Openness": 0.0443,
    },
    "Water Bottle": {
        "Extraversion": 0.1866,
        "Agreeableness": 0.2573,
        "Conscientiousness": 0.0545,
        "Neuroticism": -0.1716,
        "Openness": -0.0265,
    },
}

# Same layout for the GPT-4o correlations
correlations_gpt4o = {
    "Cabin Luggage": {
        "Extraversion": 0.27,
        "Agreeableness": 0.21,
        "Conscientiousness": 0.26,
        "Neuroticism": -0.00,
        "Openness": 0.55,
    },
    "Packing Cubes": {
        "Extraversion": 0.34,
        "Agreeableness": 0.23,
        "Conscientiousness": 0.23,
        "Neuroticism": 0.00,
        "Openness": 0.52,
    },
    "Water Bottle": {
        "Extraversion": 0.64,
        "Agreeableness": 0.17,
        "Conscientiousness": 0.27,
        "Neuroticism": 0.08,
        "Openness": 0.57,
    },
}

# Same layout for the Gemini correlations
correlations_gemini = {
    "Cabin Luggage": {
        "Extraversion": 0.18,
        "Agreeableness": 0.24,
        "Conscientiousness": 0.28,
        "Neuroticism": 0.08,
        "Openness": 0.29,
    },
    "Packing Cubes": {
        "Extraversion": 0.26,
        "Agreeableness": 0.22,
        "Conscientiousness": 0.19,
        "Neuroticism": 0.14,
        "Openness": 0.35,
    },
    "Water Bottle": {
        "Extraversion": 0.37,
        "Agreeableness": 0.18,
        "Conscientiousness": 0.09,
        "Neuroticism": -0.21,
        "Openness": 0.42,
    },
}

# Significance cutoff for p-values (presumably the FDR-adjusted level; confirm its derivation)
fdr_threshold = 0.00022

# Function to compute Fisher's Z-tests
def fisher_tests(human_data, synthetic_data):
    """
    Run Fisher's Z-test for every product/trait pair found in human_data.

    human_data, synthetic_data: nested mappings of the form {product: {trait: r}};
        synthetic_data is looked up with the product and trait keys of human_data
    Returns: {product: {trait: {"z-score": z, "p-value": p}}}
    """
    def _compare(product, trait):
        # Sample sizes are taken from the module-level n_human / n_synthetic
        z_stat, p_val = fishers_z_test(
            human_data[product][trait],
            synthetic_data[product][trait],
            n_human,
            n_synthetic,
        )
        return {"z-score": z_stat, "p-value": p_val}

    return {
        product: {trait: _compare(product, trait) for trait in trait_map.keys()}
        for product, trait_map in human_data.items()
    }

# Compare the human correlations against each model's correlations
results_gpt4o = fisher_tests(
    human_data=correlations_human,
    synthetic_data=correlations_gpt4o,
)
results_gemini = fisher_tests(
    human_data=correlations_human,
    synthetic_data=correlations_gemini,
)



<h1>Assessing Alignment Between Humans and Models Using Fisher’s z-Test</h1>

In [247]:
import numpy as np
import scipy.stats as stats
import pandas as pd
from statsmodels.stats.multitest import multipletests

# Correlations keyed as {product: {trait: r}} for the human sample
correlations_human = {
    "Cabin Luggage": {
        "Extraversion": 0.1015,
        "Agreeableness": 0.2585,
        "Conscientiousness": 0.0157,
        "Neuroticism": 0.0293,
        "Openness": -0.0302,
    },
    "Packing Cubes": {
        "Extraversion": 0.1717,
        "Agreeableness": 0.3926,
        "Conscientiousness": -0.0288,
        "Neuroticism": -0.0009,
        "Openness": 0.0443,
    },
    "Water Bottle": {
        "Extraversion": 0.1866,
        "Agreeableness": 0.2573,
        "Conscientiousness": 0.0545,
        "Neuroticism": -0.1716,
        "Openness": -0.0265,
    },
}

# Same layout for the GPT-4o correlations
correlations_gpt4o = {
    "Cabin Luggage": {
        "Extraversion": 0.27,
        "Agreeableness": 0.21,
        "Conscientiousness": 0.26,
        "Neuroticism": -0.00,
        "Openness": 0.55,
    },
    "Packing Cubes": {
        "Extraversion": 0.34,
        "Agreeableness": 0.23,
        "Conscientiousness": 0.23,
        "Neuroticism": 0.00,
        "Openness": 0.52,
    },
    "Water Bottle": {
        "Extraversion": 0.64,
        "Agreeableness": 0.17,
        "Conscientiousness": 0.27,
        "Neuroticism": 0.08,
        "Openness": 0.57,
    },
}

# Same layout for the Gemini correlations
correlations_gemini = {
    "Cabin Luggage": {
        "Extraversion": 0.18,
        "Agreeableness": 0.24,
        "Conscientiousness": 0.28,
        "Neuroticism": 0.08,
        "Openness": 0.29,
    },
    "Packing Cubes": {
        "Extraversion": 0.26,
        "Agreeableness": 0.22,
        "Conscientiousness": 0.19,
        "Neuroticism": 0.14,
        "Openness": 0.35,
    },
    "Water Bottle": {
        "Extraversion": 0.37,
        "Agreeableness": 0.18,
        "Conscientiousness": 0.09,
        "Neuroticism": -0.21,
        "Openness": 0.42,
    },
}

# One frame per source, transposed so that products are rows and traits are columns
df_human, df_gpt4o, df_gemini = (
    pd.DataFrame(table).transpose()
    for table in (correlations_human, correlations_gpt4o, correlations_gemini)
)

# Sample sizes handed to the Fisher z-test as n1 (human) and n2 (synthetic)
n_human = n_synthetic = 373

def compute_fishers_z_per_product(df_human, df_model, model_name,
                                  similarity_threshold=0.1, alpha=0.05,
                                  fdr_threshold=0.00022):
    """
    Build a per-product, per-trait table comparing human and model correlations.

    df_human, df_model: DataFrames indexed by product with one column per trait;
        df_model is looked up with the index and columns of df_human
    model_name: label used in the model's correlation column header
    similarity_threshold: largest |Δr| (on the 2-decimal values shown) counted as aligned
    alpha: a pair is only "Aligned" when the Fisher z-test p-value is >= alpha
    fdr_threshold: p-values below this cutoff are starred in "Significance"
    Returns: DataFrame with one row per (product, trait) pair

    Sample sizes come from the module-level n_human and n_synthetic.
    """
    results = []
    for product in df_human.index:
        for trait in df_human.columns:
            r_human = df_human.loc[product, trait]
            r_model = df_model.loc[product, trait]
            z, p = fishers_z_test(r_human, r_model, n_human, n_synthetic)

            # Values as displayed in the table
            r_human_shown = round(r_human, 2)
            r_model_shown = round(r_model, 2)
            # Round the difference too, so float noise (e.g. 0.4 - 0.3 > 0.1)
            # neither leaks into the column nor flips the threshold check below.
            delta_r = round(r_human_shown - r_model_shown, 2)

            # Thresholds are applied to the unrounded p-value: rounding first would
            # turn p = 0.00024 into 0.0002 (starred) and p = 0.04996 into 0.05 (aligned).
            is_close = abs(delta_r) <= similarity_threshold
            alignment = "Aligned" if is_close and (p >= alpha) else "Misaligned"
            significance = "*" if p < fdr_threshold else ""  # FDR-adjusted cutoff

            results.append({
                "Product": product,
                "Trait-Matched Ad": trait,
                "Human ( r )": r_human_shown,
                f"{model_name} ( r )": r_model_shown,
                "Δr": delta_r,
                "z-statistics": round(z, 2),
                "p-value": round(p, 4),
                "Significance": significance,
                "Alignment": alignment
            })

    return pd.DataFrame(results)

# Build one comparison table per model against the human correlations
df_z_gpt4o = compute_fishers_z_per_product(
    df_human=df_human,
    df_model=df_gpt4o,
    model_name="GPT-4o",
)
df_z_gemini = compute_fishers_z_per_product(
    df_human=df_human,
    df_model=df_gemini,
    model_name="Gemini",
)

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_alignment_with_humans(model="gpt", similarity_threshold=0.1):
    """
    Plot human vs. synthetic-twin correlations for every product and trait.

    One subplot row is drawn per product. For each trait the human correlation
    (circle) and the model correlation (square) are joined by a line; both markers
    are green when the 2-decimal correlations differ by at most
    similarity_threshold and red otherwise.

    model: "gpt" selects the GPT-4o correlations; any other value selects Gemini
    similarity_threshold: largest |human r - model r| still coloured as aligned
    Returns: None (the figure is displayed with fig.show())
    """
    # The function carries its own copy of the correlation tables.
    correlations_human = {
        "Cabin Luggage": {"Extraversion": 0.1015, "Agreeableness": 0.2585, "Conscientiousness": 0.0157, "Neuroticism": 0.0293, "Openness": -0.0302},
        "Packing Cubes": {"Extraversion": 0.1717, "Agreeableness": 0.3926, "Conscientiousness": -0.0288, "Neuroticism": -0.0009, "Openness": 0.0443},
        "Water Bottle": {"Extraversion": 0.1866, "Agreeableness": 0.2573, "Conscientiousness": 0.0545, "Neuroticism": -0.1716, "Openness": -0.0265}
    }

    correlations_gpt4o = {
        "Cabin Luggage": {"Extraversion": 0.27, "Agreeableness": 0.21, "Conscientiousness": 0.26, "Neuroticism": -0.00, "Openness": 0.55},
        "Packing Cubes": {"Extraversion": 0.34, "Agreeableness": 0.23, "Conscientiousness": 0.23, "Neuroticism": 0.00, "Openness": 0.52},
        "Water Bottle": {"Extraversion": 0.64, "Agreeableness": 0.17, "Conscientiousness": 0.27, "Neuroticism": 0.08, "Openness": 0.57}
    }

    correlations_gemini = {
        "Cabin Luggage": {"Extraversion": 0.18, "Agreeableness": 0.24, "Conscientiousness": 0.28, "Neuroticism": 0.08, "Openness": 0.29},
        "Packing Cubes": {"Extraversion": 0.26, "Agreeableness": 0.22, "Conscientiousness": 0.19, "Neuroticism": 0.14, "Openness": 0.35},
        "Water Bottle": {"Extraversion": 0.37, "Agreeableness": 0.18, "Conscientiousness": 0.09, "Neuroticism": -0.21, "Openness": 0.42}
    }

    # Anything other than "gpt" falls through to the Gemini data and title.
    model_name = "GPT-4o" if model == "gpt" else "Gemini 1.5 Flash"
    correlations_model = correlations_gpt4o if model == "gpt" else correlations_gemini

    df_human = pd.DataFrame(correlations_human).T
    df_model = pd.DataFrame(correlations_model).T
    products = list(df_human.index)
    traits = ["Openness", "Conscientiousness", "Extraversion","Agreeableness","Neuroticism"]

    # Per-product x-range: the span of both series padded by 0.05 on each side
    x_ranges = {}
    for product in products:
        all_vals = pd.concat([df_human.loc[product], df_model.loc[product]])
        x_ranges[product] = (all_vals.min() - 0.05, all_vals.max() + 0.05)

    # Row count follows the data instead of a hard-coded 3
    fig = make_subplots(
        rows=len(products), cols=1,
        shared_xaxes=False,
        subplot_titles=products,
        vertical_spacing=0.1
    )

    for i, product in enumerate(products, start=1):
        for trait in traits:
            r_human = round(df_human.loc[product, trait], 2)
            r_model = round(df_model.loc[product, trait], 2)
            # Round the gap as well: the raw float difference of two 2-decimal
            # values can land just above the threshold (e.g. 0.4 - 0.3 > 0.1).
            aligned = round(abs(r_human - r_model), 2) <= similarity_threshold
            color = "green" if aligned else "red"

            # Line
            fig.add_trace(go.Scatter(
                x=[r_human, r_model],
                y=[trait, trait],
                mode='lines',
                line=dict(color="black", width=1),
                opacity=0.7,
                showlegend=False
            ), row=i, col=1)

            # Human marker; its label sits on the outer side, away from the line
            fig.add_trace(go.Scatter(
                x=[r_human],
                y=[trait],
                mode='markers+text',
                marker=dict(size=10, symbol="circle", color=color),
                text=[f"r={r_human}"],
                textposition="middle left" if r_human < r_model else "middle right",
                showlegend=False
            ), row=i, col=1)

            # Synthetic model marker; label placed on the opposite side
            fig.add_trace(go.Scatter(
                x=[r_model],
                y=[trait],
                mode='markers+text',
                marker=dict(size=10, symbol="square", color=color),
                text=[f"r={r_model}"],
                textposition="middle right" if r_human < r_model else "middle left",
                showlegend=False
            ), row=i, col=1)

        # Ensure consistent y-axis order
        fig.update_yaxes(
            categoryorder='array',
            categoryarray=traits[::-1],
            row=i, col=1
        )
        fig.update_xaxes(
            title_text="Correlation",
            range=x_ranges[product],
            row=i, col=1
        )

    # Add legends: placeholder traces with no data, used only for their legend entries
    legend_entries = [
        ("circle", "gray", "Humans"),
        ("square", "gray", "Synthetic Twin"),
        ("circle", "green", "Aligned"),
        ("circle", "red", "Misaligned"),
    ]
    for symbol, marker_color, label in legend_entries:
        fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(size=10, symbol=symbol, color=marker_color),
            name=label
        ))

    # Final layout
    fig.update_layout(
        height=1400,
        width=1300,
        title=f"Alignment Between Humans and Synthetic Twins ({model_name})",
        margin=dict(t=100, b=80, l=100, r=220),
        legend=dict(
            x=1.02,
            y=1,
            traceorder='normal',
            font=dict(size=12),
            borderwidth=0
        )
    )

    fig.show()


In [254]:
# Display the human vs. GPT-4o table returned by compute_fishers_z_per_product
df_z_gpt4o

Unnamed: 0,Product,Trait-Matched Ad,Human ( r ),GPT-4o ( r ),Δr,z-statistics,p-value,Significance,Alignment
0,Cabin Luggage,Extraversion,0.1,0.27,-0.17,-2.38,0.0173,,Misaligned
1,Cabin Luggage,Agreeableness,0.26,0.21,0.05,0.7,0.4851,,Aligned
2,Cabin Luggage,Conscientiousness,0.02,0.26,-0.24,-3.41,0.0007,,Misaligned
3,Cabin Luggage,Neuroticism,0.03,-0.0,0.03,0.4,0.6902,,Aligned
4,Cabin Luggage,Openness,-0.03,0.55,-0.58,-8.82,0.0,*,Misaligned
5,Packing Cubes,Extraversion,0.17,0.34,-0.17,-2.46,0.014,,Misaligned
6,Packing Cubes,Agreeableness,0.39,0.23,0.16,2.46,0.014,,Misaligned
7,Packing Cubes,Conscientiousness,-0.03,0.23,-0.26,-3.58,0.0003,,Misaligned
8,Packing Cubes,Neuroticism,-0.0,0.0,-0.0,-0.01,0.9902,,Aligned
9,Packing Cubes,Openness,0.04,0.52,-0.48,-7.24,0.0,*,Misaligned


In [309]:
# Render the alignment figure using the GPT-4o correlations
plot_alignment_with_humans("gpt")

In [251]:
# Display the human vs. Gemini table returned by compute_fishers_z_per_product
df_z_gemini

Unnamed: 0,Product,Trait-Matched Ad,Human ( r ),Gemini ( r ),Δr,z-statistics,p-value,Significance,Alignment
0,Cabin Luggage,Extraversion,0.1,0.18,-0.08,-1.09,0.2758,,Aligned
1,Cabin Luggage,Agreeableness,0.26,0.24,0.02,0.27,0.7885,,Aligned
2,Cabin Luggage,Conscientiousness,0.02,0.28,-0.26,-3.7,0.0002,*,Misaligned
3,Cabin Luggage,Neuroticism,0.03,0.08,-0.05,-0.69,0.4891,,Aligned
4,Cabin Luggage,Openness,-0.03,0.29,-0.32,-4.47,0.0,*,Misaligned
5,Packing Cubes,Extraversion,0.17,0.26,-0.09,-1.26,0.2074,,Aligned
6,Packing Cubes,Agreeableness,0.39,0.22,0.17,2.6,0.0093,,Misaligned
7,Packing Cubes,Conscientiousness,-0.03,0.19,-0.22,-3.01,0.0026,,Misaligned
8,Packing Cubes,Neuroticism,-0.0,0.14,-0.14,-1.93,0.0537,,Misaligned
9,Packing Cubes,Openness,0.04,0.35,-0.31,-4.37,0.0,*,Misaligned


In [310]:
# Render the alignment figure using the Gemini correlations
plot_alignment_with_humans("gemini")