In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import norm

def fishers_z_test(r1, r2, n1, n2):
    """
    Perform Fisher's Z-test to compare two correlation coefficients.
    r1, r2: Correlation coefficients to compare
    n1, n2: Sample sizes of the respective groups
    Returns: z-score and p-value
    """
    # Fisher's Z transformation
    z1 = 0.5 * np.log((1 + r1) / (1 - r1))
    z2 = 0.5 * np.log((1 + r2) / (1 - r2))

    # Standard error
    se = np.sqrt(1/(n1 - 3) + 1/(n2 - 3))

    # Z-test statistic
    z = (z1 - z2) / se

    # Two-tailed p-value
    p_value = 2 * (1 - norm.cdf(abs(z)))

    return z, p_value

n_human = 373
n_synthetic = 373

# Correlation matrices (Human, GPT-4o, and Gemini)
correlations_human = {
    "Cabin Luggage": {"Extraversion": 0.1015, "Agreeableness": 0.2585, "Conscientiousness": 0.0157, "Neuroticism": 0.0293, "Openness": -0.0302},
    "Packing Cubes": {"Extraversion": 0.1717, "Agreeableness": 0.3926, "Conscientiousness": -0.0288, "Neuroticism": -0.0009, "Openness": 0.0443},
    "Water Bottle": {"Extraversion": 0.1866, "Agreeableness": 0.2573, "Conscientiousness": 0.0545, "Neuroticism": -0.1716, "Openness": -0.0265}
}

correlations_gpt4o = {
    "Cabin Luggage": {"Extraversion": 0.27, "Agreeableness": 0.21, "Conscientiousness": 0.26, "Neuroticism": -0.00, "Openness": 0.55},
    "Packing Cubes": {"Extraversion": 0.34, "Agreeableness": 0.23, "Conscientiousness": 0.23, "Neuroticism": 0.00, "Openness": 0.52},
    "Water Bottle": {"Extraversion": 0.64, "Agreeableness": 0.17, "Conscientiousness": 0.27, "Neuroticism": 0.08, "Openness": 0.57}
}

correlations_gemini = {
    "Cabin Luggage": {"Extraversion": 0.18, "Agreeableness": 0.24, "Conscientiousness": 0.28, "Neuroticism": 0.08, "Openness": 0.29},
    "Packing Cubes": {"Extraversion": 0.26, "Agreeableness": 0.22, "Conscientiousness": 0.19, "Neuroticism": 0.14, "Openness": 0.35},
    "Water Bottle": {"Extraversion": 0.37, "Agreeableness": 0.18, "Conscientiousness": 0.09, "Neuroticism": -0.21, "Openness": 0.42}
}

fdr_threshold = 0.00022

# Function to compute Fisher's Z-tests
def fisher_tests(human_data, synthetic_data):
    results = {product: {} for product in human_data.keys()}
    
    for product in human_data.keys():
        for trait in human_data[product].keys():
            z, p = fishers_z_test(
                human_data[product][trait], 
                synthetic_data[product][trait], 
                n_human, 
                n_synthetic
            )
            results[product][trait] = {"z-score": z, "p-value": p}
    
    return results

results_gpt4o = fisher_tests(correlations_human, correlations_gpt4o)
results_gemini = fisher_tests(correlations_human, correlations_gemini)



<h1>Compute Fisher Z-test</h1>

In [24]:
import numpy as np
import scipy.stats as stats
import pandas as pd
from statsmodels.stats.multitest import multipletests

correlations_human = {
    "Cabin Luggage": {"Extraversion": 0.1015, "Agreeableness": 0.2585, "Conscientiousness": 0.0157, "Neuroticism": 0.0293, "Openness": -0.0302},
    "Packing Cubes": {"Extraversion": 0.1717, "Agreeableness": 0.3926, "Conscientiousness": -0.0288, "Neuroticism": -0.0009, "Openness": 0.0443},
    "Water Bottle": {"Extraversion": 0.1866, "Agreeableness": 0.2573, "Conscientiousness": 0.0545, "Neuroticism": -0.1716, "Openness": -0.0265}
}

correlations_gpt4o = {
    "Cabin Luggage": {"Extraversion": 0.27, "Agreeableness": 0.21, "Conscientiousness": 0.26, "Neuroticism": -0.00, "Openness": 0.55},
    "Packing Cubes": {"Extraversion": 0.34, "Agreeableness": 0.23, "Conscientiousness": 0.23, "Neuroticism": 0.00, "Openness": 0.52},
    "Water Bottle": {"Extraversion": 0.64, "Agreeableness": 0.17, "Conscientiousness": 0.27, "Neuroticism": 0.08, "Openness": 0.57}
}

correlations_gemini = {
    "Cabin Luggage": {"Extraversion": 0.18, "Agreeableness": 0.24, "Conscientiousness": 0.28, "Neuroticism": 0.08, "Openness": 0.29},
    "Packing Cubes": {"Extraversion": 0.26, "Agreeableness": 0.22, "Conscientiousness": 0.19, "Neuroticism": 0.14, "Openness": 0.35},
    "Water Bottle": {"Extraversion": 0.37, "Agreeableness": 0.18, "Conscientiousness": 0.09, "Neuroticism": -0.21, "Openness": 0.42}
}


df_human = pd.DataFrame(correlations_human).T
df_gpt4o = pd.DataFrame(correlations_gpt4o).T
df_gemini = pd.DataFrame(correlations_gemini).T


n_human = 373
n_synthetic = 373

# def fishers_z_test(r1, r2, n1, n2):
#     """Compute Fisher's Z-test for difference in correlations."""
#     z1 = 0.5 * np.log((1 + r1) / (1 - r1))
#     z2 = 0.5 * np.log((1 + r2) / (1 - r2))
#     se_diff = np.sqrt(1 / (n1 - 3) + 1 / (n2 - 3))
#     z_score = (z1 - z2) / se_diff
#     p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))
#     return z_score, p_value


# def compute_fishers_z_for_models(df_human, df_model, model_name):
#     traits = df_human.columns
#     z_scores_diag = np.zeros(len(traits))
#     p_values_diag = np.zeros(len(traits))

#     for i, trait in enumerate(traits):
#         z_scores_diag[i], p_values_diag[i] = fishers_z_test(
#             df_human[trait].mean(),
#             df_model[trait].mean(),
#             n_human, n_synthetic
#         )

#     _, p_values_fdr_diag, _, _ = multipletests(p_values_diag, alpha=0.00022, method='fdr_bh')

#     df_diag_model = pd.DataFrame({
#         "Trait": traits,
#         "Human Correlation (Mean)": df_human.mean(),
#         f"{model_name} Correlation (Mean)": df_model.mean(),
#         "P-Value (Fisher's Z-Test, FDR Adjusted)": p_values_fdr_diag,
#         "Significance": ["*" if p < 0.00022 else "" for p in p_values_fdr_diag]
#     })

#     return df_diag_model

# df_diag_gpt4o = compute_fishers_z_for_models(df_human, df_gpt4o, "GPT-4o")
# df_diag_gemini = compute_fishers_z_for_models(df_human, df_gemini, "Gemini")

# print("\nDiagonal Correlations: Human vs GPT-4o")
# print(df_diag_gpt4o.to_string(index=False))

# print("\nDiagonal Correlations: Human vs Gemini Flash-1.5")
# print(df_diag_gemini.to_string(index=False))

def compute_fishers_z_per_product(df_human, df_model, model_name):
    results = []
    for product in df_human.index:
        for trait in df_human.columns:
            r_human = df_human.loc[product, trait]
            r_model = df_model.loc[product, trait]
            z, p = fishers_z_test(r_human, r_model, n_human, n_synthetic)
            p = round(p, 4)

            similarity_threshold = 0.10
            alignment = "Aligned" if (abs(r_human - r_model) <= similarity_threshold) and (p >= 0.05) else "Misaligned"

            results.append({
                "Product": product,
                "Trait": trait,
                "Human ( r )": r_human,
                f"{model_name} ( r )": r_model,
                "z-statistics": z,
                "p-value": p,
                "alignment": alignment
            })

    df_results = pd.DataFrame(results)
    _, p_fdr, _, _ = multipletests(df_results["p-value"], alpha=0.00022, method='fdr_bh')
    df_results["p-value (FDR adjusted)"] = [f"{p:.4f}" for p in p_fdr]
    df_results["significance"] = ["*" if p < 0.00022 else "" for p in p_fdr]

    return df_results

df_z_gpt4o = compute_fishers_z_per_product(df_human, df_gpt4o, "GPT-4o")
df_z_gemini = compute_fishers_z_per_product(df_human, df_gemini, "Gemini")

In [25]:
df_z_gpt4o

Unnamed: 0,Product,Trait,Human ( r ),GPT-4o ( r ),z-statistics,p-value,alignment,p-value (FDR adjusted),significance
0,Cabin Luggage,Extraversion,0.1015,0.27,-2.380435,0.0173,Misaligned,0.0236,
1,Cabin Luggage,Agreeableness,0.2585,0.21,0.69815,0.4851,Aligned,0.5597,
2,Cabin Luggage,Conscientiousness,0.0157,0.26,-3.405905,0.0007,Misaligned,0.0015,
3,Cabin Luggage,Neuroticism,0.0293,-0.0,0.398637,0.6902,Aligned,0.7395,
4,Cabin Luggage,Openness,-0.0302,0.55,-8.821785,0.0,Misaligned,0.0,*
5,Packing Cubes,Extraversion,0.1717,0.34,-2.457442,0.014,Misaligned,0.021,
6,Packing Cubes,Agreeableness,0.3926,0.23,2.457523,0.014,Misaligned,0.021,
7,Packing Cubes,Conscientiousness,-0.0288,0.23,-3.577152,0.0003,Misaligned,0.0009,
8,Packing Cubes,Neuroticism,-0.0009,0.0,-0.012241,0.9902,Aligned,0.9902,
9,Packing Cubes,Openness,0.0443,0.52,-7.236128,0.0,Misaligned,0.0,*


In [26]:
df_z_gemini

Unnamed: 0,Product,Trait,Human ( r ),Gemini ( r ),z-statistics,p-value,alignment,p-value (FDR adjusted),significance
0,Cabin Luggage,Extraversion,0.1015,0.18,-1.089912,0.2758,Aligned,0.3761,
1,Cabin Luggage,Agreeableness,0.2585,0.24,0.268306,0.7885,Aligned,0.7885,
2,Cabin Luggage,Conscientiousness,0.0157,0.28,-3.699339,0.0002,Misaligned,0.0008,
3,Cabin Luggage,Neuroticism,0.0293,0.08,-0.691811,0.4891,Aligned,0.6114,
4,Cabin Luggage,Openness,-0.0302,0.29,-4.47183,0.0,Misaligned,0.0,*
5,Packing Cubes,Extraversion,0.1717,0.26,-1.260729,0.2074,Aligned,0.3457,
6,Packing Cubes,Agreeableness,0.3926,0.22,2.600792,0.0093,Misaligned,0.0199,
7,Packing Cubes,Conscientiousness,-0.0288,0.19,-3.007899,0.0026,Misaligned,0.0078,
8,Packing Cubes,Neuroticism,-0.0009,0.14,-1.929036,0.0537,Misaligned,0.1007,
9,Packing Cubes,Openness,0.0443,0.35,-4.367633,0.0,Misaligned,0.0,*


<h1>Compute Fisher z-test per prodcut & per model </h1>

In [4]:
import numpy as np
import scipy.stats as stats
import pandas as pd
from statsmodels.stats.multitest import multipletests

correlations_human = {
    "Cabin Luggage": {"Extraversion": 0.10, "Agreeableness": 0.26, "Conscientiousness": 0.02, "Neuroticism": 0.03, "Openness": -0.03},
    "Packing Cubes": {"Extraversion": 0.17, "Agreeableness": 0.39, "Conscientiousness": -0.03, "Neuroticism": -0.00, "Openness": 0.04},
    "Water Bottle": {"Extraversion": 0.19, "Agreeableness": 0.26, "Conscientiousness": 0.05, "Neuroticism": -0.17, "Openness": -0.03}
}

correlations_gpt4o = {
    "Cabin Luggage": {"Extraversion": 0.27, "Agreeableness": 0.21, "Conscientiousness": 0.26, "Neuroticism": -0.00, "Openness": 0.55},
    "Packing Cubes": {"Extraversion": 0.34, "Agreeableness": 0.23, "Conscientiousness": 0.23, "Neuroticism": 0.00, "Openness": 0.52},
    "Water Bottle": {"Extraversion": 0.64, "Agreeableness": 0.17, "Conscientiousness": 0.27, "Neuroticism": 0.08, "Openness": 0.57}
}

correlations_gemini = {
    "Cabin Luggage": {"Extraversion": 0.18, "Agreeableness": 0.24, "Conscientiousness": 0.28, "Neuroticism": 0.08, "Openness": 0.29},
    "Packing Cubes": {"Extraversion": 0.26, "Agreeableness": 0.22, "Conscientiousness": 0.19, "Neuroticism": 0.14, "Openness": 0.35},
    "Water Bottle": {"Extraversion": 0.37, "Agreeableness": 0.18, "Conscientiousness": 0.09, "Neuroticism": -0.21, "Openness": 0.42}
}


df_human = pd.DataFrame(correlations_human).T
df_gpt4o = pd.DataFrame(correlations_gpt4o).T
df_gemini = pd.DataFrame(correlations_gemini).T


n_human = 373
n_synthetic = 373


def fishers_z_test(r1, r2, n1, n2):
    """Compute Fisher's Z-test for difference in correlations."""
    z1 = 0.5 * np.log((1 + r1) / (1 - r1))
    z2 = 0.5 * np.log((1 + r2) / (1 - r2))
    se_diff = np.sqrt(1 / (n1 - 3) + 1 / (n2 - 3))
    z_score = (z1 - z2) / se_diff
    p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))
    return z_score, p_value


def compute_fishers_z_per_product(df_human, df_model, model_name):
    results = {}

    for product in df_human.index:
        traits = df_human.columns
        z_scores, p_values = [], []

        for trait in traits:
            z_score, p_value = fishers_z_test(
                df_human.loc[product, trait],
                df_model.loc[product, trait],
                n_human, n_synthetic
            )
            z_scores.append(z_score)
            p_values.append(p_value)

        _, p_values_fdr, _, _ = multipletests(p_values, alpha=0.00022, method='fdr_bh')

        df_product = pd.DataFrame({
            "Trait": traits,
            "Human Correlation": df_human.loc[product].values,
            f"{model_name} Correlation": df_model.loc[product].values, "P-Value (Fisher's Z-Test, FDR Adjusted)": [f"{p:.6f}" for p in p_values_fdr],
            "Z-Score": [f"{z:.3f}" for z in z_scores],
            "Significance": ["*" if p < 0.00022 else "" for p in p_values_fdr]
        })

        results[product] = df_product  # Store result

    return results

df_gpt4o_per_product = compute_fishers_z_per_product(df_human, df_gpt4o, "GPT-4o")
df_gemini_per_product = compute_fishers_z_per_product(df_human, df_gemini, "Gemini")

for product in df_human.index:
    print(f"\nDiagonal Correlations for {product}: Human vs GPT-4o")
    print(df_gpt4o_per_product[product].to_string(index=False))

    print(f"\nDiagonal Correlations for {product}: Human vs Gemini Flash-1.5")
    print(df_gemini_per_product[product].to_string(index=False))



Diagonal Correlations for Cabin Luggage: Human vs GPT-4o
            Trait  Human Correlation  GPT-4o Correlation P-Value (Fisher's Z-Test, FDR Adjusted) Z-Score Significance
     Extraversion               0.10                0.27                                0.027247  -2.401             
    Agreeableness               0.26                0.21                                0.589389   0.720             
Conscientiousness               0.02                0.26                                0.002039  -3.347             
      Neuroticism               0.03               -0.00                                0.683151   0.408             
         Openness              -0.03                0.55                                0.000000  -8.819            *

Diagonal Correlations for Cabin Luggage: Human vs Gemini Flash-1.5
            Trait  Human Correlation  Gemini Correlation P-Value (Fisher's Z-Test, FDR Adjusted) Z-Score Significance
     Extraversion               0.10            