In [2]:
import pandas as pd
import numpy as np
from pyvttbl import DataFrame
from pyvttbl.stats import Anova
import os

##### SET STUDY VARIABLES
## Between-factors >> group on....
# Dep: Exptertise, FATR: Study
human = True
human = False

FATR = False
FATR = True


if FATR:
    print("** FATR Study **")
    source = os.path.join("data","fatr_students.csv")
    bfactor = "Study"
else:
    print("** Depend Study **")
    source = os.path.join("data","depend_only.csv")
    bfactor = "Expertise"


temp_csv_path = 'temp_cleaned_data.csv'

## Set DVs >> process multiple in one script, or just one at a time; 
dv_cols = ['NasaTLX','Decision','Diagnosis Time','Cooper Harper']
dv_cols = ['Cooper Harper']  
dv_cols = ['NasaTLX']
dv_cols = ['Diagnosis Time']  
dv_cols = ['Decision']

# Create a pyvttbl.DataFrame from the cleaned pandas DataFrame
df = DataFrame()
df_pandas = pd.read_csv(source)

# Calculate NasaTLX if this is the DV for analysis
if dv_cols[0] != "NasaTLX":
    ## Drop rows with missing values in relevant columns
    # CAUTION! If dv_cols list contains multiple columns, then columns missing row values will obliterate other columns' row values
    df_pandas = df_pandas.dropna(subset=dv_cols)
    temp_csv_path = 'temp_cleaned_data.csv'
    df_pandas.to_csv(temp_csv_path, index=False)
else:
    print("create NasaTLX composit measure...")
    # Invert 'Performance' scores
    df_pandas['Performance'] = 11 - df_pandas['Performance']
    columns = ['Performance', 'Temporal Demand', 'Frustration Level', 'Mental Demand', 'Effort']
    for column in columns:
        median = df_pandas[column].median()
        df_pandas[column].fillna(median)
    # Calculate the NasaTLX composite score
    df_pandas['NasaTLX'] = df_pandas[columns].mean(axis=1)
    df_pandas.to_csv(temp_csv_path, index=False)

df.read_tbl(temp_csv_path)


# Initialize dictionaries to store ANOVA results and marginal means for two-way interactions
two_way_anova_results = {}
two_way_marginal_means = {}

# Just before running ANOVA, for debugging:
# print(f"Data preview:\n{df_pandas.head()}")
# print(f"Checking for NaNs in factors and DV: {df_pandas[['Scenario', 'Fault', bfactor, dv]].isna().sum()}")

    
for dv in dv_cols:
    print(f"Running ANOVA for: {dv}")
    
    aov = Anova()
    aov.run(df, dv, wfactors=['Scenario', 'Fault'], bfactors=[bfactor])   
    aov.truncate(test='gg')

    for dv in dv_cols:
        print(f"Running ANOVA for: {dv}")
        
        aov = Anova()
        aov.run(df, dv, wfactors=['Scenario', 'Fault'], bfactors=[bfactor])   
        aov.truncate(test='gg')
    
        if not human:
            # Assuming 'human' controls the output format
            anova_results = dict(aov)
            print("ANOVA Results:", anova_results)
            
            marginal_means = aov.get_marginal_means(['Scenario', 'Fault', bfactor])
            print(f"Marginal Means for: {dv}")
            print(marginal_means)  # Check if this prints expected results or is empty
            
            if not marginal_means[0]:  # Assuming marginal_means is a tuple of dicts
                print("Marginal means calculation failed or returned empty.")
            print("\n\n")


** FATR Study **
Running ANOVA for: Decision
('Scenario',)
('Study',)
('Fault', 'Study')
('Scenario', 'Fault', 'Study')
Running ANOVA for: Decision
('Scenario',)
('Study',)
('Fault', 'Study')
('Scenario', 'Fault', 'Study')
ANOVA Results: {('Fault',): {'y2': array([0.66666667, 0.83333333]), 'df': 1.0, 'eps_gg': 1.0, 'eps_hf': 1.0, 'eps_lb': 1.0, 'ss': 2.666666666666666, 'mss': 2.666666666666666, 'dfe': 22.0, 'sse': 4.791666666666667, 'mse': 0.2178030303030303, 'F': 12.243478260869562, 'p': 0.0020286288906186467, 'eta': 0.17391304347826084, 'obs': 48.0, 'critT': 2.073873067904015, 'se': 0.07127503522131787, 'ci': 0.139699069033783, 'lambda': 26.713043478260865, 'power': 0.9985217320274492, 'df_gg': 1.0, 'dfe_gg': 22.0, 'mss_gg': 2.666666666666666, 'mse_gg': 0.2178030303030303, 'F_gg': 12.243478260869562, 'p_gg': 0.0020286288906186467, 'obs_gg': 48.0, 'critT_gg': 2.073873067904015, 'se_gg': 0.07127503522131787, 'ci_gg': 0.139699069033783, 'lambda_gg': 26.713043478260865, 'power_gg': 0.998

In [9]:
def extract_interaction_data(anova_results, marginal_means):
    interactions = {}
    for key in anova_results:
        if len(key) == 2 and key != ('SUBJECT',):
            interactions[key] = anova_results[key]

    report = "Two-Way Interactions:\n"
    for i, (key, value) in enumerate(interactions.items(), start=1):
        df = value.get('df', 'N/A')
        dfe = value.get('dfe', 'N/A')
        f_value = value.get('F', 'N/A')
        p_value = value.get('p', 'N/A')
        eta_squared = value.get('eta', 'N/A')
        report += f"{i}. {' * '.join(key)}: F({df}, {dfe}) = {f_value}, p = {p_value}, η² = {eta_squared}\n"

    report += "\nMarginal Means and 95% CI:\n"
    for key in interactions:
        interaction_key = ''.join(key)
        if interaction_key in marginal_means[0]:
            report += f"\n{' * '.join(key)}:\n"
            for i, condition in enumerate(marginal_means[2][interaction_key]):
                mean = marginal_means[0][interaction_key][i]
                se = marginal_means[1][interaction_key][i]
                ci_lower = mean - 1.96 * se
                ci_upper = mean + 1.96 * se
                report += f"- {', '.join(condition)}: M = {mean:.3f}, 95% CI [{ci_lower:.3f}, {ci_upper:.3f}]\n"

    return report

# Assuming you have the anova_results and marginal_means dictionaries available
# print(marginal_means)
report = extract_interaction_data(anova_results, marginal_means)
print(report)

Two-Way Interactions:
1. Scenario * Fault: F(1.0, 22.0) = 6.9620253164556924, p = 0.01500386028216144, η² = 0.07598784194528874
2. Scenario * Study: F(1.0, 22.0) = 5.966101694915253, p = 0.023076638811863698, η² = 0.049999999999999996
3. Scenario * SUBJECT: F(22.0, N/A) = N/A, p = N/A, η² = N/A
4. Fault * SUBJECT: F(22.0, N/A) = N/A, p = N/A, η² = N/A

Marginal Means and 95% CI:

Scenario * Fault:
- LOFW, Spoof: M = 0.750, 95% CI [0.573, 0.927]
- LOFW, True Fault: M = 0.875, 95% CI [0.740, 1.010]
- SGTR, Spoof: M = 0.417, 95% CI [0.215, 0.618]
- SGTR, True Fault: M = 0.958, 95% CI [0.877, 1.040]

Scenario * Study:
- LOFW, Dependency: M = 0.958, 95% CI [0.877, 1.040]
- LOFW, FATR: M = 0.667, 95% CI [0.474, 0.859]
- SGTR, Dependency: M = 0.667, 95% CI [0.474, 0.859]
- SGTR, FATR: M = 0.708, 95% CI [0.523, 0.894]



In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Example data for 'Scenario' and 'Fault' interaction
scenarios = ['LOFW', 'SGTR']
faults = ['Spoof', 'True Fault']
means = np.array([0.75, 0.875, 0.41666667, 0.95833333])  # Example means
errors = np.array([0.09028939, 0.06895966, 0.10279899, 0.04166667])  # Example errors

# Reshape for plotting
means = means.reshape((2, 2))
errors = errors.reshape((2, 2))

# Plotting
fig, ax = plt.subplots()
for i, fault in enumerate(faults):
    ax.errorbar(scenarios, means[:, i], yerr=errors[:, i], label=fault, fmt='-o')

ax.set_xlabel('Scenario')
ax.set_ylabel('Marginal Mean Decision')
ax.set_title('Interaction of Scenario and Fault on Decision')
ax.legend()

plt.show()



https://claude.ai/chat/460a76bc-c646-4393-9806-431fceb7bdb9