In [1]:
# Data and statistical testing
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency, fisher_exact

In [None]:
# ---------------------------------------------------------------------------
# 1. Load data
# ---------------------------------------------------------------------------
# Read the study workbook with pandas' default read_excel options.
# The relative path is resolved against the notebook's working directory.
data = pd.read_excel(io='data.xlsx')

# ---------------------------------------------------------------------------
# 2. Aim 3: Association of model characteristics with calibration/validation
#    For each (characteristic × procedure) we run Chi-square or Fisher's exact.
#    Fisher's exact is used only for 2×2 tables with any expected count < 5.
# ---------------------------------------------------------------------------
# Columns of `data` used as the row variable of each contingency table.
model_characteristics = [
    "type",
    "cardiometabolic",
    "publication",
    "data",
    "country",
    "tool",
    "open",
]

# Columns of `data` used as the column variable of each contingency table.
calibration_validation_vars = [
    "calib1",
    "calib2",
    "parameter",
    "target",
    "GOF",
    "GOF_Stat",
    "GOF_Visual",
    "search",
    "convergence",
    "stopping",
    "validation",
    "face",
    "verification",
    "cross",
    "external",
]

# Smallest expected cell count at which a 2×2 table is still analysed with the
# chi-square test; if any expected count is below it, Fisher's exact test is
# used instead.
MIN_EXPECTED_COUNT = 5

# One record per (characteristic, procedure) pair.
results = []
for characteristic in model_characteristics:
    for procedure in calibration_validation_vars:
        # Rows: levels of the characteristic; columns: levels of the procedure.
        contingency_table = pd.crosstab(data[characteristic], data[procedure])
        test_used = ""
        chi2 = p_value = dof = None

        if contingency_table.size == 0:
            # Empty table (e.g. no row has both variables filled in).
            # chi2_contingency raises ValueError on such input, which would
            # abort the loop and discard every result computed so far, so the
            # pair is recorded as untested instead.
            test_used = "Not performed (empty table)"
        else:
            # A single call yields the statistic, p-value, degrees of freedom
            # and the expected counts needed to choose the test.
            # correction=False turns off Yates' continuity correction.
            chi2, p_value, dof, expected = chi2_contingency(
                contingency_table, correction=False
            )
            is_sparse_2x2 = (
                contingency_table.shape == (2, 2)
                and (expected < MIN_EXPECTED_COUNT).any()
            )
            if is_sparse_2x2:
                test_used = "Fisher's Exact Test"
                # Fisher's exact test reports no chi-square statistic or
                # degrees of freedom, so those fields stay empty.
                chi2 = dof = None
                _, p_value = fisher_exact(contingency_table)
            else:
                # Larger tables always use chi-square, whatever their
                # expected counts.
                test_used = "Chi-Square Test"

        results.append({
            "Characteristic": characteristic,
            "Procedure": procedure,
            "Test": test_used,
            "Chi2": chi2,
            "p-value": p_value,
            "Degrees of Freedom": dof
        })

# ---------------------------------------------------------------------------
# 3. Assemble the results table
# ---------------------------------------------------------------------------
# One row per test; the columns come from the keys of the dicts in `results`.
results_df = pd.DataFrame(data=results)
# The Excel export is currently disabled; uncomment the line below to write
# the table to disk.
# results_df.to_excel("Results_Aim3.xlsx", index=False)