# Behavioral manipulation check: mean proportion of wins (low / mid / high)

This notebook reproduces the **behavioral** manipulation check from the MATLAB script:
- Compute each subject’s mean win rate per task (`task==1/2/3`, i.e. low / mid / high value), using only valid trials (`early==0 & invalid==0`).
- Run a **one-way repeated-measures ANOVA** on the subject×condition matrix (N×3) using your existing `rm_anova_oneway`.
- Print each condition’s mean and 95% CI across subjects (t-based, like MATLAB’s `tinv`).

✅ You only need to edit the import line for `rm_anova_oneway` and set `BIDS_ROOT`.


In [10]:
# --- USER SETTINGS ---
# Everything a reader normally needs to tune lives in this cell.
#
# Path to your BIDS dataset root (the folder that contains sub-XX/beh/...).
# NOTE: the value below is a machine-specific absolute path; point it at your local copy of the dataset.
BIDS_ROOT = r"/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/ds004147"  # TODO: change me

# Optional: restrict to a subset of subjects (strings or ints). Example:
# SUBJECTS = [27, 28, 31]
# Purely numeric IDs are zero-padded to two digits when the file path is built (3 -> sub-03).
SUBJECTS = None  # None = auto-detect every sub-*_task-casinos_beh.tsv under BIDS_ROOT (recursive search)

# Optional: save a CSV summary (one row per subject with complete low/mid/high win rates)
SAVE_CSV = True
OUT_CSV = r"behavior_task_winrates.csv"  # relative path, so it is saved in the current working directory


In [11]:
# --- IMPORTS ---
import os
from pathlib import Path
import numpy as np
import pandas as pd
from scipy import stats
import sys

# --- IMPORT YOUR EXISTING FUNCTION HERE ---
# Change this line to wherever rm_anova_oneway lives in your repo.
# Examples you might use:
# from stats.rewp_robust import rm_anova_oneway
# from stats.s11b_stats_utils import rm_anova_oneway
# from s11b_stats import rm_anova_oneway
from stats.rewp_parametric import rm_anova_oneway  # TODO: edit if needed




In [12]:
def _coerce_outcome_to_01(outcome: pd.Series) -> np.ndarray:
    """Ensure outcome is 0/1.
    - If outcome is already {0,1}, keep.
    - If outcome is {-1,1}, map 1->1, -1->0.
    - Otherwise: treat outcome>0 as win.
    """
    x = outcome.to_numpy(dtype=float)
    x = x[np.isfinite(x)]
    u = set(np.unique(x).tolist())
    if u.issubset({0, 1}):
        return outcome.to_numpy(dtype=float)
    if u.issubset({-1, 1}):
        return (outcome.to_numpy(dtype=float) == 1).astype(float)
    return (outcome.to_numpy(dtype=float) > 0).astype(float)


def mean_ci_t(x, alpha=0.05):
    """Return ``(mean, (ci_low, ci_high), n)`` for a vector, using a t-based CI.

    Non-finite entries are discarded first. With no usable values the mean and
    both CI bounds are NaN; with a single value the mean is reported but the
    CI bounds are NaN. ``alpha`` is the two-sided error rate (0.05 -> 95% CI),
    mirroring a MATLAB ``tinv``-style interval.
    """
    samples = np.asarray(x, float)
    samples = samples[np.isfinite(samples)]
    count = samples.size
    undefined_ci = (np.nan, np.nan)

    if count == 0:
        return np.nan, undefined_ci, 0

    center = float(np.mean(samples))
    if count < 2:
        # A single observation has no sample standard deviation.
        return center, undefined_ci, count

    spread = float(np.std(samples, ddof=1))
    t_crit = float(stats.t.ppf(1 - alpha/2, df=count-1))
    half_width = t_crit * spread / np.sqrt(count)
    return center, (center - half_width, center + half_width), count


def compute_task_winrates_from_beh(beh_path: str | Path):
    """Summarise one subject's behavioural TSV into per-task win rates.

    The file must provide the columns ``task``, ``early``, ``invalid`` and
    ``outcome``. Only trials with ``early == 0`` and ``invalid == 0`` are used.
    Task codes 1, 2 and 3 are reported as ``low``, ``mid`` and ``high``.

    Returns a dict with the mean win rate per task (NaN when a task has no
    valid trials), the matching trial counts (``n_low``/``n_mid``/``n_high``)
    and the source path as a string. Raises ``ValueError`` if a required
    column is absent.
    """
    beh_path = Path(beh_path)
    trials = pd.read_csv(beh_path, sep='\t')

    absent = [col for col in ('task', 'early', 'invalid', 'outcome') if col not in trials.columns]
    if absent:
        raise ValueError(f"{beh_path} missing columns: {absent}")

    usable = (trials['early'] == 0) & (trials['invalid'] == 0)
    wins = _coerce_outcome_to_01(trials['outcome'])

    rates, counts = {}, {}
    for label, task_id in (('low', 1), ('mid', 2), ('high', 3)):
        selector = ((trials['task'] == task_id) & usable).to_numpy()
        picked = wins[selector]
        rates[label] = float(np.nanmean(picked)) if picked.size else np.nan
        counts[f'n_{label}'] = int(picked.size)

    # Rates first, then counts, then provenance: this fixes the column order downstream.
    return {**rates, **counts, 'beh_path': str(beh_path)}


def collect_all_subject_task_winrates(bids_root: str | Path, subjects=None):
    """Collect low/mid/high win rates for every subject (or a requested subset).

    Parameters
    ----------
    bids_root : str or Path
        Dataset root containing ``sub-XX/beh/sub-XX_task-casinos_beh.tsv``.
    subjects : iterable of int or str, int, str, or None
        Subject IDs to include. Purely numeric IDs are zero-padded to two
        digits; a leading ``sub-`` is accepted and stripped; a single ID may be
        passed without wrapping it in a list. ``None`` (default) searches
        ``bids_root`` recursively for every ``sub-*_task-casinos_beh.tsv``.

    Returns
    -------
    pandas.DataFrame
        One row per behavioural file: ``subject`` plus the fields returned by
        ``compute_task_winrates_from_beh``, sorted by ``subject``.

    Raises
    ------
    FileNotFoundError
        If no behavioural file is found at all.

    Warns
    -----
    UserWarning
        If some (but not all) requested subjects have no behavioural file;
        those subjects are skipped.
    """
    import warnings  # local import: only needed for the missing-file notice

    bids_root = Path(bids_root)
    prefix = 'sub-'
    not_found = []

    if subjects is None:
        beh_files = sorted(bids_root.rglob('sub-*_task-casinos_beh.tsv'))
    else:
        # A bare ID would otherwise be iterated character by character (str) or fail (int).
        if isinstance(subjects, (str, int, np.integer)):
            subjects = [subjects]
        beh_files = []
        for s in subjects:
            label = str(s)
            if label.startswith(prefix):
                # Accept 'sub-27' as well as '27' / 27 instead of building 'sub-sub-27'.
                label = label[len(prefix):]
            if label.isdigit():
                label = f"{int(label):02d}"
            candidate = bids_root / f"sub-{label}" / 'beh' / f"sub-{label}_task-casinos_beh.tsv"
            (beh_files if candidate.exists() else not_found).append(candidate)

    if not beh_files:
        raise FileNotFoundError(f"No beh.tsv found under {bids_root}")

    if not_found:
        # Make the previously silent drop of requested subjects visible.
        warnings.warn(
            "Skipping requested subjects without a behavioural file: "
            + ", ".join(str(p) for p in not_found),
            stacklevel=2,
        )

    rows = []
    for p in beh_files:
        # Take the ID from the file name (sub-<ID>_...), not from the directory
        # components, so an ancestor folder starting with 'sub-' cannot be mistaken for it.
        sub = p.name[len(prefix):].split('_', 1)[0] if p.name.startswith(prefix) else None
        res = compute_task_winrates_from_beh(p)
        rows.append({'subject': sub, **res})

    df = pd.DataFrame(rows).sort_values('subject').reset_index(drop=True)
    return df


In [13]:
# --- LOAD & COMPUTE WINRATES ---
# Build the subject-level table, then keep only subjects that have a finite
# win rate in all three conditions (the repeated-measures ANOVA needs complete rows).
df = collect_all_subject_task_winrates(BIDS_ROOT, subjects=SUBJECTS)
display(df.head())

condition_cols = ['low', 'mid', 'high']
X = df[condition_cols].to_numpy(float)
mask = np.isfinite(X).all(axis=1)  # True for rows without NaN/inf
df_use = df[mask].reset_index(drop=True)
X_use = X[mask]

n_complete, n_total = len(df_use), len(df)
print(f"Subjects with complete low/mid/high winrates: {n_complete} / {n_total}")


Unnamed: 0,subject,low,mid,high,n_low,n_mid,n_high,beh_path
0,27,0.507042,0.56338,0.566434,142,142,143,/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/d...
1,28,0.534722,0.661972,0.638889,144,142,144,/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/d...
2,29,0.44086,0.507812,0.5,93,128,120,/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/d...
3,30,0.453237,0.565217,0.492647,139,138,136,/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/d...
4,31,0.485915,0.559441,0.721429,142,143,140,/Users/xuyg/GitHub/EEG_Reward-Processing_ERP/d...


Subjects with complete low/mid/high winrates: 12 / 12


In [14]:
# --- REPORT MEANS + 95% CI (like MATLAB) ---
# One line per condition: across-subject mean win rate and its t-based 95% CI, in percent.
for condition in ('low', 'mid', 'high'):
    mean_rate, ci_bounds, n_subjects = mean_ci_t(df_use[condition].to_numpy(float))
    mean_pct = mean_rate * 100
    lo_pct, hi_pct = (bound * 100 for bound in ci_bounds)
    print(f"{condition}-value: {mean_pct:.2f}%, 95% CI [{lo_pct:.2f}, {hi_pct:.2f}] (n={n_subjects})")


low-value: 48.60%, 95% CI [46.71, 50.50] (n=12)
mid-value: 59.03%, 95% CI [55.78, 62.28] (n=12)
high-value: 60.00%, 95% CI [54.08, 65.92] (n=12)


In [15]:
# --- RUN ONE-WAY REPEATED-MEASURES ANOVA (your existing function) ---
def _fmt_stat(value, spec=".2f"):
    """Format a statistic with ``spec``; return '?' when the value is missing (None)."""
    return "?" if value is None else format(value, spec)


# X_use is the subjects x conditions matrix restricted to complete rows.
anova_res = rm_anova_oneway(X_use)
print("\nReturned dict:", anova_res)

# Pretty one-liner similar to paper.
# Keys are read with .get() because the imported rm_anova_oneway may not return
# every field; a missing field is printed as '?' rather than raising a TypeError
# inside the format string.
df1, df2 = anova_res.get('df1'), anova_res.get('df2')
F, p = anova_res.get('F'), anova_res.get('p')
etap, etag = anova_res.get('partial_eta2'), anova_res.get('generalized_eta2')

if p is None:
    p_str = "= ?"
elif np.isfinite(p) and p < 0.001:
    p_str = "< .001"
else:
    p_str = f"= {p:.4g}"

print(
    f"F({_fmt_stat(df1, '')},{_fmt_stat(df2, '')}) = {_fmt_stat(F)}, p {p_str}, "
    f"ηp² = {_fmt_stat(etap)}, ηg² = {_fmt_stat(etag)}"
)


Normality var 1: met (p=0.8828)
Normality var 2: met (p=0.1956)
Normality var 3: met (p=0.9466)
RM ANOVA: F(2,22) = 14.08, p = 0.0001155
partial eta^2 = 0.5614
generalized eta^2 = 0.4168
Friedman: chi2 = 15.17, p = 0.0005089

Returned dict: {'F': 14.080137126897442, 'p': 0.00011552888001464812, 'df1': 2, 'df2': 22, 'partial_eta2': 0.5614059068200651, 'generalized_eta2': 0.4167876866125043, 'friedman_chi2': 15.166666666666657, 'friedman_p': 0.0005088621855732938, 'friedman_n': 12}
F(2,22) = 14.08, p < .001, ηp² = 0.56, ηg² = 0.42


In [16]:
# --- OPTIONAL: SAVE SUBJECT-LEVEL WINRATES ---
# Writes the complete-case subject table to OUT_CSV when SAVE_CSV is enabled.
if SAVE_CSV:
    out = Path(OUT_CSV)
    export_cols = ['subject', 'low', 'mid', 'high', 'n_low', 'n_mid', 'n_high', 'beh_path']
    df_use[export_cols].to_csv(out, index=False)
    # Show the absolute location when the file is there; otherwise echo the path as given.
    shown_path = out.resolve() if out.exists() else out
    print("Saved:", shown_path)


Saved: /Users/xuyg/GitHub/EEG_Reward-Processing_ERP/scripts/behavior_task_winrates.csv
