In [89]:
import os, glob, re, csv

import nibabel as nib
import numpy as np
import pandas as pd
import statsmodels.api as sm
from nilearn import datasets
from nilearn.image import resample_to_img
from scipy.stats import ttest_1samp

In [90]:
# Load one cleaned BOLD run. It is inspected here and reused further down as the
# target grid when the Schaefer atlas is first resampled.
bold_img = nib.load('/Volumes/Passport/fmriprep/derivatives/pieMan_cleaned/sub-002/func/sub-002_task-pieman_run-1_cleaned_desc-masked_bold.nii.gz')

# Print voxel size (spatial resolution) and TR (temporal resolution).
# The first three zooms are reported as the voxel size and the fourth as the TR.
# NOTE(review): the TR printed is whatever the header of the *cleaned* file stores;
# confirm the cleaning step preserved the acquisition TR.
zooms = bold_img.header.get_zooms()
print(f"Voxel size (mm): {zooms[:3]}")
print(f"TR (s): {zooms[3]}")
print(f"Shape {bold_img.shape}")

Voxel size (mm): (3.0, 3.0, 4.0)
TR (s): 1.0
Shape (65, 77, 49, 160)


In [151]:
# MAIN SETTINGS FOR THIS RUN OF THE NOTEBOOK
# Trait whose behavioural model RDM is tested against the neural RDMs.
TRAIT_LABEL = "feeling Gloomy"

# Our 13 trait labels
# TRAIT_LABELS = ["Open-minded", "feeling Affectionate", "Attentive", "Assertive", "feeling Gloomy", "feeling Peaceful", "Agreeable", "Judging", "feeling Angry", "feeling Bewildered", "Impulsive", "Self-disciplined", "Contemplating"]

# Versions used in folder and file names: every space and hyphen becomes "_".
TRAIT_LABEL_SAVE_STRING = re.sub(r"[ -]", "_", TRAIT_LABEL)
STIMULUS_LABEL_SAVE_STRING = "pieman"

In [152]:
# ──────────────────────────────────────────────────────────────
# 0) PATHS & I/O
# ──────────────────────────────────────────────────────────────
# Root of the fMRIPrep tree (same location the cleaning script uses); everything
# this notebook reads or writes lives under its "derivatives" folder.
root_dir = "/Volumes/Passport/fmriprep"
deriv_dir = os.path.join(root_dir, "derivatives")

# Behaviour-model RDM written by the behavioural RSA step, stored as
# derivatives/RDMs_behavior/<stimulus>_<trait>_RDM.npy
model_rdm = np.load(
    os.path.join(
        deriv_dir,
        "RDMs_behavior",
        "_".join([STIMULUS_LABEL_SAVE_STRING, TRAIT_LABEL_SAVE_STRING, "RDM.npy"]),
    )
)

In [153]:
# ──────────────────────────────────────────────────────────────
# 1) SUBJECT / RUN FILTERS  (copy-paste verbatim)  ─────────────
# ──────────────────────────────────────────────────────────────
# Subjects dropped from the analysis entirely (IDs are zero-padded to 3 digits).
exclude_subs = {
    f"sub-{num:03d}" for num in (1, 21, 22, 38, 56, 68, 69)
}
# (subject, run) pairs to skip; the run label is kept as a string because it is
# compared against the digits captured from the file name.
exclude_sub_runs = {
    (f"sub-{num:03d}", "2")
    for num in (2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16)
}
# Set to a subject ID (e.g. "sub-002") to process a single person; None = everyone.
target_subject = None


In [154]:
# ----------------------------------------------------------------
# 2)  BUILD SUBJECT LIST  (from cleaned derivatives)  ------------
# ----------------------------------------------------------------
cleaned_root = os.path.join(deriv_dir, f"{STIMULUS_LABEL_SAVE_STRING}_cleaned")

# Only real directories count as subjects: a stray *file* whose name starts with
# "sub-" would otherwise be listed as a subject, produce no CSV, and break the
# later cell that reads one CSV per subject.
all_subs = sorted(
    entry
    for entry in os.listdir(cleaned_root)
    if entry.startswith("sub-")
    and os.path.isdir(os.path.join(cleaned_root, entry))
)

if target_subject:
    # Single-subject mode: the requested subject must exist on disk.
    if target_subject not in all_subs:
        raise ValueError(f"{target_subject} not found in {cleaned_root}")
    subjects = [target_subject]
else:
    # Default: everyone on disk except the globally excluded subjects.
    subjects = [s for s in all_subs if s not in exclude_subs]

print("Subjects to process →", ", ".join(subjects))

Subjects to process → sub-002, sub-003, sub-004, sub-005, sub-006, sub-007, sub-008, sub-009, sub-010, sub-011, sub-012, sub-013, sub-014, sub-015, sub-016, sub-017, sub-018, sub-019, sub-020, sub-023, sub-024, sub-025, sub-026, sub-027, sub-028, sub-029, sub-030, sub-031, sub-032, sub-033, sub-034, sub-035, sub-036, sub-037, sub-039, sub-040, sub-041, sub-042, sub-043, sub-044, sub-045, sub-046, sub-047, sub-048, sub-049, sub-050, sub-051, sub-052, sub-053, sub-054, sub-055, sub-057, sub-058, sub-059, sub-060, sub-061, sub-062, sub-063, sub-064, sub-065, sub-066, sub-067, sub-070, sub-071, sub-072, sub-073, sub-074, sub-075, sub-076, sub-077, sub-078, sub-079, sub-080, sub-081, sub-082


In [155]:
# ──────────────────────────────────────────────────────────────
# 3) FETCH SCHAEFER ATLAS  ─────────────────────────────────────
# ──────────────────────────────────────────────────────────────

# Schaefer parcel/atlas parameters
n_rois = 200
yeo_networks = 17
resolution_mm = 2                   # resolution of the fetched Schaefer atlas (double check!)

schaefer = datasets.fetch_atlas_schaefer_2018(
    n_rois=n_rois,
    yeo_networks=yeo_networks,
    resolution_mm=resolution_mm,
)
# The atlas is fetched at `resolution_mm`, while the functional images are coarser
# (3x3x4 mm for Pieman, 2.5 mm isotropic for e.g. Black and Forgot), so it must be
# resampled onto the functional grid before it can be used as a mask.
atlas_img = nib.load(schaefer['maps'])

# Nearest-neighbour interpolation so resampled voxels keep a single parcel ID
# instead of a blend of neighbouring IDs.
atlas_resampled = resample_to_img(atlas_img, bold_img, interpolation='nearest')
atlas_data = atlas_resampled.get_fdata()

# Build a label array aligned with the atlas values: index 0 is the background
# and index k is the name of parcel k.
# Depending on the nilearn version the labels arrive as bytes or as str, and may
# or may not already start with a "Background" entry; both cases are handled so
# the indexing convention above always holds.
network_prefix = f"{yeo_networks}Networks_"
labels = [
    (lab.decode('utf-8') if isinstance(lab, bytes) else str(lab)).replace(network_prefix, '')
    for lab in schaefer['labels']
]
if not labels or labels[0] != "Background":
    labels.insert(0, "Background")
labels = np.array(labels)

In [156]:
# ──────────────────────────────────────────────────────────────
# 4) Define OLS function  ─────────────────────────────────────
# ──────────────────────────────────────────────────────────────

def rsa_ols(neural_rdm: np.ndarray, model_rdm: np.ndarray):
    """Regress a neural RDM on a model RDM with ordinary least squares.

    Only the strictly lower triangle of each matrix (diagonal excluded) enters
    the regression, so every pair of items is used exactly once. The model is
    ``neural ~ intercept + beta * model``.

    Parameters
    ----------
    neural_rdm : np.ndarray
        Square (n x n) neural dissimilarity matrix.
    model_rdm : np.ndarray
        Square model dissimilarity matrix with the same shape as ``neural_rdm``.

    Returns
    -------
    (beta, r2) : tuple of float
        Slope for the model predictor and the R² of the fit.

    Raises
    ------
    ValueError
        If ``neural_rdm`` is not a square 2-D array or the two shapes differ.
    """
    neural_rdm = np.asarray(neural_rdm)
    model_rdm = np.asarray(model_rdm)

    if neural_rdm.ndim != 2 or neural_rdm.shape[0] != neural_rdm.shape[1]:
        raise ValueError(f"neural_rdm must be square, got shape {neural_rdm.shape}")
    if model_rdm.shape != neural_rdm.shape:
        raise ValueError(
            f"RDM shapes differ: neural {neural_rdm.shape} vs model {model_rdm.shape}"
        )

    # ------------------------------------------------------------------
    # EXTRACT ONE TRIANGLE (strictly lower, diagonal excluded)  ↳ k = -1
    # The size comes from the input instead of a fixed 160, so an RDM of a
    # different length is neither silently truncated nor indexed out of range.
    # ------------------------------------------------------------------
    n_items = neural_rdm.shape[0]
    i_low = np.tril_indices(n_items, k=-1)

    y = neural_rdm[i_low]                         # dependent variable
    X = model_rdm[i_low][:, None]                 # predictor (N × 1)
    X = sm.add_constant(X)                        # add intercept column

    # ------------------------------------------------------------------
    # OLS FIT (statsmodels)
    # ------------------------------------------------------------------
    model = sm.OLS(y, X).fit()

    beta = float(model.params[1])                 # coefficient for the model predictor
    r2   = float(model.rsquared)
    return beta, r2

# ──────────────────────────────────────────────────────────────
# 5) Define MAIN per-subject function  (adds run-skip check)  ─────────
# ──────────────────────────────────────────────────────────────
def run_parcel_rsa_for_subject(sub):
    """Run the parcel-wise RSA for one subject and write the results to a CSV.

    Every cleaned BOLD file of ``sub`` is located (both the ``run-*`` and the
    single-run file naming are globbed). Runs listed in ``exclude_sub_runs`` are
    skipped. For each remaining run the Schaefer atlas is resampled onto that
    run's grid, and for each parcel a timepoint-by-timepoint RDM
    (1 - Pearson correlation between the parcel's voxel patterns) is regressed
    on the notebook-level ``model_rdm`` with ``rsa_ols``.

    Parameters
    ----------
    sub : str
        Subject folder name, e.g. "sub-002".

    Returns
    -------
    None
        Results are written to
        ``<deriv_dir>/RSA_stats/<stimulus>/<trait>/<sub>_<stimulus>_<trait>_parcel_RSA.csv``
        with columns subject, run, parcel, beta, r2. If no cleaned file is found
        a message is printed and nothing is written.

    Notes
    -----
    Files without a ``run-`` entity get the run label "NA"; pandas parses that
    literal as a missing value by default when the CSV is read back.
    """
    func_dir   = os.path.join(cleaned_root, sub, "func")
    run_pat    = os.path.join(func_dir,
                  f"{sub}_task-{STIMULUS_LABEL_SAVE_STRING}_run-*_*cleaned_desc-masked_bold.nii.gz")
    single_pat = os.path.join(func_dir,
                  f"{sub}_task-{STIMULUS_LABEL_SAVE_STRING}_cleaned_desc-masked_bold.nii.gz")
    bold_files = sorted(glob.glob(run_pat)) + sorted(glob.glob(single_pat))
    if not bold_files:
        print(f"⏩ {sub}: no cleaned runs")
        return

    # A correlation between two timepoints' spatial patterns needs at least two
    # voxels; a one-voxel parcel would yield an all-NaN RDM (and NaN statistics).
    min_voxels = 2

    rows = []
    for bf in bold_files:
        m   = re.search(r"_run-(\d+)_", os.path.basename(bf))
        run = m.group(1) if m else None
        if run and (sub, run) in exclude_sub_runs:
            print(f"   • skipping {sub} run-{run} (in exclusion list)")
            continue

        run_img  = nib.load(bf)
        run_data = run_img.get_fdata()   # assumes 4-D (x, y, z, time) — TODO confirm

        # Resample the atlas onto this run's own grid so the parcel mask lines up
        # with the data voxel-for-voxel.
        run_atlas = resample_to_img(atlas_img, run_img, interpolation="nearest").get_fdata()

        for parcel_id in range(1, n_rois + 1):
            mask = run_atlas == parcel_id
            if mask.sum() < min_voxels:
                continue
            # run_data[mask, :] is (voxels, time); transposing makes each row one
            # timepoint's spatial pattern, so corrcoef compares timepoints.
            patterns = run_data[mask, :].T
            rdm = 1 - np.corrcoef(patterns).astype(np.float32)
            beta, r2 = rsa_ols(rdm, model_rdm)
            rows.append([sub, run or "NA", parcel_id, beta, r2])

    # ---- write one CSV per subject ----
    # makedirs creates the stimulus and trait sub-folders in one call.
    trait_folder = os.path.join(
        deriv_dir, "RSA_stats", STIMULUS_LABEL_SAVE_STRING, TRAIT_LABEL_SAVE_STRING
    )
    os.makedirs(trait_folder, exist_ok=True)

    out_csv = os.path.join(trait_folder, f"{sub}_{STIMULUS_LABEL_SAVE_STRING}_{TRAIT_LABEL_SAVE_STRING}_parcel_RSA.csv")
    with open(out_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["subject", "run", "parcel", "beta", "r2"])
        writer.writerows(rows)
    print(f"✅ {sub}: {len(rows)} rows → {out_csv}")


In [157]:
# ----------------------------------------------------------------------
# 6. CALL FUNCTION FOR EACH SUBJECT
# ----------------------------------------------------------------------
# Subjects are processed one after another; each call either writes that
# subject's CSV or prints a message when no cleaned runs are found.
for sub in subjects:
    run_parcel_rsa_for_subject(sub)

print("\nALL DONE 🎉")

✅ sub-002: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-002_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-003: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-003_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-004: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-004_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-005: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-005_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-006: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-006_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-007: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-007_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-008: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/sub-008_pieman_feeling_Gloomy_parcel_RSA.csv
✅ sub-009: 200 rows → /Volumes/Pas

In [158]:
# Sanity check on a single subject: load the per-parcel results of sub-002.
# The path is built from deriv_dir (as in the cells that write these files)
# rather than repeating the absolute path.
rsa_df = pd.read_csv(
    os.path.join(
        deriv_dir,
        "RSA_stats",
        STIMULUS_LABEL_SAVE_STRING,
        TRAIT_LABEL_SAVE_STRING,
        f"sub-002_{STIMULUS_LABEL_SAVE_STRING}_{TRAIT_LABEL_SAVE_STRING}_parcel_RSA.csv",
    )
)

# identify parcel with max R²
best_row = rsa_df.loc[rsa_df['r2'].idxmax()]
best_parcel = int(best_row['parcel'])
print(f"Best parcel  ➜  {best_parcel:03d}  (R² = {best_row['r2']:.6f})")

Best parcel  ➜  162  (R² = 0.000326)


In [159]:
# Bare expression so the notebook renders the frame with its rich (HTML) display.
rsa_df

     subject  run  parcel      beta        r2
0    sub-002    1       1 -0.000267  0.000015
1    sub-002    1       2  0.000239  0.000006
2    sub-002    1       3 -0.000121  0.000002
3    sub-002    1       4  0.000244  0.000006
4    sub-002    1       5 -0.000511  0.000046
..       ...  ...     ...       ...       ...
195  sub-002    1     196 -0.000872  0.000182
196  sub-002    1     197 -0.000568  0.000082
197  sub-002    1     198 -0.000869  0.000154
198  sub-002    1     199 -0.000673  0.000085
199  sub-002    1     200 -0.000699  0.000102

[200 rows x 5 columns]


In [160]:
# Concatenate every subject's per-parcel CSV into one long DataFrame.
# The frames are collected first and concatenated once: calling pd.concat inside
# the loop re-copies all previously accumulated rows on every iteration.
stats_dir = os.path.join(
    deriv_dir, "RSA_stats", STIMULUS_LABEL_SAVE_STRING, TRAIT_LABEL_SAVE_STRING
)
subject_frames = [
    pd.read_csv(
        os.path.join(
            stats_dir,
            f"{sub_id}_{STIMULUS_LABEL_SAVE_STRING}_{TRAIT_LABEL_SAVE_STRING}_parcel_RSA.csv",
        )
    )
    for sub_id in subjects
]
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
all_df = pd.concat(subject_frames, ignore_index=True) if subject_frames else pd.DataFrame()

all_df

Unnamed: 0,subject,run,parcel,beta,r2
0,sub-002,1.0,1,-0.000267,0.000015
1,sub-002,1.0,2,0.000239,0.000006
2,sub-002,1.0,3,-0.000121,0.000002
3,sub-002,1.0,4,0.000244,0.000006
4,sub-002,1.0,5,-0.000511,0.000046
...,...,...,...,...,...
14995,sub-082,,196,-0.002080,0.003557
14996,sub-082,,197,-0.002024,0.002797
14997,sub-082,,198,-0.002013,0.002160
14998,sub-082,,199,-0.002013,0.001928


In [161]:
# Number of distinct subjects present in the concatenated results.
all_df["subject"].nunique()

75

In [162]:
# Group-level test, one per parcel: one-sample t-test of the subjects' betas
# against 0, one-sided (alternative: mean beta > 0).
# NOTE(review): the recorded outputs of this notebook show min(p) = 1.0 and
# min(t) ≈ -85, i.e. betas are strongly negative in every parcel. The neural RDM
# is a dissimilarity (1 - r); confirm the model RDM uses the same sign
# convention (dissimilarity, not similarity) before interpreting these tests.
n_subjects_expected = all_df['subject'].nunique()

t_values = []
p_values = []

for parcel in range(1, n_rois + 1):
    parcel_df = all_df[all_df['parcel'] == parcel]
    # The t-test treats rows as independent subjects, so each subject must
    # contribute exactly one row per parcel (no extra runs, no missing parcels).
    # The expected count comes from the data rather than a fixed 75.
    assert len(parcel_df) == n_subjects_expected and parcel_df['subject'].is_unique, (
        f"Parcel {parcel}: expected one row for each of {n_subjects_expected} subjects, "
        f"found {len(parcel_df)} rows"
    )

    # Perform t-test
    t, p = ttest_1samp(parcel_df['beta'], 0, alternative='greater')
    t_values.append(t)
    p_values.append(p)



In [163]:
# Smallest one-sided p-value across all parcels (quick look at the strongest effect).
np.min(p_values)

1.0

In [164]:
# Most negative t statistic across all parcels.
np.min(t_values)

-85.35315605832639

In [165]:
# Group statistics table: one row per Schaefer parcel (IDs 1..n_rois) with the
# t statistic and p-value collected in the t-test cell.
summary_df = pd.DataFrame(
    dict(
        parcel=np.arange(1, n_rois + 1),
        t_value=t_values,
        p_value=p_values,
    )
)

# Destination folder: <deriv_dir>/RSA_stats/<stimulus>/<trait>/ (created if missing).
output_folder = os.path.join(
    *(deriv_dir, "RSA_stats", STIMULUS_LABEL_SAVE_STRING, TRAIT_LABEL_SAVE_STRING)
)
os.makedirs(output_folder, exist_ok=True)

# Write the table without the DataFrame index.
outfile = os.path.join(
    output_folder,
    "group_stats_" + STIMULUS_LABEL_SAVE_STRING + "_" + TRAIT_LABEL_SAVE_STRING + ".csv",
)
summary_df.to_csv(outfile, index=False)

print(f"✅ Saved group stats for {TRAIT_LABEL} →\n   {outfile}")

✅ Saved group stats for feeling Gloomy →
   /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Gloomy/group_stats_pieman_feeling_Gloomy.csv
