# Single Trait Analysis

In [1]:
import os, glob, re, csv
import numpy as np
import nibabel as nib
import statsmodels.api as sm
from nilearn.image import resample_to_img
import pandas as pd
from scipy.stats import ttest_1samp
from nilearn import plotting, datasets
import scipy.stats as stats
from scipy.optimize import nnls   



In [2]:
# Load one cleaned BOLD run as a reference image. `bold_img` is reused further
# down as the target grid when the Schaefer atlas is first resampled.
# NOTE(review): this path spells the folder "pieMan_cleaned", while the subject
# list below is built from f"{STIMULUS_LABEL_SAVE_STRING}_cleaned", i.e.
# "pieman_cleaned". That only resolves to the same folder on a
# case-insensitive file system - confirm the on-disk name.
bold_img = nib.load('/Volumes/Passport/fmriprep/derivatives/pieMan_cleaned/sub-002/func/sub-002_task-pieman_run-1_cleaned_desc-masked_bold.nii.gz')

# Report the header zooms and the array shape: the first three zooms are
# printed as the voxel size (spatial resolution), the fourth as the TR
# (temporal resolution).
zooms = bold_img.header.get_zooms()
print(f"Voxel size (mm): {zooms[:3]}")
print(f"TR (s): {zooms[3]}")
print(f"Shape {bold_img.shape}")

Voxel size (mm): (3.0, 3.0, 4.0)
TR (s): 1.0
Shape (65, 77, 49, 160)


In [3]:
# SET MAIN HYPERPARAMETERS
# Human-readable names of the 13 trait labels analysed in this notebook.
ALL_TRAIT_LABELS = [
    "Open-minded",
    "feeling Affectionate",
    "Attentive",
    "Assertive",
    "feeling Gloomy",
    "feeling Peaceful",
    "Agreeable",
    "Judging",
    "feeling Angry",
    "feeling Bewildered",
    "Impulsive",
    "Self-disciplined",
    "Contemplating",
]

# File-name-safe variants of the labels: spaces and hyphens become underscores
# (e.g. "feeling Affectionate" -> "feeling_Affectionate").
ALL_TRAIT_SAVE_STRS = [
    label.translate(str.maketrans({" ": "_", "-": "_"}))
    for label in ALL_TRAIT_LABELS
]

# Stimulus tag used when building derivative folder and file names.
STIMULUS_LABEL_SAVE_STRING = "pieman"

In [4]:
# ──────────────────────────────────────────────────────────────
# 0) PATHS & I/O
# ──────────────────────────────────────────────────────────────
# `deriv_dir` is the base folder for everything this notebook reads or writes
# (behavioural RDMs, cleaned BOLD data, RSA statistics).
root_dir  = "/Volumes/Passport/fmriprep"          # ←  same as in cleaning script
deriv_dir = os.path.join(root_dir, "derivatives") #   (don’t hard-code “subjects” yet)



# Load the behavioural model RDM of every trait once, so a missing or
# unreadable .npy file fails here rather than in the middle of the analysis.
# NOTE(review): `model_rdm` is overwritten on every iteration, so after this
# loop only the LAST trait's RDM (and its `trait_long` / `trait_save` /
# `rdm_path`) stays bound. The per-subject driver loop reloads each RDM
# itself, so nothing should rely on these leftover globals.
for trait_long, trait_save in zip(ALL_TRAIT_LABELS, ALL_TRAIT_SAVE_STRS):
    rdm_path = os.path.join(
        deriv_dir, "RDMs_behavior",
        f"{STIMULUS_LABEL_SAVE_STRING}_{trait_save}_RDM.npy"
    )
    model_rdm = np.load(rdm_path)

In [5]:
# ──────────────────────────────────────────────────────────────
# 1) SUBJECT / RUN FILTERS
# ──────────────────────────────────────────────────────────────
# Subjects dropped entirely, written as subject numbers and expanded to the
# zero-padded "sub-XXX" form.
exclude_subs = {
    f"sub-{number:03d}" for number in (1, 21, 22, 38, 56, 68, 69)
}

# (subject, run) pairs to skip; the run is kept as a string because it is
# compared against the digits parsed out of the BOLD file name.
exclude_sub_runs = {
    (f"sub-{number:03d}", "2")
    for number in (2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16)
}

# Set to e.g. "sub-002" to run a single person; None processes everyone
# who is not excluded.
target_subject = None


In [6]:
# ----------------------------------------------------------------
# 2)  BUILD SUBJECT LIST  (from cleaned derivatives)  ------------
# ----------------------------------------------------------------
# Candidates are all entries named "sub-*" inside the cleaned-data folder
# of the current stimulus.
cleaned_root = os.path.join(deriv_dir, f"{STIMULUS_LABEL_SAVE_STRING}_cleaned")
all_subs = [entry for entry in os.listdir(cleaned_root) if entry.startswith("sub-")]
all_subs.sort()

if not target_subject:
    # Default: everyone on disk except the excluded subjects.
    subjects = [s for s in all_subs if s not in exclude_subs]
elif target_subject in all_subs:
    # Single-subject mode (the exclusion list is not consulted here).
    subjects = [target_subject]
else:
    raise ValueError(f"{target_subject} not found in {cleaned_root}")

print("Subjects to process →", ", ".join(subjects))

Subjects to process → sub-002, sub-003, sub-004, sub-005, sub-006, sub-007, sub-008, sub-009, sub-010, sub-011, sub-012, sub-013, sub-014, sub-015, sub-016, sub-017, sub-018, sub-019, sub-020, sub-023, sub-024, sub-025, sub-026, sub-027, sub-028, sub-029, sub-030, sub-031, sub-032, sub-033, sub-034, sub-035, sub-036, sub-037, sub-039, sub-040, sub-041, sub-042, sub-043, sub-044, sub-045, sub-046, sub-047, sub-048, sub-049, sub-050, sub-051, sub-052, sub-053, sub-054, sub-055, sub-057, sub-058, sub-059, sub-060, sub-061, sub-062, sub-063, sub-064, sub-065, sub-066, sub-067, sub-070, sub-071, sub-072, sub-073, sub-074, sub-075, sub-076, sub-077, sub-078, sub-079, sub-080, sub-081, sub-082


In [7]:
# ──────────────────────────────────────────────────────────────
# 3) FETCH SCHAEFER ATLAS  ─────────────────────────────────────
# ──────────────────────────────────────────────────────────────

# Schaefer parcel/atlas parameters
n_rois = 200
yeo_networks = 17
resolution_mm = 2                   # resolution of your Schaefer atlas (double check!)

schaefer    = datasets.fetch_atlas_schaefer_2018(
                 n_rois=n_rois,
                 yeo_networks=yeo_networks,
                 resolution_mm=resolution_mm
             )
atlas_img   = nib.load(schaefer['maps'])  # default 2mm MNI - but our images 3x3x4 (Pieman and others) OR 2.5^3 (ie., Black and Forgot)

# Bring the atlas onto the grid of the reference BOLD image. Nearest-neighbour
# interpolation keeps the parcel ids as integers (no blended label values).
atlas_resampled = resample_to_img(atlas_img, bold_img, interpolation='nearest')
atlas_data     = atlas_resampled.get_fdata()

# Build the label lookup so that index 0 is the background and index k is the
# name of parcel k (the parcel ids in the atlas image start at 1).
#  * The network prefix is derived from `yeo_networks` instead of being
#    hard-coded, so switching to the 7-network atlas strips "7Networks_" too.
#  * Labels are accepted as bytes or str, because the fetcher's return type
#    depends on the installed nilearn version.
#  * "Background" is only prepended when the fetcher has not already supplied
#    it; adding it twice would shift every parcel label by one.
network_prefix = f"{yeo_networks}Networks_"
labels = [
    (raw.decode('utf-8') if isinstance(raw, bytes) else str(raw)).replace(network_prefix, '')
    for raw in schaefer['labels']
]
if not labels or labels[0] != "Background":
    labels.insert(0, "Background")
if len(labels) != n_rois + 1:
    raise ValueError(
        f"Expected {n_rois + 1} atlas labels (background + {n_rois} parcels), "
        f"got {len(labels)}"
    )
labels = np.asarray(labels)

In [8]:
# ──────────────────────────────────────────────────────────────
# 4) Define NNLS function  ─────────────────────────────────────
# ──────────────────────────────────────────────────────────────
def rsa_nnls(neural_rdm: np.ndarray, model_rdm: np.ndarray):
    """
    Non-Negative Least-Squares RSA.

    Regresses the lower triangle of `neural_rdm` on an intercept plus the
    lower triangle of `model_rdm`, with both coefficients constrained to be
    non-negative.

    Parameters
    ----------
    neural_rdm : np.ndarray – square (n, n) neural dissimilarity matrix
    model_rdm  : np.ndarray – model dissimilarity matrix of the same shape

    Returns
    -------
    beta : float   – coefficient for the model RDM (≥ 0)
    r2   : float   – coefficient of determination for the NNLS fit
                     (0.0 when the neural lower triangle has no variance)

    Raises
    ------
    ValueError – if `neural_rdm` is not a square 2-D matrix or the two RDMs
                 differ in shape.
    """
    neural_rdm = np.asarray(neural_rdm)
    model_rdm = np.asarray(model_rdm)
    if neural_rdm.ndim != 2 or neural_rdm.shape[0] != neural_rdm.shape[1]:
        raise ValueError(
            f"neural_rdm must be a square 2-D matrix, got shape {neural_rdm.shape}"
        )
    if model_rdm.shape != neural_rdm.shape:
        raise ValueError(
            f"model_rdm shape {model_rdm.shape} does not match "
            f"neural_rdm shape {neural_rdm.shape}"
        )

    # ----------------------------------------------------------
    # 1) VECTORISE LOWER-TRIANGLE (k = -1 keeps diagonal out)
    # ----------------------------------------------------------
    # The matrix size comes from the data rather than a fixed 160, so runs
    # with a different number of time points are neither truncated nor
    # rejected with an IndexError.
    i_low = np.tril_indices(neural_rdm.shape[0], k=-1)
    y = neural_rdm[i_low]                   # dependent variable, shape (N,)
    X = model_rdm[i_low][:, None]           # predictor,      shape (N,1)

    # add a constant column so the fit can pick up any baseline offset;
    # both columns are subject to the non-negativity constraint.
    X = np.column_stack([np.ones_like(y), X])   # shape (N,2)

    # ----------------------------------------------------------
    # 2) NNLS FIT
    # ----------------------------------------------------------
    coef, _ = nnls(X, y)            # coef[0] = intercept, coef[1] = beta (≥0)

    # ----------------------------------------------------------
    # 3) GOODNESS-OF-FIT (pseudo-R²)
    # ----------------------------------------------------------
    y_hat = X @ coef
    ss_res = np.sum((y - y_hat) ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    r2     = 1 - ss_res / ss_tot if ss_tot > 0 else 0.0

    beta = float(coef[1])
    return beta, float(r2)

# ──────────────────────────────────────────────────────────────
# 5) Single Trait per-subject function  (adds run-skip check)  ─────────
# ──────────────────────────────────────────────────────────────
def run_parcel_rsa_for_subject(sub: str,
        trait_save: str,
        model_rdm: np.ndarray):
    """
    Parcel-wise NNLS RSA for one subject and one trait, written to a CSV.

    For every cleaned BOLD file of `sub` that is not listed in
    `exclude_sub_runs`, the atlas is resampled onto that run's grid and, for
    each non-empty parcel 1..n_rois, a neural RDM (1 - correlation between
    time points, computed over the parcel's voxels) is fitted against
    `model_rdm` with `rsa_nnls`.

    Parameters
    ----------
    sub        : subject id, e.g. "sub-002"
    trait_save : file-name-safe trait string; names the output folder and file
    model_rdm  : model RDM handed unchanged to `rsa_nnls`

    Returns
    -------
    None. Writes
    <deriv_dir>/RSA_stats/<stimulus>/<trait_save>/<sub>_<stimulus>_<trait_save>_parcel_RSA_NNLS.csv
    with one row per (run, parcel). If no cleaned file is found, nothing is
    written; if every run is skipped, the CSV holds only the header row.
    """
    func_dir = os.path.join(cleaned_root, sub, "func")
    search_patterns = (
        # multi-run naming first ...
        os.path.join(func_dir,
                     f"{sub}_task-{STIMULUS_LABEL_SAVE_STRING}_run-*_*cleaned_desc-masked_bold.nii.gz"),
        # ... then the run-less single-file naming
        os.path.join(func_dir,
                     f"{sub}_task-{STIMULUS_LABEL_SAVE_STRING}_cleaned_desc-masked_bold.nii.gz"),
    )
    bold_files = []
    for pattern in search_patterns:
        bold_files.extend(sorted(glob.glob(pattern)))

    if len(bold_files) == 0:
        print(f"⏩ {sub}: no cleaned runs")
        return

    rows = []
    for bold_path in bold_files:
        # The run number is taken from the file name; run-less files get None.
        run_match = re.search(r"_run-(\d+)_", os.path.basename(bold_path))
        run = None if run_match is None else run_match.group(1)
        if run and (sub, run) in exclude_sub_runs:
            print(f"   • skipping {sub} run-{run} (in exclusion list)")
            continue

        run_img = nib.load(bold_path)
        timeseries = run_img.get_fdata()

        # Resample the atlas onto this run's own grid (nearest neighbour keeps
        # the parcel ids intact).
        parcellation = resample_to_img(
            atlas_img, run_img, interpolation="nearest"
        ).get_fdata()

        run_tag = run or "NA"
        for parcel_id in range(1, n_rois + 1):
            in_parcel = parcellation == parcel_id
            if in_parcel.any():
                # After the transpose, rows are time points and columns are
                # the parcel's voxels, so corrcoef compares time points.
                patterns_by_time = timeseries[in_parcel, :].T
                neural_rdm = 1 - np.corrcoef(patterns_by_time).astype(np.float32)
                beta, r2 = rsa_nnls(neural_rdm, model_rdm)
                rows.append([sub, run_tag, parcel_id, labels[parcel_id], beta, r2])

    # ---- write one CSV per subject ----
    # <deriv_dir>/RSA_stats/<stimulus>/<trait>/ ; makedirs also creates the
    # intermediate folders.
    trait_folder = os.path.join(
        deriv_dir, "RSA_stats", STIMULUS_LABEL_SAVE_STRING, trait_save
    )
    os.makedirs(trait_folder, exist_ok=True)

    out_csv = os.path.join(trait_folder, f"{sub}_{STIMULUS_LABEL_SAVE_STRING}_{trait_save}_parcel_RSA_NNLS.csv")
    with open(out_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["subject","run","parcel_num","parcel_label","beta","r2"])
        writer.writerows(rows)
    print(f"✅ {sub}: {len(rows)} rows → {out_csv}")

In [9]:
# ----------------------------------------------------------------------
# 6. CALL FUNCTION FOR EACH SUBJECT
# ----------------------------------------------------------------------
# Outer loop: traits. Inner loop: subjects. Each call writes one CSV for the
# (subject, trait) pair.
# NOTE(review): run_parcel_rsa_for_subject reloads the BOLD data and rebuilds
# every parcel's neural RDM on each call, so that work is repeated once per
# trait.
for trait_label, trait_save in zip(ALL_TRAIT_LABELS, ALL_TRAIT_SAVE_STRS):
    print(f"\n── TRAIT: {trait_label} ──")

    # The behavioural RDM is loaded once per trait and shared by all subjects.
    rdm_path = os.path.join(
        deriv_dir,
        "RDMs_behavior",
        f"{STIMULUS_LABEL_SAVE_STRING}_{trait_save}_RDM.npy",
    )
    model_rdm = np.load(rdm_path)

    for sub in subjects:
        run_parcel_rsa_for_subject(sub, trait_save, model_rdm)

print("\nALL DONE 🎉")


── TRAIT: Open-minded ──
✅ sub-002: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/Open_minded/sub-002_pieman_Open_minded_parcel_RSA_NNLS.csv
✅ sub-003: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/Open_minded/sub-003_pieman_Open_minded_parcel_RSA_NNLS.csv
✅ sub-004: 200 rows → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/Open_minded/sub-004_pieman_Open_minded_parcel_RSA_NNLS.csv


KeyboardInterrupt: 

# Group Analysis for Single Trait

In [None]:
# ------------------------------------------------------------------
# 0) helper: run parcel-wise t-tests for ONE trait and save summary
# ------------------------------------------------------------------
def run_group_stats_for_trait(stim_label: str,
                              trait_save: str,
                              trait_long: str,
                              subjects: list[str],
                              n_rois: int):
    """
    Parcel-wise group statistics for one trait, saved as a CSV summary.

    Reads the per-subject `*_parcel_RSA_NNLS.csv` files for this trait,
    reduces them to one beta per (parcel, subject), runs a one-sample,
    one-sided t-test (mean > 0) per parcel and writes
    `group_stats_<stim_label>_<trait_save>_NNLS.csv` into the same folder.

    Parameters
    ----------
    stim_label : stimulus tag used in folder and file names (e.g. "pieman")
    trait_save : file-name-safe trait string
    trait_long : human-readable trait name, used only in printed messages
    subjects   : subject ids whose CSVs are read
    n_rois     : number of parcels; parcels 1..n_rois are tested

    Raises
    ------
    FileNotFoundError – if a subject's CSV does not exist (from pd.read_csv)
    AssertionError    – if a parcel does not have exactly one value for every
                        subject found in the concatenated data
    """
    trait_dir = os.path.join(deriv_dir, "RSA_stats", stim_label, trait_save)

    # (1) gather all subject files for this trait
    dfs = [
        pd.read_csv(os.path.join(
            trait_dir, f"{sub}_{stim_label}_{trait_save}_parcel_RSA_NNLS.csv"
        ))
        for sub in subjects
    ]
    all_df = pd.concat(dfs, ignore_index=True)

    # (2) sanity check
    n_sub = all_df["subject"].nunique()
    print(f"{trait_long}: concatenated {n_sub} subjects")

    # (3) collapse to ONE beta per (parcel, subject).
    # A subject with several retained runs has several rows per parcel. Those
    # rows are not independent samples, and they would trip the row-count
    # check below. Averaging within subject leaves single-run subjects
    # unchanged (the mean of one value is that value).
    per_subject = all_df.groupby(
        ["parcel_num", "subject"], as_index=False
    )["beta"].mean()

    # (4) parcel-wise one-sample t-tests
    # NOTE(review): the betas come from a non-negative fit (>= 0 by
    # construction), so a one-sided test against 0 tends to be
    # anti-conservative; the p-values are also uncorrected across parcels.
    # Confirm this is the intended inference.
    t_vals, p_vals = [], []
    for parcel in range(1, n_rois + 1):
        parcel_betas = per_subject.loc[per_subject["parcel_num"] == parcel, "beta"]
        assert len(parcel_betas) == n_sub, \
            f"Parcel {parcel}: expected {n_sub} rows, got {len(parcel_betas)}"
        t, p = ttest_1samp(parcel_betas, 0, alternative="greater")
        t_vals.append(t); p_vals.append(p)

    # (5) summary dataframe
    # Labels are taken from the CSVs themselves, keyed by parcel number, so
    # they always match the parcels that were analysed and do not depend on a
    # notebook-level `labels` array being defined and correctly aligned.
    parcel_nums = np.arange(1, n_rois + 1)
    label_by_parcel = (
        all_df.drop_duplicates("parcel_num")
              .set_index("parcel_num")["parcel_label"]
    )
    summary = pd.DataFrame({
        "parcel_num" : parcel_nums,
        "parcel_label": label_by_parcel.reindex(parcel_nums).to_numpy(),
        "t_value"    : t_vals,
        "p_value"    : p_vals
    })

    # (6) save
    os.makedirs(trait_dir, exist_ok=True)
    out_csv = os.path.join(
        trait_dir, f"group_stats_{stim_label}_{trait_save}_NNLS.csv"
    )
    summary.to_csv(out_csv, index=False)
    print(f"✅ Saved group stats for {trait_long} → {out_csv}\n")

In [None]:
# ------------------------------------------------------------------
# 1) DRIVER LOOP  (iterate over all 13 traits)
# ------------------------------------------------------------------
# One group-level summary CSV is written per trait; arguments are passed by
# keyword so the pairing with the function's parameters is explicit.
for trait_long, trait_save in zip(ALL_TRAIT_LABELS, ALL_TRAIT_SAVE_STRS):
    run_group_stats_for_trait(
        stim_label=STIMULUS_LABEL_SAVE_STRING,
        trait_save=trait_save,
        trait_long=trait_long,
        subjects=subjects,
        n_rois=n_rois,
    )

Open-minded: concatenated 75 subjects
✅ Saved group stats for Open-minded → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/Open_minded/group_stats_pieman_Open_minded_NNLS.csv

feeling Affectionate: concatenated 75 subjects
✅ Saved group stats for feeling Affectionate → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/feeling_Affectionate/group_stats_pieman_feeling_Affectionate_NNLS.csv

Attentive: concatenated 75 subjects
✅ Saved group stats for Attentive → /Volumes/Passport/fmriprep/derivatives/RSA_stats/pieman/Attentive/group_stats_pieman_Attentive_NNLS.csv

Assertive: concatenated 75 subjects


KeyboardInterrupt: 