In [1]:
import os

# Run the notebook from the repository checkout so that the relative paths used
# by later cells (e.g. "configs/demo.yaml") resolve.
# The location can be overridden with the FMRI_EDM_CCM_ROOT environment
# variable; when it is unset, the original hard-coded checkout path is used.
REPO_ROOT = os.environ.get("FMRI_EDM_CCM_ROOT", "/flash/PaoU/seann/fmri-edm-ccm")
os.chdir(REPO_ROOT)  # or %cd /flash/PaoU/seann/fmri-edm-ccm


In [2]:
from src.utils import load_yaml

# Load the demo configuration once; later cells read their settings from `cfg`.
cfg = load_yaml("configs/demo.yaml")

# Identifiers and path table that the remaining cells pass to the loaders.
SUB = cfg['subject']
STORY = cfg['story']
paths = cfg['paths']

# Echo what was loaded so the recorded output documents the run.
config_keys = sorted(cfg.keys())
print('Config keys:', config_keys)
print('Subject/Story:', SUB, STORY)

Config keys: ['E_grid', 'E_mult', 'E_univ', 'TR', 'delta', 'k_grid', 'lib_sizes', 'nIC', 'n_parcels', 'paths', 'pca_components', 'shortlist_topk', 'simplex_k', 'smap_theta', 'story', 'subject', 'tau', 'theiler_min', 'theta_grid']
Subject/Story: UTS01 wheretheressmoke


In [3]:
from pathlib import Path


def _first_existing(candidates):
    """Return the first path in `candidates` that exists on disk, or None."""
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


data_root = Path(paths['data_root'])

# BOLD images: every file under this subject's tree whose name contains the
# story and ends in `_bold.nii.gz`, in lexicographic order.
bold_pattern = f'sub-{SUB}/**/*{STORY}*_bold.nii.gz'
bold_runs = sorted(data_root.glob(bold_pattern))
print('BOLD runs found:', len(bold_runs))
print('Example BOLD:', bold_runs[0] if bold_runs else None)

# Stimulus audio: probe the known folder names in priority order.
audio_candidates = [
    data_root / folder / f'{STORY}.wav'
    for folder in ('stimuli', 'audio')
]
wav_path = _first_existing(audio_candidates)
print('Audio WAV:', wav_path)

# TextGrid: same probing strategy, including a lower-cased file name as the
# last resort inside the TextGrids derivative folder.
textgrid_dir = data_root / 'derivative' / 'TextGrids'
tg_candidates = [
    data_root / 'stimuli' / f'{STORY}.TextGrid',
    data_root / 'annotations' / f'{STORY}.TextGrid',
    textgrid_dir / f'{STORY}.TextGrid',
    textgrid_dir / f'{STORY.lower()}.TextGrid',
]
tg_path = _first_existing(tg_candidates)
print('TextGrid:', tg_path)

BOLD runs found: 10
Example BOLD: /bucket/PaoU/seann/openneuro/ds003020/sub-UTS01/ses-10/func/sub-UTS01_ses-10_task-wheretheressmoke_run-9_bold.nii.gz
Audio WAV: /bucket/PaoU/seann/openneuro/ds003020/stimuli/wheretheressmoke.wav
TextGrid: /bucket/PaoU/seann/openneuro/ds003020/derivative/TextGrids/wheretheressmoke.TextGrid


In [4]:
from src import features
from src.utils import zscore_per_column

# Per-TR feature matrix from the english1000 loader, followed by its PCA
# projection with the component count taken from the config.
X = features.load_english1000_TR(SUB, STORY, paths)
n_components = cfg['pca_components']
Z, pca_model = features.pca_fit_transform(X, n_components)

# Per-TR driver series from the envelope and word-rate loaders.
env = features.load_envelope_TR(SUB, STORY, paths)
wr = features.load_wordrate_TR(SUB, STORY, paths)

# Shape report; the first dimension of each array should agree.
print('Semantic/TR shapes:', X.shape, Z.shape)
print('Drivers shapes:', env.shape, wr.shape)

Semantic/TR shapes: (301, 985) (301, 128)
Drivers shapes: (301,) (301,)


In [5]:
from src import roi

# Parcel time series for this subject/story; the parcel count is a config value.
n_parcels = cfg['n_parcels']
R = roi.load_schaefer_timeseries_TR(SUB, STORY, n_parcels, paths)
print('ROI shape:', R.shape)

ROI shape: (301, 400)


In [6]:
from src import edm

# Resolve hyper-parameters. An explicit config entry wins; the grid fallback is
# only looked up when that entry is absent, so a config that provides the
# explicit value does not also need the corresponding grid (the previous
# `cfg.get(key, cfg[grid][i])` form always evaluated the grid lookup).
E_univ = cfg['E_univ'] if 'E_univ' in cfg else cfg['E_grid'][0]
delta = cfg['delta'][0]
k = cfg['simplex_k'] if 'simplex_k' in cfg else cfg['k_grid'][1]
theta = cfg['smap_theta'] if 'smap_theta' in cfg else cfg['theta_grid'][2]

# Theiler window is never smaller than the embedding dimension.
theiler_univ = max(cfg['theiler_min'], E_univ)
# Number of leading samples consumed by the delay embedding.
lag_univ = (E_univ - 1) * cfg['tau']

# Forecast target: first PCA component shifted by the horizon `delta`.
y_future = edm.horizon_shift(Z[:, 0], delta)
if y_future.shape[0] <= lag_univ:
    raise ValueError('Not enough samples for univariate embedding')

# Delay-embed the same component and trim both sides to a common length.
# NOTE(review): the slicing assumes row i of the embedding corresponds to time
# index lag_univ + i — confirm against edm.embed_multivariate.
X_univ_full = edm.embed_multivariate(Z[:, 0], E_univ, cfg['tau'])
X_univ = zscore_per_column(X_univ_full[: y_future.shape[0] - lag_univ])
y_univ = y_future[lag_univ:]

yhat_univ = edm.simplex(X_univ, y_univ, k, theiler=theiler_univ)
rho_univ = edm.corr_skill(yhat_univ, y_univ)
# NOTE(review): the recorded run of this cell printed "ρ_univ: 1.0". A perfect
# correlation is unusual for a forecast; verify that edm.simplex really
# excludes each query point (and its Theiler neighbours) from its own library
# before trusting this number.
print('ρ_univ:', rho_univ)

ρ_univ: 1.0


In [7]:
from src import ccm

# Run the conditional CCM screen over all ROI columns against the first PCA
# component, passing env and wr as the conditioning series, then keep the
# leading `shortlist_topk` entries of whatever ordering the screen returns.
# NOTE(review): presumably the result is ordered best-first — confirm in src.ccm.
screen_args = (
    R,
    Z[:, 0],
    [env, wr],
    E_univ,
    cfg['tau'],
    theiler_univ,
    cfg['lib_sizes'],
)
screen_result = ccm.ccm_conditional_screen(*screen_args)
shortlist = screen_result[: cfg['shortlist_topk']]
print('Shortlist:', shortlist)

Shortlist: [251, 278, 254, 294, 240, 391, 234, 352, 397, 55]


In [8]:
import numpy as np

# Explicit E_mult wins; the grid fallback is only read when it is absent (the
# previous `cfg.get('E_mult', cfg['E_grid'][1])` always evaluated the grid
# lookup, failing on configs that set E_mult but carry a shorter/no E_grid).
E_mult = cfg['E_mult'] if 'E_mult' in cfg else cfg['E_grid'][1]
lag_embed = (E_mult - 1) * cfg['tau']
y_future = edm.horizon_shift(Z[:, 0], delta)

# Lag settings of the drivers-only baseline (used by the baseline cell). They
# are fixed here so both models are scored on the same target window.
baseline_E, baseline_tau = 3, 1
lag_drivers = (baseline_E - 1) * baseline_tau

# Common start index: skip as many samples as the longer of the two lag stacks needs.
t_start = max(lag_embed, lag_drivers)
theiler_mult = max(cfg['theiler_min'], E_mult)
n_samples = y_future.shape[0] - t_start
if n_samples <= 0:
    raise ValueError('Not enough samples for multivariate embedding')

# Delay-embed the shortlisted ROI columns and the target component separately.
X_rois_full = edm.embed_multivariate(R[:, shortlist], E_mult, cfg['tau'])
X_target_full = edm.embed_multivariate(Z[:, 0], E_mult, cfg['tau'])

# Offset into the embedded arrays that lines them up with t_start.
# NOTE(review): assumes embedded row i corresponds to time index lag_embed + i —
# confirm against edm.embed_multivariate.
start_emb = t_start - lag_embed
X_rois = zscore_per_column(X_rois_full[start_emb : start_emb + n_samples])
X_target = zscore_per_column(X_target_full[start_emb : start_emb + n_samples])
Xemb = np.hstack([X_target, X_rois])
y_target = y_future[t_start:]

yhat_simplex = edm.simplex(Xemb, y_target, k, theiler=theiler_mult)
yhat_smap = edm.smap(Xemb, y_target, k, theta, theiler=theiler_mult)
rho_simplex = edm.corr_skill(yhat_simplex, y_target)
rho_smap = edm.corr_skill(yhat_smap, y_target)
# NOTE(review): the recorded run printed ρ_simplex = 1.0 and ρ_smap ≈ 1.0.
# Skills this close to perfect suggest the query point may be present in its
# own neighbour set; verify the Theiler exclusion inside edm.simplex / edm.smap.
print('ρ_simplex:', rho_simplex)
print('ρ_smap:', rho_smap)

ρ_simplex: 1.0
ρ_smap: 0.9999999999984466


In [9]:
import numpy as np
from src import baselines

# Drivers-only baseline: lag-stack the two driver series and fit the ridge
# forecaster on the same target window used by the EDM models.
drivers = np.column_stack((env, wr))
Xb_full = features.make_lag_stack(drivers, E=baseline_E, tau=baseline_tau)

# Select the n_samples rows that line up with y_target.
start_base = t_start - lag_drivers
stop_base = start_base + n_samples
Xb = Xb_full[start_base:stop_base]

yhat_baseline = baselines.ridge_forecast(Xb, y_target)
rho_baseline = edm.corr_skill(yhat_baseline, y_target)
print('ρ_drivers:', rho_baseline)

ρ_drivers: 0.11800044450495596


In [10]:
from pathlib import Path
from sklearn.linear_model import LinearRegression
from src import plots

# Forecast skills gathered from the earlier cells, keyed by model label.
skill_summary = {
    'drivers-only': rho_baseline,
    'simplex': rho_simplex,
    'smap': rho_smap,
}
plot_root = Path(paths['figs']) / SUB / STORY
# Ensure the per-subject/story figure directory exists before saving into it
# (no-op when it is already there).
plot_root.mkdir(parents=True, exist_ok=True)
plots.forecast_bars(skill_summary, str(plot_root / 'forecast_bars.png'))

# Forecast horizon, read once so the CCM section and the summary label agree.
delta = cfg['delta'][0]

if shortlist:
    # Diagnostics for the first shortlisted ROI.
    roi_idx = int(shortlist[0])
    ccm_theiler = max(cfg['theiler_min'], E_univ)
    target_shift = edm.horizon_shift(Z[:, 0], delta)
    roi_series = R[:, roi_idx]
    if delta > 0:
        # Drop the last `delta` ROI samples so the ROI series has the same
        # length as the shifted target.
        # NOTE(review): assumes edm.horizon_shift returns a series already
        # shortened by `delta` — the length check below enforces it.
        roi_series = roi_series[:-delta]
    if roi_series.shape[0] != target_shift.shape[0]:
        raise ValueError(
            'ROI series and shifted target have different lengths: '
            f'{roi_series.shape[0]} vs {target_shift.shape[0]}'
        )

    # Regress the two drivers out of both series and run CCM on the residuals.
    drivers_ccm = np.column_stack([env[: roi_series.shape[0]], wr[: roi_series.shape[0]]])
    if drivers_ccm.size:
        model = LinearRegression(fit_intercept=True)
        model.fit(drivers_ccm, roi_series)
        roi_resid = roi_series - model.predict(drivers_ccm)
        # The same estimator object is refit for the target.
        model.fit(drivers_ccm, target_shift)
        target_resid = target_shift - model.predict(drivers_ccm)
    else:
        # No driver samples available: fall back to mean-centering.
        roi_resid = roi_series - roi_series.mean()
        target_resid = target_shift - target_shift.mean()
    ccm_res = ccm.ccm_pair(
        roi_resid,
        target_resid,
        E_univ,
        cfg['tau'],
        ccm_theiler,
        cfg['lib_sizes'],
    )
    plots.ccm_curve(cfg['lib_sizes'], ccm_res['skill_curve'], str(plot_root / 'ccm_curve.png'))

    # S-Map skill for every theta in the grid, on the multivariate embedding.
    theta_scores = [
        edm.corr_skill(edm.smap(Xemb, y_target, k, th, theiler=theiler_mult), y_target)
        for th in cfg['theta_grid']
    ]
    plots.theta_sweep(cfg['theta_grid'], theta_scores, str(plot_root / 'theta_sweep.png'))
    plots.attractor_3d(
        Xemb,
        str(plot_root / 'attractor_3d.png'),
        # Colour values run linearly from 0 to 1 over the rows of Xemb.
        color=np.linspace(0, 1, Xemb.shape[0]),
    )
    # Theta with the highest skill (first one on ties).
    print('θ preference:', cfg['theta_grid'][theta_scores.index(max(theta_scores))])

# Label the advantage with the horizon actually used instead of a fixed "1".
print(f'Δ={delta} advantage (S-Map - drivers):', rho_smap - rho_baseline)
print('ρ_univ vs simplex:', rho_univ, rho_simplex)

θ preference: 0.4
Δ=1 advantage (S-Map - drivers): 0.8819995554934906
ρ_univ vs simplex: 1.0 1.0
