# Day 6 — ROI Mask QA (stepwise)

In [None]:
import sys
from pathlib import Path
import json
from typing import Dict, Optional

import numpy as np
import pandas as pd
import h5py

# pycortex is optional at import time: its availability is recorded in
# HAVE_PYCORTEX instead of aborting, and a hint is printed when it is missing.
try:
    import cortex
    HAVE_PYCORTEX = True
except ImportError:
    HAVE_PYCORTEX = False
    print('pycortex not available — install with `pip install pycortex` if you need surface transforms.')

# nibabel is mandatory: a missing install aborts right away with an actionable
# RuntimeError that is chained to the original ImportError.
try:
    import nibabel as nib
except ImportError as exc:
    raise RuntimeError('nibabel is required to read FreeSurfer annotation files. Install with `pip install nibabel`.') from exc

# The repository root is taken to be the parent of the current working
# directory; it is prepended to sys.path once (presumably so project modules can
# be imported from this notebook — confirm against the repo layout).
REPO_ROOT = Path.cwd().parent
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Dataset locations: one absolute root, everything else derived from it.
DATA_ROOT = Path('/bucket/PaoU/seann/openneuro/ds003020')
PREPROC_ROOT = DATA_ROOT / 'derivative' / 'preprocessed_data'
PYCORTEX_DB = DATA_ROOT / 'derivative' / 'pycortex-db'
FREESURFER_SUBJECTS = DATA_ROOT / 'derivative' / 'freesurfer_subjdir'
TRANSFORM_PATH = DATA_ROOT / 'derivative' / 'subject_xfms.json'

# Subject / story selection.
SUBJECT_ID = 'sub-UTS01'
SUBJECT_FS = SUBJECT_ID.replace('sub-', '')  # subject id without the 'sub-' prefix
STORY_IDS = ['adventuresinsayingyes', 'adollshouse']
TR = 2.0  # presumably the repetition time in seconds — TODO confirm; unused in this cell

# Fail fast: stop with a clear error if any required directory or file is absent.
for path in [DATA_ROOT, PREPROC_ROOT, PYCORTEX_DB, FREESURFER_SUBJECTS, TRANSFORM_PATH]:
    if not Path(path).exists():
        raise FileNotFoundError(f'Required resource missing: {path}')

# The JSON file is read as a mapping and looked up by the prefix-less subject id;
# a missing entry is reported as a KeyError naming both the subject and the file.
with TRANSFORM_PATH.open() as fh:
    transform_map = json.load(fh)
TRANSFORM_ID = transform_map.get(SUBJECT_FS)
if TRANSFORM_ID is None:
    raise KeyError(f'No transform entry for {SUBJECT_FS} in {TRANSFORM_PATH}')

# Echo the resolved configuration so the notebook output documents the run.
print(f'Subject {SUBJECT_ID}: transform {TRANSFORM_ID}')
print(f'pycortex available: {HAVE_PYCORTEX}')
print(f'Stories to process: {STORY_IDS}')


In [None]:
def load_fs_annotations(subject_fs: str, atlas: str = 'aparc') -> Dict[str, Dict[str, np.ndarray]]:
    """Read the ``<hemi>.<atlas>.annot`` files of one FreeSurfer subject.

    Args:
        subject_fs: Subject directory name under ``FREESURFER_SUBJECTS``.
        atlas: Annotation name used in the file name (default ``'aparc'``).

    Returns:
        A dict keyed by ``'lh'`` and ``'rh'``; each value holds ``'labels'`` and
        ``'ctab'`` exactly as returned by ``nib.freesurfer.read_annot`` plus
        ``'names'``, a list of ``str`` (byte names are decoded as UTF-8).

    Raises:
        FileNotFoundError: if an annotation file does not exist.
    """
    label_dir = FREESURFER_SUBJECTS / subject_fs / 'label'
    per_hemi: Dict[str, Dict[str, np.ndarray]] = {}
    for side in ('lh', 'rh'):
        annot_file = label_dir / f'{side}.{atlas}.annot'
        if not annot_file.exists():
            raise FileNotFoundError(f'Missing annotation file: {annot_file}')
        vertex_labels, color_table, raw_names = nib.freesurfer.read_annot(str(annot_file))
        # read_annot may hand back bytes; normalise every entry to str.
        text_names = []
        for entry in raw_names:
            if isinstance(entry, bytes):
                text_names.append(entry.decode('utf-8'))
            else:
                text_names.append(str(entry))
        per_hemi[side] = {'labels': vertex_labels, 'ctab': color_table, 'names': text_names}
    return per_hemi

# Load both hemispheres once and print a short per-hemisphere summary.
fs_ann = load_fs_annotations(SUBJECT_FS)
print(f"Loaded FreeSurfer annotations for {SUBJECT_FS}:")
for hemi in ('lh', 'rh'):
    labels, names = (fs_ann[hemi][field] for field in ('labels', 'names'))
    print(f"  {hemi}: {len(names)} labels, {labels.size} vertices")
    print(f"    first labels: {', '.join(names[:10])}")

# The same ROI names are requested for each hemisphere; every hemisphere gets
# its own list object.
LANGUAGE_ROIS = {
    side: [
        'parsopercularis',
        'parstriangularis',
        'superiortemporal',
        'middletemporal',
        'temporalpole',
        'bankssts',
        'inferiorparietal',
        'supramarginal',
    ]
    for side in ('lh', 'rh')
}

# ROI_FILTER is the selection consumed when the masks are built.
ROI_FILTER = LANGUAGE_ROIS
print('ROI_FILTER set to language subset')


In [None]:
def _normalize_label(text: str) -> str:
    """Return *text* lower-cased with every non-alphanumeric character removed."""
    return ''.join(filter(str.isalnum, text.lower()))


def build_fs_roi_masks(
    annotations: Dict[str, Dict[str, np.ndarray]],
    roi_filter: Optional[Dict[str, list]] = None,
) -> Dict[str, Dict[str, np.ndarray]]:
    """Create hemisphere-specific boolean masks from FreeSurfer annotations.

    Args:
        annotations: Mapping with ``'lh'`` and ``'rh'`` entries, each providing
            ``'labels'`` (per-vertex values that are compared against the
            position of a name in ``'names'``) and ``'names'`` (label names).
        roi_filter: Optional mapping ``hemisphere -> ROI names``. When it is
            ``None`` or empty, every annotation name of the hemisphere is used.
            A hemisphere that is absent from a non-empty filter (or mapped to
            ``None``) yields no masks instead of failing.

    Returns:
        ``{hemi: {f'{hemi}-{roi}': boolean mask}}`` for both hemispheres.

    Raises:
        KeyError: if a requested ROI matches no annotation name, neither
            directly nor through the aliases in ``ROI_NAME_OVERRIDES``.
    """
    masks: Dict[str, Dict[str, np.ndarray]] = {}
    for hemi in ('lh', 'rh'):
        labels = annotations[hemi]['labels']
        names = annotations[hemi]['names']
        # Names are matched after normalisation so case/punctuation differences
        # between the request and the annotation file do not matter.
        name_to_index = {_normalize_label(name): idx for idx, name in enumerate(names)}
        if roi_filter:
            requested = roi_filter.get(hemi)
            if requested is None:
                # The filter simply does not mention this hemisphere.
                requested = []
        else:
            requested = names
        hemi_masks: Dict[str, np.ndarray] = {}
        for roi in requested:
            # Try the requested name first, then any configured aliases, in order.
            candidates = [_normalize_label(roi)]
            candidates.extend(
                _normalize_label(alias) for alias in ROI_NAME_OVERRIDES.get(roi, ())
            )
            match_idx = next(
                (name_to_index[cand] for cand in candidates if cand in name_to_index),
                None,
            )
            if match_idx is None:
                available = ', '.join(names[:10])
                raise KeyError(f'ROI {hemi}-{roi} not found in annotations. Sample names: {available}')
            hemi_masks[f'{hemi}-{roi}'] = labels == match_idx
        masks[hemi] = hemi_masks
    return masks

# Alternative annotation names for each requested ROI. build_fs_roi_masks tries
# the requested name first and then these aliases (after normalisation).
# NOTE(review): the aliases look like Destrieux-style (a2009s) label names —
# confirm they match the atlas that is actually loaded.
ROI_NAME_OVERRIDES = {
    'parsopercularis': ['G_front_inf-Opercular'],
    'parstriangularis': ['G_front_inf-Triangul'],
    'superiortemporal': ['G_temporal_sup'],
    'middletemporal': ['G_temporal_middle'],
    'temporalpole': ['Pole_temporal'],
    'bankssts': ['S_temporal_sup-Lateral'],
    'inferiorparietal': ['G_pariet_inf-Angular'],
    'supramarginal': ['G_pariet_inf-Supramar'],
}

# Build the masks for the filtered ROI set and report the number of selected
# vertices per ROI as a quick sanity check.
fs_masks_native = build_fs_roi_masks(fs_ann, ROI_FILTER)
for hemi, hemi_masks in fs_masks_native.items():
    print(f'{hemi}: {len(hemi_masks)} masks')
    for roi, mask in hemi_masks.items():
        print(f'  {roi}: {int(mask.sum())} vertices')


In [None]:
# Everything below needs pycortex; stop here with a clear message if the
# optional import failed earlier.
if not HAVE_PYCORTEX:
    raise RuntimeError('pycortex is required to project FreeSurfer ROI masks to the functional surface.')

# Point pycortex at the dataset's own database directory and replace the
# module-level database object with one opened on that directory.
cortex.config.default_db = str(PYCORTEX_DB)
cortex.config.default_filestore = str(PYCORTEX_DB)
cortex.config.default_subject = SUBJECT_FS
cortex.database.default_filestore = str(PYCORTEX_DB)
cortex.database.db = cortex.database.Database(str(PYCORTEX_DB))
cortex.db = cortex.database.db

# Rebind `db` inside the dataset submodules as well — presumably they hold their
# own module-level reference to the database; TODO confirm against the installed
# pycortex version.
import cortex.dataset
cortex.dataset.db = cortex.database.db
import cortex.dataset.braindata as _braindata
_braindata.db = cortex.database.db
import cortex.dataset.views as _views
_views.db = cortex.database.db

# Sanity check: list (up to five of) the subjects visible through the new database.
subjects = cortex.db.subjects
print(f"pycortex subjects detected: {list(subjects.keys())[:5]} ... total={len(subjects)}")

# Fetch the subject's transform in both requested forms ('coord' and 'magnet')
# and print their types and reprs for visual inspection.
coord = cortex.db.get_xfm(SUBJECT_FS, TRANSFORM_ID, xfmtype='coord')
magnet = cortex.db.get_xfm(SUBJECT_FS, TRANSFORM_ID, xfmtype='magnet')
print('coord type:', type(coord), 'magnet type:', type(magnet))
print('coord repr:', coord)
print('magnet repr:', magnet)


In [None]:
import struct
import shutil
import numpy as np
from cortex import freesurfer

def _parse_surf_numpy2(filename: str):
    """Read FreeSurfer surface files using NumPy 2 compatible buffer APIs.

    The reader skips the first 3 bytes, consumes two text lines (the first is
    printed as the file comment), reads two big-endian unsigned 32-bit counts
    (vertices, faces) and then ``3 * vertices`` big-endian float32 values
    followed by ``3 * faces`` big-endian int32 values.

    Args:
        filename: Path of the surface file.

    Returns:
        ``(pts, polys)``: a ``float32`` array of shape ``(vertices, 3)`` and an
        ``int32`` array of shape ``(faces, 3)``.

    Raises:
        ValueError: if the file holds fewer coordinate or face values than its
            header announces (truncated file).
    """
    with open(filename, 'rb') as fp:
        fp.seek(3)
        comment = fp.readline()
        fp.readline()
        verts, faces = struct.unpack('>2I', fp.read(8))
        pts_raw = fp.read(4 * 3 * verts)
        polys_raw = fp.read(4 * 3 * faces)
    print(comment)
    # Validate the byte counts BEFORE decoding: np.frombuffer with an explicit
    # count raises its own generic error on a short buffer, which would hide
    # the descriptive message below.
    if len(pts_raw) != 4 * 3 * verts or len(polys_raw) != 4 * 3 * faces:
        raise ValueError(
            f"parse_surf: expected {verts * 3} floats and {faces * 3} ints, "
            f"got {len(pts_raw) // 4} floats and {len(polys_raw) // 4} ints from {filename}"
        )
    pts = np.frombuffer(pts_raw, dtype=np.dtype('>f4'))
    polys = np.frombuffer(polys_raw, dtype=np.dtype('>i4'))
    # Convert from the on-disk big-endian layout to native dtypes.
    pts = pts.astype(np.float32, copy=False).reshape(-1, 3)
    polys = polys.astype(np.int32, copy=False).reshape(-1, 3)
    return pts, polys

def _parse_curv_numpy2(filename: str):
    """Read a FreeSurfer curvature file without deprecated NumPy calls.

    Everything after the first 15 bytes (presumably the curv header — confirm
    against the FreeSurfer format) is decoded as big-endian float32.

    Returns:
        A 1-D ``float32`` array with one entry per stored value.
    """
    header_bytes = 15
    with open(filename, 'rb') as stream:
        stream.seek(header_bytes)
        payload = stream.read()
    big_endian_values = np.frombuffer(payload, dtype='>f4')
    return big_endian_values.astype(np.float32, copy=False)

def _parse_patch_numpy2(filename: str):
    """Read a FreeSurfer patch file into a structured array (NumPy 2 safe).

    The file is read as two big-endian int32 values followed by fixed-size
    records ``(vert: int32, x: float32, y: float32, z: float32)``, all
    big-endian. The first integer is consumed but not used; the second is the
    expected number of records.

    Returns:
        The structured array exactly as decoded by ``np.frombuffer``.

    Raises:
        ValueError: if the number of decoded records differs from the count
            stored in the file.
    """
    int32_be = struct.Struct('>i')
    record_dtype = np.dtype([('vert', '>i4'), ('x', '>f4'), ('y', '>f4'), ('z', '>f4')])
    with open(filename, 'rb') as stream:
        int32_be.unpack(stream.read(4))  # leading header word, value unused
        (expected,) = int32_be.unpack(stream.read(4))
        body = stream.read()
    records = np.frombuffer(body, dtype=record_dtype)
    if len(records) == expected:
        return records
    raise ValueError(
        f"parse_patch: expected {expected} vertices, got {len(records)} from {filename}"
    )

# Capture pycortex's real implementation only while it is still installed.
# When this cell is re-run, `freesurfer.mri_surf2surf` already points at
# `_mri_surf2surf_with_fallback`; capturing it again would make the wrapper
# delegate to itself and recurse without end. In that case the reference saved
# by the first run is kept.
if getattr(freesurfer.mri_surf2surf, '__name__', '') != '_mri_surf2surf_with_fallback':
    _original_mri_surf2surf = freesurfer.mri_surf2surf

def _mri_surf2surf_with_fallback(data, source_subj, target_subj, hemi, subjects_dir=None):
    """Run ``mri_surf2surf`` when available, otherwise pass same-subject data through.

    If the ``mri_surf2surf`` executable is found on PATH the call is delegated
    unchanged to the saved original implementation. Without the executable,
    data whose source and target subject are equal is returned as an array
    untouched, and a cross-subject request raises ``FileNotFoundError``.
    """
    binary_available = shutil.which('mri_surf2surf') is not None
    if binary_available:
        return _original_mri_surf2surf(data, source_subj, target_subj, hemi, subjects_dir=subjects_dir)
    if source_subj == target_subj:
        # Same subject on both sides: nothing to resample.
        return np.asarray(data)
    raise FileNotFoundError(
        "mri_surf2surf binary not found in PATH; load FreeSurfer or install the command "
        "to map between different subjects."
    )

# Install the replacements on the `cortex.freesurfer` module so that code
# looking these functions up through the module uses the versions defined here.
freesurfer.parse_surf = _parse_surf_numpy2
freesurfer.parse_curv = _parse_curv_numpy2
freesurfer.parse_patch = _parse_patch_numpy2
freesurfer.mri_surf2surf = _mri_surf2surf_with_fallback


In [None]:
from cortex import freesurfer, utils

SURFACE_TYPE = 'fiducial'
print(f'Using FreeSurfer surface: {SURFACE_TYPE}')

# Source and target subject are the same; the two hemispheres differ only in
# the hemisphere code, so the shared keyword arguments are built once.
_surf2surf_kwargs = {
    'target_subj': SUBJECT_FS,
    'subjects_dir': str(FREESURFER_SUBJECTS),
}
mapper_lh = freesurfer.get_mri_surf2surf_matrix(SUBJECT_FS, 'lh', SURFACE_TYPE, **_surf2surf_kwargs)
mapper_rh = freesurfer.get_mri_surf2surf_matrix(SUBJECT_FS, 'rh', SURFACE_TYPE, **_surf2surf_kwargs)

# Shapes are printed as a quick check of the returned matrices.
print('mapper_lh shape:', mapper_lh.shape)
print('mapper_rh shape:', mapper_rh.shape)


In [None]:
# Project every native ROI mask through its hemisphere matrix and store the
# result, keyed by '<hemi>-<roi>', in one combined left-then-right layout.
resampled_masks = {}
mapper = utils.get_mapper(SUBJECT_FS, TRANSFORM_ID, recache=False)
mask_flat = mapper.mask.reshape(-1).astype(bool)
# NOTE(review): `flat` below has n_lh + n_rh entries, while `mask_flat` is the
# flattened `mapper.mask`. The `&` only works if both have the same length —
# confirm that `mapper.mask` is per-vertex and not a per-voxel volume mask.

# Row counts of the hemisphere matrices give the size of each half of `flat`.
n_lh = mapper_lh.shape[0]
n_rh = mapper_rh.shape[0]

# Left hemisphere: projected values first, zeros for the right-hemisphere half.
for roi_key, mask in fs_masks_native['lh'].items():
    lh_projected = mapper_lh.dot(mask.astype(float))
    flat = np.concatenate([lh_projected, np.zeros(n_rh)])
    roi_mask = (flat > 0)  # any positive projected weight counts as inside the ROI
    resampled_masks[roi_key] = roi_mask & mask_flat
    print(f"{roi_key}: {resampled_masks[roi_key].sum()} vertices")

# Right hemisphere: zeros for the left-hemisphere half, projected values second.
for roi_key, mask in fs_masks_native['rh'].items():
    rh_projected = mapper_rh.dot(mask.astype(float))
    flat = np.concatenate([np.zeros(n_lh), rh_projected])
    roi_mask = (flat > 0)  # any positive projected weight counts as inside the ROI
    resampled_masks[roi_key] = roi_mask & mask_flat
    print(f"{roi_key}: {resampled_masks[roi_key].sum()} vertices")

print('Total ROI masks:', len(resampled_masks))


In [None]:
def load_story_bold(subject_fs: str, story_id: str):
    """Load the ``'data'`` dataset of one story's preprocessed HDF5 file.

    The file is expected at ``PREPROC_ROOT / subject_fs / '<story_id>.hf5'``.

    Returns:
        The full contents of the ``'data'`` dataset, read into memory.

    Raises:
        FileNotFoundError: if the file does not exist.
    """
    story_file = PREPROC_ROOT / subject_fs / f'{story_id}.hf5'
    if story_file.exists():
        with h5py.File(story_file, 'r') as store:
            return store['data'][:]
    raise FileNotFoundError(f'Missing preprocessed file: {story_file}')

# Only the first configured story is loaded in this cell.
story_id = STORY_IDS[0]
bold = load_story_bold(SUBJECT_FS, story_id)
print('BOLD shape:', bold.shape)

# One averaged time series per ROI: select the masked columns and average
# across them for every row.
# NOTE(review): assumes `bold` is 2-D with rows = time points and that its
# second axis has the same length as each mask — TODO confirm. An all-False
# mask would produce a NaN series.
roi_ts = {}
for roi_key, mask in resampled_masks.items():
    roi_ts[roi_key] = bold[:, mask].mean(axis=1)
    print(f"{roi_key}: timeseries shape {roi_ts[roi_key].shape}")
