# Day 25 – Story Boundary Bleed Correction

Produce alternate subject-level time series that retain the seam rows but inject small noise where lags would otherwise bleed between stories. Raw Day24 outputs stay untouched; this notebook saves noise-corrected variants alongside them.

In [None]:
import json
from pathlib import Path
from typing import Dict

import pandas as pd

from src.utils import load_yaml
from src.day24_subject_concat import (
    DEFAULT_OUTPUT_SUBDIR as DAY24_OUTPUT_SUBDIR,
    load_subject_concat_manifest,
)
from src.day25_bleed_correction import (
    NoiseConfig,
    load_concat_result_from_manifest,
    compute_bleed_mask,
    apply_bleed_noise,
    save_corrected_result,
)


In [None]:
# Configuration and subject selection
# Everything is resolved relative to the project root; the YAML config
# supplies the subject, the TR and an optional `paths` section.
PROJECT_ROOT = Path('/flash/PaoU/seann/fmri-edm-ccm')
CONFIG_PATH = PROJECT_ROOT / 'configs' / 'demo.yaml'

cfg = load_yaml(CONFIG_PATH)

# Copy the `paths` section (missing or empty becomes {}) and fill in the
# project root only when the config does not already provide one.
paths = (cfg.get('paths') or {}).copy()
if 'project_root' not in paths:
    paths['project_root'] = str(PROJECT_ROOT)

# Fall back to UTS01 when the config has no (or an empty) subject entry.
configured_subject = cfg.get('subject')
SUBJECT = (configured_subject if configured_subject else 'UTS01').strip()
TR = float(cfg.get('TR', 2.0))

# MDE search parameters (keep aligned with Day22)
TAU_GRID = [1, 2]
E_CAP = 6

# Noise recipes for bleed rows: one entry per noise mode, each with a
# separate config (and seed) for the category series and the ROI series.
NOISE_STRATEGIES: Dict[str, Dict[str, NoiseConfig]] = {
    mode: {
        'categories': NoiseConfig(mode=mode, scale=1e-3, random_seed=0),
        'roi': NoiseConfig(mode=mode, scale=1e-3, random_seed=1),
    }
    for mode in ('constant', 'flux')
}

# Location of the Day24 manifest for the selected subject.
features_subdir = paths.get('features_root', 'features_no_fallback')
FEATURES_ROOT = PROJECT_ROOT / features_subdir
DAY24_MANIFEST_PATH = FEATURES_ROOT.joinpath(
    'subjects', SUBJECT, DAY24_OUTPUT_SUBDIR, 'manifest.json'
)

print(f'Subject: {SUBJECT}')
print(f'TR: {TR} seconds | tau grid: {TAU_GRID} | E cap: {E_CAP}')
print(f'Day24 manifest: {DAY24_MANIFEST_PATH}')


In [None]:
# Stop early when the Day24 manifest is absent: the combined dataset below
# is loaded from that file.
if not DAY24_MANIFEST_PATH.exists():
    raise FileNotFoundError(f'Day24 manifest not found at {DAY24_MANIFEST_PATH}')

# Also resolve the manifest through the Day24 helper; a falsy result is
# treated as a hard failure.
manifest_lookup = {'features_root': FEATURES_ROOT, 'output_subdir': DAY24_OUTPUT_SUBDIR}
manifest = load_subject_concat_manifest(SUBJECT, **manifest_lookup)
if not manifest:
    raise RuntimeError('Unable to load Day24 manifest metadata.')

# Rebuild the concatenated result from the manifest and report its size.
concat_result = load_concat_result_from_manifest(DAY24_MANIFEST_PATH)
print(f'Loaded combined dataset: {len(concat_result.category_frame)} rows, {concat_result.roi_matrix.shape[1]} ROIs.')


In [None]:
# Longest lag under the search settings: the largest tau times (E_CAP - 1).
max_tau = max(TAU_GRID)
max_lag = max_tau * (E_CAP - 1)
print(f'Max lag considered: {max_lag} TRs (~{max_lag * TR:.1f} seconds)')

# Flag the rows affected at that lag, given the story boundaries and the
# total number of rows in the concatenated frame.
total_rows = len(concat_result.category_frame)
bleed_mask = compute_bleed_mask(concat_result.boundaries, max_lag, total_rows)
bleed_rows = int(bleed_mask.sum())
print(f'Bleed rows flagged: {bleed_rows} ({bleed_rows * TR:.1f} seconds total)')

if not bleed_rows:
    print('No bleed rows detected with current settings.')
else:
    # Show the first few flagged rows with their position inside each story.
    preview_columns = ['global_index', 'story', 'story_row']
    bleed_preview = concat_result.category_frame.loc[bleed_mask, preview_columns].copy()
    display(bleed_preview.head())


In [None]:
# Apply every noise recipe to the flagged rows, export each variant, and
# record in the exported manifest the settings that produced it.
# `outputs` maps strategy name -> the mapping returned by save_corrected_result.
outputs = {}

for name, configs in NOISE_STRATEGIES.items():
    print(f"\n=== Strategy: {name} ===")
    patched = apply_bleed_noise(
        concat_result,
        bleed_mask,
        category_config=configs['categories'],
        roi_config=configs['roi'],
    )
    export = save_corrected_result(
        patched,
        features_root=FEATURES_ROOT,
        output_suffix=name,
        include_inventory=not concat_result.inventory.empty,
    )

    # Read-modify-write the exported manifest. The encoding is pinned to
    # UTF-8 so the JSON round-trip does not depend on the machine's locale.
    manifest_path = export['manifest_path']
    manifest_data = json.loads(manifest_path.read_text(encoding='utf-8'))
    manifest_data['bleed_correction'] = {
        'strategy': name,
        # Snapshot the config attributes (a copy, not the live attribute dict).
        'category_config': dict(vars(configs['categories'])),
        'roi_config': dict(vars(configs['roi'])),
        'max_lag_tr': max_lag,
        'max_lag_seconds': max_lag * TR,
        'bleed_rows': bleed_rows,
    }
    manifest_path.write_text(json.dumps(manifest_data, indent=2), encoding='utf-8')
    outputs[name] = export

    print('Saved outputs:')
    for key, path in export.items():
        # Entries whose value is None are not listed.
        if path is not None:
            print(f' - {key}: {path}')

outputs


In [None]:
# Build one summary row per exported strategy, reading each manifest back
# from disk.
summary_records = []
for strategy, export in outputs.items():
    manifest_path = export['manifest_path']
    manifest_data = json.loads(manifest_path.read_text())
    correction = manifest_data.get('bleed_correction', {})

    record = {'strategy': strategy, 'output_dir': str(export['output_dir'])}
    # Top-level manifest entries; absent keys show up as None.
    for field in ('categories_path', 'roi_path'):
        record[field] = manifest_data.get(field)
    # Bleed-correction metadata; absent keys default to 0.
    for field in ('bleed_rows', 'max_lag_tr'):
        record[field] = correction.get(field, 0)
    summary_records.append(record)

summary_df = pd.DataFrame(summary_records)
display(summary_df)
