# Master ECG Pipeline

This notebook brings all project scripts and modules together into a single runnable file.

**Usage:** run the cells top-to-bottom. For headless execution on Windows (e.g. via nbconvert), set the selector event loop policy first:

```python
import asyncio, sys
if sys.platform == 'win32':
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
```



In [None]:

# Environment & imports - idempotent
import os, sys, json, time, math, asyncio
from pathlib import Path
import numpy as np
import random
import torch
# Report which interpreter and torch build this kernel is using.
print('Python:', sys.executable)
print('Torch:', getattr(torch, '__version__', 'n/a'))

# Windows asyncio fix for nbconvert headless runs: switch to the selector
# event loop policy. This stays best-effort (a failure must not stop the
# notebook), but the failure is reported instead of being silently dropped.
import platform
if platform.system() == 'Windows':
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    except Exception as exc:
        print('Warning: could not set WindowsSelectorEventLoopPolicy:', repr(exc))

# Project root detection: ECG_ROOT when set (and non-empty), otherwise the
# current working directory. The result is always expanded and made absolute
# so every derived path below has the same form regardless of where ROOT
# came from.
ROOT = Path(os.environ.get('ECG_ROOT') or Path.cwd()).expanduser().resolve()
DATASET_DIR = ROOT / 'Dataset'
ARTIFACTS_DIR = ROOT / 'artifacts'
PROCESSED_DIR = ARTIFACTS_DIR / 'processed'
LOGS_DIR = ROOT / 'logs'
NOTEBOOKS_DIR = ROOT / 'notebooks'

# Create the output directories up front; exist_ok keeps re-running this cell
# harmless. DATASET_DIR and NOTEBOOKS_DIR are only named here, not created.
for p in (ARTIFACTS_DIR, PROCESSED_DIR, PROCESSED_DIR / 'records', LOGS_DIR):
    p.mkdir(parents=True, exist_ok=True)

# Seeds for reproducibility: the same value goes to Python's random, NumPy
# and torch (plus every CUDA device when CUDA is available).
SEED = int(os.environ.get('ECG_SEED', '42'))
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('DEVICE', DEVICE)


## Quick: Generate/Load unified mapping (run this cell)


In [None]:

# Generate the unified label mapping by running the project script, when the
# script is present and no candidate mapping file exists yet.
import subprocess
import sys

candidate = Path('logs/unified_label_mapping.candidate.csv')
prod = Path('logs/unified_label_mapping.csv')
mapping_script = Path('scripts/generate_unified_mapping.py')
if mapping_script.exists() and not candidate.exists():
    print('Generating candidate mapping...')
    # Run with the interpreter that is executing this notebook rather than
    # whatever `python` resolves to on PATH, and pass an argument list so no
    # shell quoting is involved.
    result = subprocess.run([sys.executable or 'python', str(mapping_script)], check=False)
    if result.returncode != 0:
        # Surface the failure; the exit status used to be discarded.
        print(f'Mapping script exited with code {result.returncode}.')
else:
    print('Candidate mapping exists:', candidate.exists(), 'Prod file exists:', prod.exists())
# If you have a candidate and want to promote it, uncomment:
# if candidate.exists(): candidate.replace(prod)


## Preprocessing (streaming, memory-safe)


In [None]:

# Run streaming preprocessing from scripts/preprocess_streaming.py if present.
import subprocess
import sys

proc_script = Path('scripts/preprocess_streaming.py')
if proc_script.exists():
    print('Launching streaming preprocessing (script)...')
    # recommend using environment var ECG_PREPROCESS_LIMIT to test
    # Run with the interpreter that is executing this notebook rather than
    # whatever `python` resolves to on PATH; an argument list avoids shell quoting.
    result = subprocess.run([sys.executable or 'python', str(proc_script)], check=False)
    if result.returncode != 0:
        # Surface the failure; the exit status used to be discarded.
        print(f'Preprocessing script exited with code {result.returncode}.')
else:
    print('No preprocess_streaming.py found. Implement preprocessing in this notebook or inline alternate script.')


## Training (run this after preprocessing finishes)


In [None]:

# Launch the training script if present.
import subprocess
import sys

train_script = Path('scripts/train_pipeline.py')  # optional
if train_script.exists():
    print('Running training script...')
    # Run with the interpreter that is executing this notebook rather than
    # whatever `python` resolves to on PATH; an argument list avoids shell quoting.
    result = subprocess.run([sys.executable or 'python', str(train_script)], check=False)
    if result.returncode != 0:
        # Surface the failure; the exit status used to be discarded.
        print(f'Training script exited with code {result.returncode}.')
else:
    # Name the path this cell actually looks for, so that following the hint
    # makes the script get picked up on the next run.
    print(f'No training script detected. Use in-notebook training cells or create {train_script.as_posix()} and link it.')


## Evaluation and Visuals


In [None]:

# Run the evaluation script if it exists.
import subprocess
import sys

eval_script = Path('scripts/evaluate.py')
if eval_script.exists():
    # Run with the interpreter that is executing this notebook rather than
    # whatever `python` resolves to on PATH; an argument list avoids shell quoting.
    result = subprocess.run([sys.executable or 'python', str(eval_script)], check=False)
    if result.returncode != 0:
        # Surface the failure; the exit status used to be discarded.
        print(f'Evaluation script exited with code {result.returncode}.')
else:
    print('No evaluate.py. Use notebook cells to visualize artifacts/figures/')


## Smoke tests and quick validation


In [None]:

# Run the smoke-test script if present; otherwise print two manual checks
# against the processed-data directory.
import subprocess
import sys

smoke = Path('scripts/verify_smoke_test.py')
if smoke.exists():
    # Run with the interpreter that is executing this notebook rather than
    # whatever `python` resolves to on PATH; an argument list avoids shell quoting.
    result = subprocess.run([sys.executable or 'python', str(smoke)], check=False)
    if result.returncode != 0:
        # Surface the failure; the exit status used to be discarded.
        print(f'Smoke-test script exited with code {result.returncode}.')
else:
    print('No smoke-test script. Manual checks:')
    # Count the .npz files lazily instead of materialising the whole listing.
    record_count = sum(1 for _ in (PROCESSED_DIR / 'records').glob('*.npz'))
    print(' - Count files:', record_count)
    print(' - Check splits:', (PROCESSED_DIR / 'splits.json').exists())


## Final: Notebook control
You can now run cells in order. Long-running steps are executed as external scripts to avoid kernel timeouts.
