# ECG Tensor Training & Validation

This notebook trains and validates tensor-based deep learning models for multi-class cardiac disease detection using 12-lead ECG datasets (PTB-XL, Chapman-Shaoxing, CinC 2017 AFDB, etc.).

**Pipeline Outline:**
1. Configuration & Environment
2. Data Access & Loading (WFDB / CSV metadata)
3. Preprocessing (denoise, resample, normalize, beat/window segmentation)
4. Tensor Construction & Decomposition (CP / Tucker / HOSVD)
5. Model Architectures (CNN / CNN-LSTM Hybrid)
6. Training Loop with MLflow Tracking
7. Evaluation (AUC, F1, confusion matrix, reliability diagrams)
8. Interpretability (Grad-CAM / Captum / Saliency)
9. Paper-quality Visualization & Table Export

> NOTE: Cells are scaffolded; fill in the TODOs as data and experiments progress.

In [None]:
# 1. Configuration & Environment
from pathlib import Path
import os, sys, json, math, time, random

# Project root. Defaults to the original workstation path, but can be
# redirected without editing the notebook by exporting ECG_PROJECT_ROOT
# before the kernel starts (e.g. on a machine where the data is mounted
# somewhere else).
PROJECT_ROOT = Path(os.environ.get('ECG_PROJECT_ROOT', 'd:/ecg-research'))

# Input data lives under the root; every generated output goes under artifacts/.
DATA_DIR = PROJECT_ROOT / 'dataset'
ARTIFACT_DIR = PROJECT_ROOT / 'artifacts'
FIG_DIR = ARTIFACT_DIR / 'figures'
MODEL_DIR = ARTIFACT_DIR / 'models'
SAL_DIR = ARTIFACT_DIR / 'saliency'
MLFLOW_DIR = ARTIFACT_DIR / 'mlflow'

# Create the output folders up front (parents included); re-running the cell
# is harmless because existing directories are accepted. DATA_DIR is
# intentionally not created here: it is only checked for existence later.
for artifact_subdir in (FIG_DIR, MODEL_DIR, SAL_DIR, MLFLOW_DIR):
    artifact_subdir.mkdir(parents=True, exist_ok=True)

# Reproducibility
import numpy as np
import torch
SEED = 42
# Seed the Python, NumPy and PyTorch generators with the same value, in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
_cuda_available = torch.cuda.is_available()
if _cuda_available:
    # Seed the CUDA generators as well when a GPU is present.
    torch.cuda.manual_seed_all(SEED)

# Quick environment report: GPU availability and expected data folders.
print('CUDA available:', _cuda_available)
print('Data directory exists:', DATA_DIR.exists())
print('PTBXL exists:', (DATA_DIR / 'PTBXL').exists())

In [None]:
# 2. Data Access Skeleton (PTBXL example)
# TODO: Integrate full PTBXL metadata parsing & multi-dataset loaders
import pandas as pd
# Location of the PTB-XL metadata table inside the dataset folder.
PTBXL_META = DATA_DIR.joinpath('PTBXL', 'ptbxl_database.csv')
if not PTBXL_META.exists():
    # Nothing to load yet; report the path that was checked.
    print('PTBXL metadata not found at', PTBXL_META)
else:
    # Load the metadata and preview the first rows in the notebook output.
    df_meta = pd.read_csv(PTBXL_META)
    display(df_meta.head())

# Placeholder for WFDB reading utilities (see preprocessing module later)

In [None]:
# 3. Preprocessing Functions (will be imported from preprocessing.ecg_preprocessing)
# TODO: After creating the module, import and demonstrate a sample transform
try:
    from preprocessing.ecg_preprocessing import (
        denoise_signal,
        normalize_signal,
        resample_signal,
        segment_beats,
    )
except Exception as import_error:
    # Best-effort import: the module is still being scaffolded, so any failure
    # is reported and the notebook keeps running.
    print('Preprocessing module not ready yet:', import_error)
else:
    print('Preprocessing module imported.')

In [None]:
# 4. Tensor Construction & Decomposition Demo (using tensorly)
# Synthetic placeholder until real ECG tensors are built.
import tensorly as tl
from tensorly.decomposition import parafac, tucker
# Run the demo on plain NumPy arrays.
tl.set_backend('numpy')
example_tensor = tl.tensor(np.random.randn(12, 500, 4))  # leads x time x feature-channels

# CP (PARAFAC): a short, randomly initialised run is enough to show the
# shape of one factor matrix per mode.
cp_rank = 6
weights, factors = parafac(example_tensor, rank=cp_rank, n_iter_max=10, init='random')
print('CP decomposition factors shapes:', [f.shape for f in factors])

# Tucker: one rank per mode. The keyword is `rank`; the older `ranks`
# spelling was deprecated and is rejected by current TensorLy releases.
tucker_core, tucker_factors = tucker(example_tensor, rank=[6, 50, 3])
print('Tucker core shape:', tucker_core.shape)
print('Tucker factor shapes:', [f.shape for f in tucker_factors])

In [None]:
# 5. Model Architecture Stubs (CNN / CNN-LSTM hybrid)
import torch.nn as nn
class ECGCnn(nn.Module):
    """Plain 1-D convolutional classifier for multi-lead ECG windows.

    Two Conv1d -> BatchNorm1d -> ReLU stages are followed by an adaptive
    average pool that fixes the temporal length at 64, so the dense head
    always receives 64 channels x 64 positions whatever the input length.

    Args:
        in_channels: Number of input channels (one per ECG lead).
        num_classes: Number of output logits.
    """

    def __init__(self, in_channels=12, num_classes=5):
        super().__init__()
        # Layer order is kept fixed so state_dict keys stay 'features.<i>.*'.
        conv_stack = [
            nn.Conv1d(in_channels, 32, kernel_size=7, padding=3),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=5, padding=2),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(64),
        ]
        self.features = nn.Sequential(*conv_stack)

        # 64 feature maps x 64 pooled time steps are flattened for the MLP head.
        dense_stack = [
            nn.Flatten(),
            nn.Linear(64 * 64, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Map ``x`` of shape (batch, leads, time) to logits (batch, num_classes)."""
        return self.classifier(self.features(x))

class ECGCnnLstm(nn.Module):
    """CNN front-end followed by a bidirectional LSTM and a linear head.

    The two length-preserving convolutions extract 64 features per time step;
    the LSTM reads that sequence in both directions, and the classifier is
    applied to the concatenation of the two directions' final hidden states.

    Args:
        in_channels: Number of input channels (one per ECG lead).
        num_classes: Number of output logits.
        hidden_size: Hidden units per LSTM direction.
    """

    def __init__(self, in_channels=12, num_classes=5, hidden_size=128):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(in_channels, 32, 7, padding=3),
            nn.ReLU(),
            nn.Conv1d(32, 64, 5, padding=2),
            nn.ReLU()
        )
        self.lstm = nn.LSTM(input_size=64, hidden_size=hidden_size, batch_first=True, bidirectional=True)
        self.head = nn.Sequential(
            nn.Linear(hidden_size*2, num_classes)
        )

    def forward(self, x):
        """Map ``x`` of shape (batch, leads, time) to logits (batch, num_classes)."""
        # Leads are treated as convolution channels.
        feats = self.cnn(x)  # (batch, 64, time)
        feats = feats.transpose(1, 2)  # (batch, time, 64)
        _, (h_n, _) = self.lstm(feats)
        # Use the final hidden state of each direction. Indexing the output
        # sequence at the last timestep would pair the forward summary with a
        # backward state that has only seen the final sample, because the
        # backward pass finishes at timestep 0.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=1)  # (batch, 2 * hidden_size)
        return self.head(summary)

# Status message: reached only after both model classes above were defined.
print('Model stubs defined.')

In [None]:
# 6. Training Loop Skeleton with MLflow (to be expanded)
import mlflow
# Track runs in a local file store under the artifacts folder, and group
# them under a single named experiment.
_tracking_uri = f'file:{MLFLOW_DIR.as_posix()}'
mlflow.set_tracking_uri(_tracking_uri)
mlflow.set_experiment('ecg_tensor_experiments')

def train_one_epoch(model, dataloader, optimizer, criterion, device='cpu'):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs. It
            does not need to define ``__len__`` (generators and iterable-style
            loaders are accepted).
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Callable ``criterion(predictions, targets)`` returning a
            scalar loss tensor.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The average of the per-batch loss values as a float, or ``0.0`` when
        the loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted while iterating so len(dataloader) is never required
    for x, y in dataloader:  # TODO: integrate real dataset
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        preds = model(x)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # max(1, ...) keeps the empty-loader case from dividing by zero.
    return total_loss / max(1, num_batches)

# Status message: reached only after the MLflow setup calls and the
# train_one_epoch definition above have run.
print('MLflow initialized. Training skeleton ready.')

In [None]:
# 7. Evaluation Metrics Skeleton
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
def evaluate(model, dataloader, device='cpu'):
    """Compute macro-F1 and the confusion matrix of ``model`` on ``dataloader``.

    Args:
        model: Module producing per-class logits; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        A dict with ``'f1_macro'`` (macro-averaged F1 over the argmax
        predictions) and ``'confusion_matrix'`` (nested Python lists).
    """
    model.eval()
    all_preds, all_targets = [], []
    with torch.no_grad():
        for x, y in dataloader:  # TODO: integrate real dataset
            logits = model(x.to(device))
            # Predicted class = index of the largest logit along dim 1.
            preds = torch.argmax(logits, dim=1).cpu().numpy()
            all_preds.extend(preds)
            # Targets may arrive on an accelerator; .numpy() only works on
            # CPU tensors, so bring them back first (a no-op when on CPU).
            all_targets.extend(y.cpu().numpy())
    cm = confusion_matrix(all_targets, all_preds)
    f1 = f1_score(all_targets, all_preds, average='macro')
    # TODO: compute ROC-AUC per class using probability outputs
    return {'f1_macro': f1, 'confusion_matrix': cm.tolist()}

# Status message: reached only after evaluate() above was defined.
print('Evaluation skeleton ready.')

In [None]:
# 8. Interpretability Placeholder (Grad-CAM / Captum)
# TODO: Implement Grad-CAM hooks for 1D conv layers & integrate Captum.
# Placeholder output only; this cell performs no interpretability work yet.
print('Interpretability placeholders pending implementation.')

In [None]:
# 9. Paper-quality Visualization Setup
import matplotlib as mpl, matplotlib.pyplot as plt, seaborn as sns
# Apply the seaborn context and style first; the explicit rcParams are set
# afterwards so the sizes below take precedence.
sns.set_context('talk')
sns.set_style('whitegrid')

_publication_rc = {
    'figure.dpi': 110,
    'axes.titlesize': 16,
    'axes.labelsize': 14,
    'font.size': 13,
    'legend.fontsize': 12,
    'figure.figsize': (8,5),
}
mpl.rcParams.update(_publication_rc)

print('Visualization defaults set for publication.')

## Next Steps
- Implement real data loaders (PTB-XL parsing, WFDB record reading).
- Add preprocessing functions in `preprocessing/ecg_preprocessing.py`.
- Replace synthetic tensor with actual lead-time-feature construction.
- Add training loop integration, Grad-CAM, and calibration plots.
- Export LaTeX tables for metrics.

Proceed to module scaffolding next.