<a href="https://colab.research.google.com/github/zk2275/11-785-idl-project-group-38/blob/main/colab_baseline_models_(4).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === 1. Install and Import Libraries ===

# Install xlrd for reading .xls files.
#!pip install xlrd

import os
import gc
import zipfile
import glob
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import shutil

# Scipy for signal processing (peak finding)
from scipy.signal import find_peaks

# Sklearn for metrics
#from sklearn.metrics import mean_absolute_error

import torch
from torch.utils.data import Dataset, DataLoader


# Suppress warnings
#warnings.filterwarnings('ignore')

In [None]:
# === 2. Mount Drive & Define Data Functions ===

# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Pick the compute device once: CUDA when PyTorch reports it as available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:", device)

In [None]:

# Defaults used by windowize / BPWindowsDataset / build_loaders.
FS = 125          # sampling rate in samples per second (seconds * FS -> samples)
WIN_SEC = 4.0     # window length in seconds
STEP_SEC = 2.0    # hop between consecutive window starts, in seconds

# Exclusive (low, high) bounds on a window's mean ABP peak (SBP) and mean ABP
# trough (DBP); windowize drops windows outside either range.
# NOTE(review): units are presumably mmHg — confirm against the data source.
SBP_RANGE = (10, 300)
DBP_RANGE = (10, 200)

In [None]:
def list_csvs(folder: str):
    """Return the sorted paths of every ``*.csv`` file under ``folder``.

    The search is recursive (``folder`` itself and all nested directories).
    An empty list is returned when nothing matches.
    """
    pattern = os.path.join(folder, '**', '*.csv')
    matches = glob.glob(pattern, recursive=True)
    matches.sort()
    return matches

def read_record(path: str) -> pd.DataFrame:
    """Load one recording CSV and keep only the signal columns.

    Args:
        path: location of the CSV file.

    Returns:
        A copy of the frame restricted to ``t_sec``, ``ECG``, ``PPG`` and
        ``ABP``, in that column order.

    Raises:
        ValueError: if any of the four required columns is absent.
    """
    frame = pd.read_csv(path)
    need = ['t_sec', 'ECG', 'PPG', 'ABP']
    for column in need:
        if column not in frame.columns:
            raise ValueError(f'{path} 缺少列 {need}')
    return frame[need].copy()

def zscore(x: np.ndarray) -> np.ndarray:
    """Standardise ``x`` to zero mean and unit standard deviation (float32).

    A small constant (1e-6) is added to the standard deviation so a constant
    signal maps to zeros instead of dividing by zero.
    """
    values = x.astype(np.float32)
    centered = values - values.mean()
    scale = values.std() + 1e-6
    return centered / scale

def windowize(df: pd.DataFrame,
              fs: int = FS,
              win_sec: float = WIN_SEC,
              step_sec: float = STEP_SEC):
    """Slice one record into fixed-length windows with blood-pressure labels.

    ECG and PPG are z-scored over the whole record before slicing. For each
    window the ABP trace is searched for peaks and troughs that are at least
    ``0.5 * fs`` samples apart; the window label is (mean peak value, mean
    trough value). Windows without any peak or trough, or whose labels fall
    outside ``SBP_RANGE`` / ``DBP_RANGE`` (exclusive bounds), are dropped.

    Args:
        df: record with ``ECG``, ``PPG`` and ``ABP`` columns.
        fs: sampling rate in samples per second.
        win_sec: window length in seconds.
        step_sec: hop between window starts in seconds.

    Returns:
        ``(X, y)`` as float32 arrays: ``X`` is ``[N, 2, L]`` with channel 0 the
        ECG and channel 1 the PPG, ``y`` is ``[N, 2]`` holding (SBP, DBP).
        Both have ``N == 0`` when no window survives.
    """
    win_len = int(round(win_sec * fs))   # 500 with the defaults
    hop = int(round(step_sec * fs))      # 250 with the defaults

    def _no_windows():
        return np.empty((0, 2, win_len), np.float32), np.empty((0, 2), np.float32)

    n_samples = len(df)
    if n_samples < win_len:
        return _no_windows()

    ecg_norm = zscore(df['ECG'].values)
    ppg_norm = zscore(df['PPG'].values)
    pressure = df['ABP'].values.astype(np.float32)

    # Minimum spacing between detected extrema, in samples.
    min_gap = int(0.5 * fs)

    features, labels = [], []
    for begin in range(0, n_samples - win_len + 1, hop):
        span = slice(begin, begin + win_len)
        abp_seg = pressure[span]

        peak_idx = find_peaks(abp_seg, distance=min_gap)[0]
        trough_idx = find_peaks(-abp_seg, distance=min_gap)[0]
        if len(peak_idx) == 0 or len(trough_idx) == 0:
            continue

        sbp = float(np.mean(abp_seg[peak_idx]))
        dbp = float(np.mean(abp_seg[trough_idx]))
        sbp_ok = SBP_RANGE[0] < sbp < SBP_RANGE[1]
        dbp_ok = DBP_RANGE[0] < dbp < DBP_RANGE[1]
        if not (sbp_ok and dbp_ok):
            continue

        features.append(np.stack([ecg_norm[span], ppg_norm[span]], axis=0))  # [2, L]
        labels.append([sbp, dbp])

    if not features:
        return _no_windows()

    return np.stack(features).astype(np.float32), np.stack(labels).astype(np.float32)

In [None]:
# ----------------- PyTorch Dataset -----------------
class BPWindowsDataset(Dataset):
    """In-memory dataset of ECG/PPG windows with SBP/DBP labels for one split.

    Every CSV found under ``split_dir`` is read and windowed at construction
    time. A file that raises while being read or windowed is reported with a
    ``[Skip]`` line and ignored.

    Raises:
        RuntimeError: if ``split_dir`` contains no CSV, or no file yields a
            usable window.
    """

    def __init__(self, split_dir: str,
                 fs: int = FS, win_sec: float = WIN_SEC, step_sec: float = STEP_SEC):
        super().__init__()
        self.fs = fs
        self.win_sec = win_sec
        self.step_sec = step_sec

        files = list_csvs(split_dir)
        if not files:
            raise RuntimeError(f'在 {split_dir} 没有找到 CSV 文件')

        window_chunks, label_chunks, origins = [], [], []
        progress = tqdm(files, desc=f'Loading {os.path.basename(split_dir)}')
        for f in progress:
            try:
                record = read_record(f)
                X, y = windowize(record, fs=self.fs, win_sec=self.win_sec,
                                 step_sec=self.step_sec)
                n_windows = X.shape[0]
                if n_windows:
                    window_chunks.append(X)
                    label_chunks.append(y)
                    # Remember the source file of every window.
                    origins.extend([f] * n_windows)
            except Exception as e:
                print(f'[Skip] {f}: {e}')

        if not window_chunks:
            raise RuntimeError(f'{split_dir} 没有可用窗口样本')

        self.X = np.concatenate(window_chunks, axis=0)   # [N, 2, L]
        self.y = np.concatenate(label_chunks, axis=0)    # [N, 2]
        self.src = np.array(origins)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        window = self.X[idx]   # [2, L]; row 0 is ECG, row 1 is PPG
        target = self.y[idx]   # [SBP, DBP]
        sample = {
            'ppg': torch.from_numpy(window[1:2]),   # [1, L]
            'ecg': torch.from_numpy(window[0:1]),   # [1, L]
            'sbp': torch.tensor(target[0], dtype=torch.float32),
            'dbp': torch.tensor(target[1], dtype=torch.float32),
            'src': self.src[idx]
        }
        return sample


In [None]:
# ----------------- Build DataLoaders -----------------
def build_loaders(root: str,
                  fs: int = FS, win_sec: float = WIN_SEC, step_sec: float = STEP_SEC,
                  batch_size: int = 128, num_workers: int = 0):
    """Create the train / val / test DataLoaders from ``root``.

    ``root`` must contain ``train``, ``val`` and ``test`` sub-directories; each
    is loaded into a ``BPWindowsDataset`` (in that order).

    Returns:
        ``(train_ld, val_ld, test_ld)``. Only the training loader shuffles and
        drops the last incomplete batch; all three use ``pin_memory=True``.
    """
    datasets = {
        split: BPWindowsDataset(os.path.join(root, split),
                                fs=fs, win_sec=win_sec, step_sec=step_sec)
        for split in ('train', 'val', 'test')
    }

    def _loader(split, training):
        # Shuffling and dropping the ragged last batch apply to training only.
        return DataLoader(datasets[split], batch_size=batch_size, shuffle=training,
                          num_workers=num_workers, drop_last=training, pin_memory=True)

    return _loader('train', True), _loader('val', False), _loader('test', False)

In [None]:

# Directory holding the train/ val/ test/ sub-folders of per-record CSV files.
root = "../share/dat_working/split_by_time"
# Use the notebook-level FS / WIN_SEC / STEP_SEC constants instead of re-typing
# their values, so changing a constant actually changes the data that is loaded.
train_ld, val_ld, test_ld = build_loaders(root, fs=FS, win_sec=WIN_SEC, step_sec=STEP_SEC,
                                          batch_size=128, num_workers=0)

# Sanity check: pull one training batch and show the tensor shapes.
batch = next(iter(train_ld))
print(batch['ppg'].shape, batch['ecg'].shape, batch['sbp'].shape, batch['dbp'].shape)


# PPG $\rightarrow$ ECG

In [None]:
import os, math, time, random
import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm.auto import tqdm, trange


In [None]:
# Re-select the compute device for the PPG -> ECG section (same rule as earlier:
# CUDA when available, otherwise CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

In [None]:
# Sanity check: fetch one training batch and print the shapes of its tensors.
batch = next(iter(train_ld))
print(batch['ppg'].shape, batch['ecg'].shape, batch['sbp'].shape, batch['dbp'].shape)

In [None]:
# Free unreferenced Python objects, then release PyTorch's cached CUDA memory.
# Run this cell when a CUDA out-of-memory error shows up.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Seed Python's `random`, NumPy and PyTorch (plus every CUDA device when running
# on GPU) with the same value.
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)

In [None]:
# model and DDPM

class Block(nn.Module):
    """Two stacked ``Conv1d(kernel=3, padding=1) -> GroupNorm -> SiLU`` stages.

    The sequence length is preserved; only the channel count changes
    (``c_in -> c_out -> c_out``).

    Args:
        c_in: number of input channels.
        c_out: number of output channels of both convolutions.
        groups: upper bound on the number of GroupNorm groups.
    """

    def __init__(self, c_in, c_out, groups=8):
        super().__init__()
        # GroupNorm needs c_out to be divisible by the group count. Start from
        # min(groups, c_out) and step down to the nearest divisor, so channel
        # counts such as 12 work instead of raising at construction time.
        # The result is unchanged whenever min(groups, c_out) already divides c_out.
        n_groups = max(1, min(groups, c_out))
        while c_out % n_groups:
            n_groups -= 1
        self.conv1 = nn.Conv1d(c_in, c_out, 3, padding=1)
        self.gn1   = nn.GroupNorm(n_groups, c_out)
        self.conv2 = nn.Conv1d(c_out, c_out, 3, padding=1)
        self.gn2   = nn.GroupNorm(n_groups, c_out)

    def forward(self, x):
        """Apply both conv/norm/activation stages to ``x`` of shape (B, c_in, L)."""
        x = F.silu(self.gn1(self.conv1(x)))
        x = F.silu(self.gn2(self.conv2(x)))
        return x

class UNet1D(nn.Module):
    """Small 1-D U-Net that predicts noise from a noisy signal and a PPG condition.

    Two stride-2 down-sampling stages, a bottleneck, and two transposed-conv
    up-sampling stages with skip connections. The timestep embedding is added
    to the bottleneck features only.

    Args:
        c_in: channels of the concatenated input (``x_t`` plus ``ppg``).
        base: channel width of the first encoder stage.
        time_dim: number of rows in the timestep embedding table; timesteps
            are wrapped modulo this value before lookup.
    """

    def __init__(self, c_in=2, base=64, time_dim=256):
        super().__init__()
        self.t_embed = nn.Embedding(time_dim, base*4)
        self.enc1 = Block(c_in, base)
        self.down1= nn.Conv1d(base, base*2, 4, 2, 1)
        self.enc2 = Block(base*2, base*2)
        self.down2= nn.Conv1d(base*2, base*4, 4, 2, 1)
        self.mid  = Block(base*4, base*4)
        self.up1  = nn.ConvTranspose1d(base*4, base*2, 4, 2, 1)
        self.dec1 = Block(base*4, base*2)
        self.up2  = nn.ConvTranspose1d(base*2, base, 4, 2, 1)
        self.dec2 = Block(base*2, base)
        self.out  = nn.Conv1d(base, 1, 1)

    def forward(self, x_t, ppg, t):
        """Predict the noise for ``x_t``.

        Args:
            x_t: noisy signal, (B, 1, L).
            ppg: conditioning signal, (B, 1, L).
            t: integer timesteps, (B,).

        Returns:
            Tensor of shape (B, 1, L).
        """
        x = torch.cat([x_t, ppg], dim=1)  # (B,2,L)

        # The two stride-2 stages halve the length twice, and the skip
        # concatenations only line up when L is a multiple of 4. Right-pad
        # with zeros to the next multiple and crop the output back, so other
        # lengths no longer crash. Inputs with L % 4 == 0 are untouched.
        length = x.size(-1)
        pad = (-length) % 4
        if pad:
            x = F.pad(x, (0, pad))

        e1 = self.enc1(x)
        e2 = self.enc2(self.down1(e1))
        x = self.mid(self.down2(e2))

        # Broadcast the timestep embedding along the length axis.
        t_emb = self.t_embed(t % self.t_embed.num_embeddings).unsqueeze(-1)
        x = x + t_emb

        x = self.dec1(torch.cat([self.up1(x), e2], dim=1))
        x = self.dec2(torch.cat([self.up2(x), e1], dim=1))
        out = self.out(x)  # (B,1,L + pad)
        return out[..., :length] if pad else out

class Diffusion1D(nn.Module):
    """DDPM wrapper around a noise-prediction network with a linear beta schedule.

    Args:
        net: network called as ``net(x_t, ppg, t)`` that returns predicted noise.
        T: number of diffusion steps.
        beta_start: first value of the linear beta schedule.
        beta_end: last value of the linear beta schedule.
    """

    def __init__(self, net: UNet1D, T=50, beta_start=1e-4, beta_end=2e-1):
        super().__init__()
        self.net = net
        self.T = T
        schedule = torch.linspace(beta_start, beta_end, T)
        retain = 1.0 - schedule
        # Buffers follow the module across devices and are stored in the state dict.
        self.register_buffer('betas', schedule)
        self.register_buffer('alphas', retain)
        self.register_buffer('alphas_bar', torch.cumprod(retain, dim=0))

    def q_sample(self, x0, t, noise):
        """Return ``x0`` noised to timesteps ``t`` using the supplied ``noise``."""
        keep = self.alphas_bar[t].view(-1,1,1)
        return torch.sqrt(keep)*x0 + torch.sqrt(1.0 - keep)*noise

    @torch.no_grad()
    def sample(self, ppg, L):
        """Generate a length-``L`` signal per row of ``ppg`` by reverse diffusion.

        Starts from Gaussian noise and runs steps ``T-1 .. 0``; fresh noise is
        added after every step except the last one (``t == 0``).
        """
        dev = ppg.device
        n_rows = ppg.size(0)
        x = torch.randn(n_rows, 1, L, device=dev)
        for step in range(self.T - 1, -1, -1):
            tt = torch.full((n_rows,), step, device=dev, dtype=torch.long)
            eps = self.net(x, ppg, tt)
            beta_t = self.betas[step]
            alpha_t = self.alphas[step]
            abar_t = self.alphas_bar[step]
            noise_coef = beta_t/torch.sqrt(1-abar_t+1e-8)
            mean = (1/torch.sqrt(alpha_t))*(x - noise_coef*eps)
            if step == 0:
                x = mean
            else:
                x = mean + torch.sqrt(beta_t)*torch.randn_like(x)
        return x  # (B,1,L)


In [None]:
# train and test

def train_one_epoch(diffusion, loader, optimizer, device):
    """Run one training pass of the noise-prediction objective.

    For each batch a random timestep per sample is drawn, the ECG is noised to
    that step, and the network is trained to predict the noise (MSE loss) with
    gradient-norm clipping at 1.0.

    Returns:
        The sample-weighted mean of the per-batch losses (0.0 for an empty loader).
    """
    diffusion.train()
    loss_sum = 0.0
    seen = 0
    for batch in loader:
        ppg = batch['ppg'].to(device, non_blocking=True)  # (B,1,L)
        ecg = batch['ecg'].to(device, non_blocking=True)  # (B,1,L)
        n_items, _, _ = ecg.shape

        steps = torch.randint(0, diffusion.T, (n_items,), device=device)
        eps = torch.randn_like(ecg)
        noisy = diffusion.q_sample(ecg, steps, eps)

        optimizer.zero_grad(set_to_none=True)
        loss = F.mse_loss(diffusion.net(noisy, ppg, steps), eps)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(diffusion.parameters(), max_norm=1.0)
        optimizer.step()

        loss_sum += loss.item() * n_items
        seen += n_items
    return loss_sum / max(1, seen)

@torch.no_grad()
def eval_eps_mse(diffusion, loader, device, t_mode='mid'):
    """Evaluate the noise-prediction mean squared error over ``loader``.

    Args:
        diffusion: object exposing ``T``, ``q_sample`` and ``net``.
        loader: iterable of batches with ``'ppg'`` and ``'ecg'`` tensors (B,1,L).
        device: device the batches are moved to.
        t_mode: ``'rand'`` draws a random timestep per sample; any other value
            evaluates every sample at the fixed timestep ``T // 2``.

    Returns:
        Squared error averaged over every predicted element, i.e. on the same
        scale as the mean-reduced training loss (0.0 for an empty loader).
    """
    diffusion.eval()
    sq_err_sum, n_elems = 0.0, 0
    for batch in loader:
        ppg = batch['ppg'].to(device, non_blocking=True)
        ecg = batch['ecg'].to(device, non_blocking=True)
        B, _, _ = ecg.shape
        if t_mode == 'rand':
            t = torch.randint(0, diffusion.T, (B,), device=device)
        else:
            t = torch.full((B,), diffusion.T//2, device=device, dtype=torch.long)
        noise = torch.randn_like(ecg)
        x_t = diffusion.q_sample(ecg, t, noise)
        eps_hat = diffusion.net(x_t, ppg, t)
        sq_err_sum += F.mse_loss(eps_hat, noise, reduction='sum').item()
        # Normalise by element count, not sample count: dividing the summed
        # error by B alone yields L times the MSE, which is not comparable
        # with the training loss.
        n_elems += noise.numel()
    return sq_err_sum / max(1, n_elems)


In [None]:
# train with tqdm

def train_one_epoch_tqdm(diffusion, loader, optimizer, device, epoch, epochs):
    """Train the noise predictor for one epoch while showing a progress bar.

    Same objective as the plain training loop: random timestep per sample,
    MSE between predicted and true noise, gradient-norm clipping at 1.0. The
    bar displays the running sample-weighted loss and the current learning rate.

    Returns:
        The sample-weighted mean loss over the epoch.
    """
    diffusion.train()
    loss_sum = 0.0
    seen = 0
    pbar = tqdm(loader, desc=f"Epoch {epoch}/{epochs}", unit="batch", leave=False)
    for batch in pbar:
        ppg = batch['ppg'].to(device, non_blocking=True)
        ecg = batch['ecg'].to(device, non_blocking=True)
        n_items, _, _ = ecg.shape

        steps = torch.randint(0, diffusion.T, (n_items,), device=device)
        eps = torch.randn_like(ecg)
        noisy = diffusion.q_sample(ecg, steps, eps)

        optimizer.zero_grad(set_to_none=True)
        loss = F.mse_loss(diffusion.net(noisy, ppg, steps), eps)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(diffusion.parameters(), max_norm=1.0)
        optimizer.step()

        loss_sum += loss.item() * n_items
        seen += n_items
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(avg_loss=f"{loss_sum/max(1,seen):.6f}",
                         lr=f"{current_lr:.1e}")
    return loss_sum / max(1, seen)

@torch.no_grad()
def eval_eps_mse_tqdm(diffusion, loader, device, desc="Val"):
    """Evaluate the noise-prediction MSE at timestep ``T // 2`` with a progress bar.

    Args:
        diffusion: object exposing ``T``, ``q_sample`` and ``net``.
        loader: iterable of batches with ``'ppg'`` and ``'ecg'`` tensors (B,1,L).
        device: device the batches are moved to.
        desc: label shown on the progress bar.

    Returns:
        Squared error averaged over every predicted element, i.e. on the same
        scale as the mean-reduced training loss (0.0 for an empty loader).
    """
    diffusion.eval()
    sq_err_sum, n_elems = 0.0, 0
    pbar = tqdm(loader, desc=desc, unit="batch", leave=False)
    for batch in pbar:
        ppg = batch['ppg'].to(device, non_blocking=True)
        ecg = batch['ecg'].to(device, non_blocking=True)
        B, _, _ = ecg.shape
        t = torch.full((B,), diffusion.T//2, device=device, dtype=torch.long)
        noise = torch.randn_like(ecg)
        x_t = diffusion.q_sample(ecg, t, noise)
        eps_hat = diffusion.net(x_t, ppg, t)
        sq_err_sum += F.mse_loss(eps_hat, noise, reduction='sum').item()
        # Normalise by element count, not sample count: dividing the summed
        # error by B alone yields L times the MSE, which is not comparable
        # with the training loss shown next to it.
        n_elems += noise.numel()
        pbar.set_postfix(avg_epsMSE=f"{(sq_err_sum/max(1,n_elems)):.6f}")
    return sq_err_sum / max(1, n_elems)


In [None]:

# Window length (last axis of the PPG tensor) taken from one training batch.
L = next(iter(train_ld))['ppg'].shape[-1]

# Noise-prediction network (2 input channels: noisy signal + PPG) wrapped in a
# 50-step diffusion process, moved to the selected device.
net = UNet1D(c_in=2, base=64)
diffusion = Diffusion1D(net, T=50).to(device)

# AdamW over all parameters of the diffusion module (i.e. the wrapped network).
optimizer = torch.optim.AdamW(diffusion.parameters(), lr=1e-4, weight_decay=1e-4)


In [None]:
# !pip install torchinfo
from torchinfo import summary

# Dummy inputs for the model summary.
# NOTE(review): this rebinds the notebook-level L to a hard-coded 500, replacing
# the value read from the data in an earlier cell; later cells that use L see 500.
B, L = 2, 500
x_t = torch.randn(B, 1, L).to(device)
ppg = torch.randn(B, 1, L).to(device)
t   = torch.randint(0, diffusion.T, (B,), device=device)

# Per-layer report of kernel sizes, parameter counts, mult-adds and output shapes.
summary(net, input_data=(x_t, ppg, t), depth=3, col_names=("kernel_size","num_params","mult_adds","output_size"))


In [None]:
# Free unreferenced Python objects, then release PyTorch's cached CUDA memory.
# Run this cell when a CUDA out-of-memory error shows up.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# training

# Lowest validation loss seen so far; the checkpoint is rewritten whenever it improves.
best_val = float('inf')
EPOCHS = 50
CKPT = "ppg2ecg_diffusion_best.pt"

epoch_bar = trange(1, EPOCHS+1, desc="Training", unit="epoch")
for epoch in epoch_bar:
    # One optimisation pass over the training split, then validation at the
    # fixed mid timestep used by eval_eps_mse_tqdm.
    train_loss = train_one_epoch_tqdm(diffusion, train_ld, optimizer, device, epoch, EPOCHS)
    val_loss   = eval_eps_mse_tqdm(diffusion, val_ld, device, desc="Val")
    epoch_bar.set_postfix(train=f"{train_loss:.6f}", val=f"{val_loss:.6f}")

    # Keep only the best-so-far weights, stored under the 'model' key.
    if val_loss < best_val:
        best_val = val_loss
        torch.save({'model': diffusion.state_dict()}, CKPT)
        epoch_bar.write(f"  -> saved {CKPT} (val_epsMSE={best_val:.6f})")



In [None]:
import matplotlib.pyplot as plt

# Qualitative check: generate an ECG for the first validation sample and
# overlay it on the recorded ECG.
diffusion.eval()
batch = next(iter(val_ld))
ppg = batch['ppg'][:1].to(device)  # keep the batch axis: (1,1,L)
ecg_true = batch['ecg'][:1].cpu()[0,0] # (L,)
ecg_hat  = diffusion.sample(ppg, ppg.size(-1)).cpu()[0,0]  # (L,)

plt.figure(figsize=(10,3))
plt.plot(ecg_true.numpy(), label="ECG True", linewidth=1.0)
plt.plot(ecg_hat.numpy(),  label="ECG Generated", alpha=0.8, linewidth=1.0)
plt.legend(); plt.title("PPG→ECG (DDPM) | validation sample")
plt.show()


# Generate ECG

In [None]:
# Free unreferenced Python objects, then release PyTorch's cached CUDA memory.
# Run this cell when a CUDA out-of-memory error shows up.
gc.collect()
torch.cuda.empty_cache()

In [None]:

# Rebuild the model with the same hyper-parameters used for training so the
# saved state dict matches.
net = UNet1D(c_in=2, base=64)
diffusion = Diffusion1D(net, T=50).to(device)

# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
ckpt = torch.load("ppg2ecg_diffusion_best.pt", map_location=device)
# The training cell saves under 'model'; 'diffusion' is accepted as a fallback key.
key = 'model' if 'model' in ckpt else 'diffusion'
diffusion.load_state_dict(ckpt[key])
diffusion.eval()

In [None]:
# Root folder of the time-based split.
# NOTE(review): not referenced by any other cell visible in this notebook.
DATA_ROOT = "../share/dat_working/split_by_time"

In [None]:
# Batch size used while generating ECG for every split.
BATCH = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Reuse the datasets already held by the existing loaders instead of re-reading the CSVs.
train_ds, val_ds, test_ds = train_ld.dataset, val_ld.dataset, test_ld.dataset

def make_noshuffle_loader(ds, batch_size=64, num_workers=0):
    """Wrap ``ds`` in a DataLoader that visits every sample once, in dataset order.

    Shuffling is off and the last partial batch is kept, so concatenating the
    batches reproduces the dataset's own indexing.
    """
    loader_options = dict(
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=num_workers,
        pin_memory=True,
    )
    return DataLoader(ds, **loader_options)

# Ordered loaders for all three splits, so generated outputs line up with dataset indices.
train_ld_noshuf = make_noshuffle_loader(train_ds, batch_size=BATCH)
val_ld_noshuf   = make_noshuffle_loader(val_ds,   batch_size=BATCH)
test_ld_noshuf  = make_noshuffle_loader(test_ds,  batch_size=BATCH)

In [None]:
# Free unreferenced Python objects, then release PyTorch's cached CUDA memory.
# Run this cell when a CUDA out-of-memory error shows up.
gc.collect()
torch.cuda.empty_cache()

In [None]:
diffusion.eval()

def cache_generated_ecg(split_loader, out_path, dtype=np.float16):
    """Generate an ECG for every PPG window in ``split_loader`` and save it.

    Uses the notebook-level ``diffusion`` model and ``device``. Batches are
    processed in loader order, so row ``i`` of the saved array corresponds to
    the ``i``-th sample the loader yields.

    Args:
        split_loader: loader yielding batches with a ``'ppg'`` tensor [B,1,L].
        out_path: destination passed to ``np.save``.
        dtype: NumPy dtype the generated signals are cast to before saving.

    Returns:
        The saved array of shape [N, L].
    """
    chunks = []
    for batch in tqdm(split_loader, desc=f"Generating -> {out_path}"):
        ppg = batch['ppg'].to(device, non_blocking=True)  # [B,1,L]
        with torch.no_grad():
            generated = diffusion.sample(ppg, ppg.size(-1))
            # Drop the channel axis and cast down for a smaller cache file.
            chunk = generated.cpu().numpy()[:,0,:].astype(dtype)  # [B,L]
        chunks.append(chunk)
    arr = np.concatenate(chunks, axis=0)
    np.save(out_path, arr)
    print("Saved:", out_path, arr.shape, arr.dtype)
    return arr

# Generate and cache ECG for each split; the arrays are also written to .npy files.
gen_train = cache_generated_ecg(train_ld_noshuf, "gen_ecg_train.npy")
gen_val   = cache_generated_ecg(val_ld_noshuf,   "gen_ecg_val.npy")
gen_test  = cache_generated_ecg(test_ld_noshuf,  "gen_ecg_test.npy")

In [None]:
class BPWithGenECG(Dataset):
    """Pair each sample of a base dataset with its pre-generated ECG.

    Args:
        base_ds: indexable dataset whose items are dicts with ``'ppg'``,
            ``'sbp'``, ``'dbp'`` and ``'src'`` entries.
        gen_path: ``.npy`` file whose row ``i`` is the generated ECG for
            ``base_ds[i]``.

    Raises:
        ValueError: if the file does not hold exactly one row per base sample.
    """

    def __init__(self, base_ds, gen_path):
        self.base = base_ds
        self.gen = np.load(gen_path)
        # Explicit check rather than `assert`: assertions are stripped under
        # `python -O`, which would let a misaligned cache pass silently.
        if len(self.base) != len(self.gen):
            raise ValueError("生成的ECG数量与数据集不一致！")

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        item = self.base[idx]
        # Stored row is [L]; add a channel axis and promote to float32.
        gen_ecg = torch.from_numpy(self.gen[idx]).unsqueeze(0).to(torch.float32)  # [1,L]
        return {
            'ppg': item['ppg'], 'ecg_gen': gen_ecg,
            'sbp': item['sbp'], 'dbp': item['dbp'], 'src': item['src']
        }

# Attach the cached generated ECG of each split to its base dataset.
train_bp_ds = BPWithGenECG(train_ds, "gen_ecg_train.npy")
val_bp_ds   = BPWithGenECG(val_ds,   "gen_ecg_val.npy")
test_bp_ds  = BPWithGenECG(test_ds,  "gen_ecg_test.npy")

# Only the training loader shuffles; validation and test use larger batches.
train_bp_ld = DataLoader(train_bp_ds, batch_size=128, shuffle=True,  pin_memory=True)
val_bp_ld   = DataLoader(val_bp_ds,   batch_size=256, shuffle=False, pin_memory=True)
test_bp_ld  = DataLoader(test_bp_ds,  batch_size=256, shuffle=False, pin_memory=True)

In [None]:
class BiLSTMRegressor(nn.Module):
    """Conv1d front-end followed by a bidirectional LSTM and a linear head.

    Args:
        in_dim: channels of the concatenated input (PPG plus ECG).
        hid: hidden size of each LSTM direction.

    ``forward(ppg, ecg)`` returns a (B, 2) tensor.
    """

    def __init__(self, in_dim=2, hid=128):
        super().__init__()
        self.proj = nn.Conv1d(in_dim, 64, 3, padding=1)
        self.rnn = nn.LSTM(
            input_size=64,
            hidden_size=hid,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence 2 * hid input features.
        self.fc = nn.Linear(hid*2, 2)

    def forward(self, ppg, ecg):
        signals = torch.cat([ppg, ecg], dim=1)        # stack along channels
        features = self.proj(signals).transpose(1, 2)  # channels-last for the LSTM
        sequence, _ = self.rnn(features)
        pooled = sequence.mean(dim=1)                  # average over time steps
        return self.fc(pooled)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Blood-pressure regressor taking two input channels (PPG + ECG).
bp = BiLSTMRegressor(in_dim=2, hid=128).to(device)

# Dummy inputs for the summary; L is whatever the notebook-level L currently holds.
B = 4
ppg_dummy = torch.randn(B, 1, L).to(device)
ecg_dummy = torch.randn(B, 1, L).to(device)

summary(
    bp,
    input_data=(ppg_dummy, ecg_dummy),
    depth=3,
    col_names=("input_size","output_size","num_params","kernel_size","mult_adds")
)

In [None]:
# Optimiser for the BP regressor.
opt = torch.optim.AdamW(bp.parameters(), lr=5e-4, weight_decay=1e-1)
# Scale the learning rate by `factor` when the monitored value (to be minimised)
# stops improving, down to at most `min_lr`.
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
    opt, mode='min', factor=0.5, patience=3, min_lr=1e-6
)

EPOCHS   = 50   # maximum number of training epochs
PATIENCE = 8    # consecutive non-improving epochs allowed before early stopping

In [None]:
def one_epoch_train(bp, loader, opt, device, epoch, epochs):
    """Train the BP regressor for one epoch with a progress bar.

    Each batch supplies PPG plus an ECG (the generated ``'ecg_gen'`` when the
    batch has it, otherwise ``'ecg'``); the target is the stacked (SBP, DBP)
    pair and the loss is MSE with gradient-norm clipping at 1.0.

    Returns:
        The sample-weighted mean loss over the epoch.
    """
    bp.train()
    loss_sum = 0
    seen = 0
    pbar = tqdm(loader, desc=f"Epoch {epoch}/{epochs}", unit="batch", leave=False)
    for batch in pbar:
        ppg = batch['ppg'].to(device, non_blocking=True)
        ecg_key = 'ecg_gen' if 'ecg_gen' in batch else 'ecg'
        ecg = batch[ecg_key].to(device, non_blocking=True)
        targets = torch.stack([batch['sbp'].to(device), batch['dbp'].to(device)], dim=1)

        opt.zero_grad(set_to_none=True)
        loss = F.mse_loss(bp(ppg, ecg), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(bp.parameters(), 1.0)
        opt.step()

        n_items = ppg.size(0)
        loss_sum += loss.item()*n_items
        seen += n_items
        current_lr = opt.param_groups[0]['lr']
        pbar.set_postfix(train_MSE=f"{loss_sum/max(1,seen):.4f}",
                         lr=f"{current_lr:.1e}")
    return loss_sum/max(1,seen)

@torch.no_grad()
def eval_rmse(bp, loader, device, desc="Val"):
    """Compute the root mean squared error of the BP regressor over ``loader``.

    Args:
        bp: model called as ``bp(ppg, ecg)`` returning (B, 2) predictions.
        loader: iterable of batches with ``'ppg'``, ``'sbp'``, ``'dbp'`` and
            either ``'ecg_gen'`` (preferred) or ``'ecg'``.
        device: device the batches are moved to.
        desc: label shown on the progress bar.

    Returns:
        RMSE over all predicted values (SBP and DBP pooled), i.e. the square
        root of the same mean-reduced MSE used for training (0.0 for an empty
        loader).
    """
    bp.eval()
    sq_err_sum, n_elems = 0.0, 0
    for batch in tqdm(loader, desc=desc, unit="batch", leave=False):
        ppg = batch['ppg'].to(device, non_blocking=True)
        ecg = (batch['ecg_gen'] if 'ecg_gen' in batch else batch['ecg']).to(device, non_blocking=True)
        y   = torch.stack([batch['sbp'].to(device), batch['dbp'].to(device)], dim=1)
        pred = bp(ppg, ecg)
        sq_err_sum += F.mse_loss(pred, y, reduction='sum').item()
        # Count predicted values, not samples: the summed error covers two
        # targets per sample, so dividing by the batch size alone inflates
        # the result by sqrt(2).
        n_elems += y.numel()
    return (sq_err_sum/max(1,n_elems))**0.5  # RMSE

In [None]:
# `best` is the lowest validation RMSE so far; `bad` counts consecutive epochs
# without improvement.
best = float('inf'); bad = 0
CKPT = "bp_regressor_best.pt"

for epoch in trange(1, EPOCHS+1, desc="BP Training", unit="epoch"):
    tr_mse = one_epoch_train(bp, train_bp_ld, opt, device, epoch, EPOCHS)
    val_rmse = eval_rmse(bp, val_bp_ld, device, desc="Val")
    # The scheduler monitors the validation RMSE.
    sched.step(val_rmse)
    tqdm.write(f"Epoch {epoch:02d} | train MSE {tr_mse:.4f} | val RMSE {val_rmse:.4f}")

    # Require an improvement of more than 1e-6 before saving and resetting the counter.
    if val_rmse < best - 1e-6:
        best = val_rmse; bad = 0
        torch.save({'bp': bp.state_dict()}, CKPT)
        tqdm.write(f"  -> saved {CKPT} (val RMSE {best:.4f})")
    else:
        bad += 1
        if bad >= PATIENCE:
            tqdm.write("Early stopping triggered.")
            break

In [None]:
# Free unreferenced Python objects, then release PyTorch's cached CUDA memory.
# Run this cell when a CUDA out-of-memory error shows up.
gc.collect()
torch.cuda.empty_cache()

In [None]:
import numpy as np
@torch.no_grad()
def eval_mae_std(bp, loader, device):
    """Print the mean absolute error and error standard deviation for SBP and DBP.

    Predictions are collected over the whole ``loader``; the error is
    ``prediction - target``. Each batch supplies PPG plus an ECG (``'ecg_gen'``
    when the batch has it, otherwise ``'ecg'``). Nothing is returned.
    """
    bp.eval()
    pred_chunks, target_chunks = [], []
    for b in loader:
        ppg = b['ppg'].to(device)
        ecg_key = 'ecg_gen' if 'ecg_gen' in b else 'ecg'
        ecg = b[ecg_key].to(device)
        target = torch.stack([b['sbp'].to(device), b['dbp'].to(device)], dim=1)
        pred_chunks.append(bp(ppg, ecg).cpu())
        target_chunks.append(target.cpu())

    predictions = torch.cat(pred_chunks, 0).numpy()
    targets = torch.cat(target_chunks, 0).numpy()
    err = predictions - targets
    # Column 0 is SBP, column 1 is DBP.
    mae = np.abs(err).mean(axis=0)
    std = err.std(axis=0)
    print(f"SBP  MAE={mae[0]:.2f}  STD={std[0]:.2f}")
    print(f"DBP  MAE={mae[1]:.2f}  STD={std[1]:.2f}")

# Restore the best BP-regressor weights (stored under the 'bp' key by the
# training loop) and report the test-set errors.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
ck = torch.load("bp_regressor_best.pt", map_location=device)
bp.load_state_dict(ck['bp'])
eval_mae_std(bp, test_bp_ld, device)
