Modified Code for PPG-to-ECG Synthesis (using U-Net)

In [None]:
# === 1. Install and Import Libraries ===

# Install xlrd if not already present
!pip install -q xlrd

import os
import zipfile
import glob
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import warnings
import shutil

# Scipy for signal processing
from scipy.signal import find_peaks

# Sklearn for metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

# TensorFlow/Keras for Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Conv1DTranspose, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping

# Suppress warnings
warnings.filterwarnings('ignore')

# === 2. Mount Drive & Define Data Functions ===

# Colab-only step: mount Google Drive at /content/drive so that the dataset
# archives referenced later in this notebook (paths under /content/drive/...)
# are readable. Outside Colab the `google.colab` import is not available.
from google.colab import drive
drive.mount('/content/drive')

def unzip_data(zip_path, extract_folder):
    """Extract a zip archive and list the CSV files under the target folder.

    Args:
        zip_path: Path to the ``.zip`` archive.
        extract_folder: Directory to extract into; created if it is missing.

    Returns:
        A sorted list of paths to every ``.csv`` file found recursively under
        ``extract_folder`` (this includes files left there by earlier
        extractions), or an empty list when ``zip_path`` does not exist.

    Raises:
        zipfile.BadZipFile: If ``zip_path`` exists but is not a valid archive.
    """
    if not os.path.exists(zip_path):
        print(f"Error: {zip_path} not found. Check your Google Drive path.")
        return []
    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    # glob yields matches in arbitrary, filesystem-dependent order. Sorting
    # makes the result reproducible, which matters when a caller slices the
    # list (e.g. to process only the first N files while debugging).
    csv_files = sorted(
        glob.glob(os.path.join(extract_folder, '**/*.csv'), recursive=True)
    )
    print(f"Extracted {len(csv_files)} files from {zip_path}")
    return csv_files

def create_sequences_ppg_to_ecg(df, seq_length=256, step=128):
    """Cut one recording into aligned PPG (input) / ECG (target) windows.

    Both signals are z-score normalized over the whole recording, then sliced
    into windows of ``seq_length`` samples whose start indices advance by
    ``step``. A window is kept only if both its PPG and its ECG segment have a
    standard deviation above 0.1, which drops flat/dead stretches.

    Args:
        df: DataFrame with at least the columns ``'ECG'`` and ``'PPG'``.
        seq_length: Number of samples per window.
        step: Offset between consecutive window starts.

    Returns:
        A tuple ``(X, y)`` of arrays, each of shape
        ``(num_windows, seq_length, 1)``; ``X`` holds PPG windows and ``y``
        the matching ECG windows. When no window qualifies, both arrays have
        shape ``(0, seq_length, 1)``.
    """
    ecg = df['ECG'].values
    ppg = df['PPG'].values

    num_samples = len(df)
    if num_samples < seq_length:
        # Not even one full window fits; skip normalization entirely so an
        # empty recording does not trigger mean/std-of-empty warnings.
        return np.empty((0, seq_length, 1)), np.empty((0, seq_length, 1))

    # Normalize signals individually (epsilon avoids division by zero on a
    # perfectly constant signal).
    ecg = (ecg - np.mean(ecg)) / (np.std(ecg) + 1e-6)
    ppg = (ppg - np.mean(ppg)) / (np.std(ppg) + 1e-6)

    X_seq = []
    y_seq = []

    # The "+ 1" makes the last valid start index (num_samples - seq_length)
    # reachable; without it a recording of exactly seq_length samples would
    # produce no window at all.
    for start in range(0, num_samples - seq_length + 1, step):
        stop = start + seq_length

        X_window = ppg[start:stop]  # model input
        y_window = ecg[start:stop]  # regression target

        # Skip flat/dead segments in either signal.
        if np.std(X_window) > 0.1 and np.std(y_window) > 0.1:
            X_seq.append(X_window)
            y_seq.append(y_window)

    if not X_seq:
        # Keep the rank consistent with the non-empty case.
        return np.empty((0, seq_length, 1)), np.empty((0, seq_length, 1))

    # Add a trailing "channels" dimension for Conv1D.
    return np.expand_dims(np.array(X_seq), -1), np.expand_dims(np.array(y_seq), -1)

def load_and_process(zip_path, extract_folder, seq_length=256, debug_limit=None, step=128):
    """Unzip a dataset archive and build PPG->ECG windows from every CSV in it.

    Args:
        zip_path: Path to the ``.zip`` archive holding the CSV recordings.
        extract_folder: Directory the archive is extracted into.
        seq_length: Window length (in samples) passed to the sequence builder.
        debug_limit: If not ``None``, only the first ``debug_limit`` files are
            processed.
        step: Offset between consecutive window starts, passed to the
            sequence builder. Defaults to 128, the value that was previously
            used implicitly.

    Returns:
        A tuple ``(X, y)`` with the windows of all files concatenated along
        axis 0. When the archive is missing, contains no CSV files, or no
        file yields a usable window, both elements are empty arrays
        (``np.array([])``), so callers can test ``X.shape[0] == 0``.
    """
    file_list = unzip_data(zip_path, extract_folder)
    if debug_limit is not None:
        file_list = file_list[:debug_limit]
        print(f"--- DEBUG MODE: Processing only {len(file_list)} files. ---")

    if not file_list:
        return np.array([]), np.array([])

    # NOTE(review): 't_sec' and 'ABP' are required here although only 'ECG'
    # and 'PPG' are read by the sequence builder -- confirm this is intended.
    required_columns = ['t_sec', 'ECG', 'PPG', 'ABP']
    all_X, all_y = [], []

    for f in tqdm(file_list, desc=f"Processing {zip_path}"):
        try:
            df = pd.read_csv(f)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Could not read {f}: {e}")
            continue
        if not all(col in df.columns for col in required_columns):
            print(f"Skipping {f}: missing required columns.")
            continue

        X, y = create_sequences_ppg_to_ecg(df, seq_length=seq_length, step=step)
        if X.shape[0] > 0:
            all_X.append(X)
            all_y.append(y)

    if not all_X:
        print(f"No valid data found in {zip_path} for sequence mode.")
        return np.array([]), np.array([])

    all_X = np.concatenate(all_X, axis=0)
    all_y = np.concatenate(all_y, axis=0)
    print(f"Finished processing {zip_path}. Found {all_X.shape[0]} samples.")
    return all_X, all_y

# === 3. U-Net Model Definition ===

def conv_block(inputs, num_filters):
    """Stack two same-padded 1D convolutions (kernel size 3, ReLU).

    Args:
        inputs: Tensor fed into the first convolution.
        num_filters: Number of filters used by both convolutions.

    Returns:
        The output tensor of the second convolution.
    """
    features = inputs
    for _ in range(2):
        features = Conv1D(num_filters, 3, activation='relu', padding='same')(features)
    return features

def build_unet_1d(input_shape=(256, 1)):
    """Assemble a 4-level 1D U-Net for signal-to-signal regression.

    The encoder applies a conv block followed by 2x max pooling at 16, 32, 64
    and 128 filters; a 256-filter conv block forms the bottleneck; the decoder
    mirrors the encoder with stride-2 transposed convolutions, each
    concatenated with the encoder output of the same level before another
    conv block. A final kernel-size-1 convolution with linear activation maps
    to a single output channel.

    Args:
        input_shape: Shape of one input sample, ``(length, channels)``.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Encoder: remember each level's pre-pooling features for the skip links.
    skip_features = []
    x = inputs
    for filters in (16, 32, 64, 128):
        x = conv_block(x, filters)
        skip_features.append(x)
        x = MaxPooling1D(2)(x)

    # Bottleneck
    x = conv_block(x, 256)

    # Decoder: upsample, merge with the matching encoder level, refine.
    for filters, skip in zip((128, 64, 32, 16), reversed(skip_features)):
        x = Conv1DTranspose(filters, 2, strides=2, padding='same')(x)
        x = Concatenate()([x, skip])
        x = conv_block(x, filters)

    # Kernel-size-1 convolution down to one channel; linear for regression.
    outputs = Conv1D(1, 1, activation='linear')(x)

    return Model(inputs, outputs)


# === 4. U-Net Model Training and Evaluation ===

print("\n--- Starting U-Net PPG-to-ECG Model ---")

# 1. Define Model Parameters
# U-Nets work best with input sizes that are powers of 2
SEQ_LENGTH = 256
# STEP is not passed to load_and_process below; windows are cut with the
# sequence builder's default step of 128, which this value mirrors.
STEP = 128
NUM_FEATURES = 1  # Input is just PPG
NUM_OUTPUTS = 1   # Output is just ECG
BATCH_SIZE = 64
EPOCHS = 20

# --- Define Paths ---
# !!! EDIT THESE PATHS !!!
train_zip_path = '/content/drive/MyDrive/11785FinalData/train.zip'
val_zip_path = '/content/drive/MyDrive/11785FinalData/val.zip'
test_zip_path = '/content/drive/MyDrive/11785FinalData/test.zip'

# 2. Load and process data
X_train_seq, y_train_seq = load_and_process(train_zip_path, 'data/train', seq_length=SEQ_LENGTH)
X_val_seq, y_val_seq = load_and_process(val_zip_path, 'data/val', seq_length=SEQ_LENGTH)
X_test_seq, y_test_seq = load_and_process(test_zip_path, 'data/test', seq_length=SEQ_LENGTH)

if X_train_seq.shape[0] == 0:
    print("No training data found for sequence-based model. Aborting.")
else:
    print(f"Training data shape: {X_train_seq.shape}")
    print(f"Training labels shape: {y_train_seq.shape}")

    # 3. Build and compile the U-Net model
    input_shape = (SEQ_LENGTH, NUM_FEATURES)

    model = build_unet_1d(input_shape)

    # MSE is the training objective; MAE is tracked as an extra metric so
    # that evaluate() returns [loss, mae].
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mean_absolute_error'],
    )
    model.summary()

    # 4. Train Model
    print("\nTraining U-Net model...")
    # load_and_process returns empty arrays when a split has no usable data;
    # fall back to monitoring the training loss in that case, because
    # 'val_loss' would never be reported.
    has_validation = X_val_seq.shape[0] > 0
    if not has_validation:
        print("Warning: no validation data found. Early stopping will monitor training loss.")
    early_stopping = EarlyStopping(
        monitor='val_loss' if has_validation else 'loss',
        patience=3,
        restore_best_weights=True,
    )

    history = model.fit(
        X_train_seq, y_train_seq,
        validation_data=(X_val_seq, y_val_seq) if has_validation else None,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stopping],
        verbose=1
    )

    # 5. Evaluate on Test Set
    if X_test_seq.shape[0] == 0:
        print("\nNo test data found. Skipping evaluation.")
    else:
        print("\nEvaluating U-Net on test set...")
        # Returns [test_loss, test_mae] given the compile() settings above.
        results = model.evaluate(X_test_seq, y_test_seq, batch_size=BATCH_SIZE)
        test_loss = results[0]
        test_mae = results[1]

        # 6. Report Results
        print("\n--- U-Net Model Test Results ---")
        print(f"Test Set MSE (Loss): {test_loss:.4f}")
        print(f"Test Set MAE:        {test_mae:.4f}")
        print("----------------------------------")

    # Optional: Predict a few samples to visualize later
    # y_pred_seq = model.predict(X_test_seq[:5])


# === 5. Save a Trained Model to Your Drive ===

# Target folder on the mounted Google Drive and the model file inside it.
# NOTE(review): the dataset paths earlier in this file use 'MyDrive' while
# this one uses 'My Drive' -- confirm both resolve on your mount.
save_folder = '/content/drive/My Drive/MyProject'
model_save_path = os.path.join(save_folder, 'unet_ppg_to_ecg_model.keras')

# Make sure the target folder exists before writing into it.
os.makedirs(save_folder, exist_ok=True)

# `model` only exists if the training branch above ran, so a NameError here
# means there is nothing to save; any other failure is reported as-is.
try:
    model.save(model_save_path)
    print(f"Model successfully saved to: {model_save_path}")
except NameError:
    print("Could not save model. Make sure you have trained the model and it is in a variable named 'model'.")
except Exception as save_error:
    print(f"An error occurred while saving: {save_error}")

Mounted at /content/drive

--- Starting U-Net PPG-to-ECG Model ---
