This first code block installs the xlrd library, which is used for reading older Excel files (your data is .csv, but this step was in your original code), and imports all required libraries: TensorFlow/Keras, scikit-learn, pandas, NumPy, and tqdm.

In [None]:
# === 1. Install and Import Libraries ===

# Install xlrd if not already present
!pip install -q xlrd

import os
import zipfile
import glob
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import warnings
import shutil

# Sklearn for metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

# TensorFlow/Keras for Deep Learning
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.layers import Conv1D, Conv1DTranspose, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

# Suppress warnings
warnings.filterwarnings('ignore')

This section handles all data preparation. It includes:

Mounting your Google Drive.

unzip_data: A function to extract your .zip files.

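create_sequences_ppg_to_ecg: The most important function. It reads a dataframe, normalizes the PPG and ECG signals (zero mean, unit variance per file), and slices them into overlapping windows. Windows in which either signal is nearly flat (standard deviation of 0.1 or less) are discarded.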

Input (X): A window of the PPG signal.

Output (y): The corresponding window of the ECG signal.

load_and_process: The main wrapper function that orchestrates the unzipping and sequence creation for all your data splits.

In [None]:
# === 2. Mount Drive & Define Data Functions ===

# Mount Google Drive at /content/drive. The dataset .zip paths and the model
# save folder used later in this notebook all live under this mount point.
# NOTE: `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

def unzip_data(zip_path, extract_folder):
    """Extract an archive and list the .csv files found under the target folder.

    Args:
        zip_path: Path to the .zip archive.
        extract_folder: Directory to extract into (created if missing).

    Returns:
        A list of paths to every .csv file found (recursively) under
        ``extract_folder`` after extraction, or an empty list when
        ``zip_path`` does not exist.
    """
    if os.path.exists(zip_path):
        os.makedirs(extract_folder, exist_ok=True)
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(extract_folder)

        # The search covers the whole folder, so .csv files left over from an
        # earlier extraction into the same folder are reported as well.
        csv_pattern = os.path.join(extract_folder, '**/*.csv')
        found = glob.glob(csv_pattern, recursive=True)
        print(f"Extracted {len(found)} files from {zip_path}")
        return found

    print(f"Error: {zip_path} not found. Check your Google Drive path.")
    return []

def create_sequences_ppg_to_ecg(df, seq_length=256, step=128):
    """Slice one recording into aligned PPG (input) / ECG (target) windows.

    Both signals are z-score normalized over the whole recording, then cut
    into windows of ``seq_length`` samples whose start indices advance by
    ``step``. A window is kept only if both its PPG and ECG slices have a
    standard deviation above 0.1, which discards near-flat segments.

    Args:
        df: DataFrame with 'ECG' and 'PPG' columns of equal length.
        seq_length: Number of samples per window.
        step: Stride between consecutive window starts.

    Returns:
        A tuple ``(X, y)`` of arrays with shape
        ``(n_windows, seq_length, 1)``; ``X`` holds PPG windows and ``y`` the
        matching ECG windows. When no window qualifies, both arrays have
        shape ``(0, seq_length, 1)``.
    """
    n_samples = len(df)

    # Too short for even one window: return early so an empty recording does
    # not go through mean/std at all.
    if n_samples < seq_length:
        return (np.empty((0, seq_length, 1), dtype=float),
                np.empty((0, seq_length, 1), dtype=float))

    ecg = df['ECG'].values
    ppg = df['PPG'].values

    # Normalize signals individually; the epsilon guards against a constant
    # signal whose standard deviation is zero.
    ecg = (ecg - np.mean(ecg)) / (np.std(ecg) + 1e-6)
    ppg = (ppg - np.mean(ppg)) / (np.std(ppg) + 1e-6)

    X_seq = []
    y_seq = []

    # "+ 1" makes the last valid start index (n_samples - seq_length)
    # reachable; without it the final full window is dropped whenever
    # (n_samples - seq_length) is a multiple of step.
    for start in range(0, n_samples - seq_length + 1, step):
        stop = start + seq_length

        X_window = ppg[start:stop]
        y_window = ecg[start:stop]

        if np.std(X_window) > 0.1 and np.std(y_window) > 0.1:
            X_seq.append(X_window)
            y_seq.append(y_window)

    if not X_seq:
        # Keep the rank consistent with the non-empty case.
        return (np.empty((0, seq_length, 1), dtype=float),
                np.empty((0, seq_length, 1), dtype=float))

    # Add a trailing "channels" dimension.
    return np.expand_dims(np.array(X_seq), -1), np.expand_dims(np.array(y_seq), -1)

def load_and_process(zip_path, extract_folder, seq_length=256, debug_limit=None, step=128):
    """Unzip one data split and turn every usable CSV into PPG/ECG windows.

    Args:
        zip_path: Path to the split's .zip archive.
        extract_folder: Directory the archive is extracted into.
        seq_length: Window length passed to ``create_sequences_ppg_to_ecg``.
        debug_limit: If not None, only the first ``debug_limit`` files (in
            sorted path order) are processed.
        step: Stride between window starts, passed to
            ``create_sequences_ppg_to_ecg``.

    Returns:
        A tuple ``(X, y)`` with the windows of all files concatenated along
        axis 0. When nothing could be loaded, both are empty arrays of shape
        ``(0,)``, so callers should test ``X.shape[0] > 0``.
    """
    # glob returns paths in arbitrary order; sorting makes the debug_limit
    # subset and the sample order reproducible across runs.
    file_list = sorted(unzip_data(zip_path, extract_folder))
    if debug_limit is not None:
        file_list = file_list[:debug_limit]
        print(f"--- DEBUG MODE: Processing only {len(file_list)} files. ---")

    if not file_list:
        return np.array([]), np.array([])

    # NOTE(review): 't_sec' and 'ABP' are required although only 'ECG' and
    # 'PPG' are read below - presumably to keep the file set consistent with
    # other pipelines; confirm before relaxing.
    required_columns = ('t_sec', 'ECG', 'PPG', 'ABP')
    all_X, all_y = [], []

    for f in tqdm(file_list, desc=f"Processing {zip_path}"):
        # Best-effort loading: an unreadable file is reported and skipped so
        # it does not abort the whole split.
        try:
            df = pd.read_csv(f)
        except Exception as e:
            print(f"Could not read {f}: {e}")
            continue
        if not all(col in df.columns for col in required_columns):
            print(f"Skipping {f}: missing required columns.")
            continue

        X, y = create_sequences_ppg_to_ecg(df, seq_length=seq_length, step=step)
        if X.shape[0] > 0:
            all_X.append(X)
            all_y.append(y)

    if not all_X:
        print(f"No valid data found in {zip_path} for sequence mode.")
        return np.array([]), np.array([])

    all_X = np.concatenate(all_X, axis=0)
    all_y = np.concatenate(all_y, axis=0)
    print(f"Finished processing {zip_path}. Found {all_X.shape[0]} samples.")
    return all_X, all_y

Key features of this architecture:

Encoder: Uses Conv1D layers with strides=2 to downsample the signal. This is a learnable downsampling, which is more powerful than a fixed MaxPooling operation.

Decoder: Uses Conv1DTranspose with strides=2 to upsample the signal back to its original length.

Skip Connections: Concatenate layers merge the high-resolution feature maps from the encoder with the upsampled maps from the decoder. This is crucial for preserving the exact timing and sharp details (like the QRS complex) from the original signal.

Activation: We use LeakyReLU as recommended by the Wave-U-Net paper, which can help prevent "dying ReLU" problems.

In [None]:
# === 3. Wave-U-Net Model Definition ===

def build_wave_unet_model(input_shape=(256, 1), kernel_size=15, filters=16):
    """Build a 1D Wave-U-Net style encoder/decoder model.

    Architecture:
      * Encoder: four levels with ``filters * (1, 2, 4, 8)`` channels. Each
        level applies two same-padded Conv1D + LeakyReLU(0.2) stages, then a
        Conv1D with ``strides=2`` (+ LeakyReLU) that halves the length.
      * Bottleneck: two Conv1D + LeakyReLU stages with ``filters * 16``
        channels.
      * Decoder: four levels mirroring the encoder. Each level upsamples
        with Conv1DTranspose(``strides=2``) + LeakyReLU, concatenates the
        pre-downsampling encoder features of the same level (skip
        connection), then applies two Conv1D + LeakyReLU stages.
      * Output: a kernel-size-1 Conv1D with one channel and linear
        activation (regression output).

    Args:
        input_shape: ``(sequence_length, channels)`` of a single input.
        kernel_size: Kernel size of every convolution except the output one.
        filters: Channel count of the first encoder level.

    Returns:
        An uncompiled Keras ``Model`` mapping inputs of ``input_shape`` to an
        output of shape ``(sequence_length, 1)``.

    Raises:
        ValueError: If the sequence length is known and not divisible by 16.
    """
    n_levels = 4
    divisor = 2 ** n_levels

    # Every level halves the length (rounding up) and the decoder exactly
    # doubles it, so the skip tensors only line up when the length divides
    # by 2**n_levels. Fail here with a clear message instead of deep inside
    # graph construction at a Concatenate layer.
    seq_len = input_shape[0]
    if seq_len is not None and seq_len % divisor != 0:
        raise ValueError(
            f"Sequence length {seq_len} must be divisible by {divisor} "
            f"({n_levels} stride-2 downsampling steps)."
        )

    def _conv_act(tensor, width, strides=1):
        """Same-padded Conv1D followed by LeakyReLU(0.2)."""
        tensor = Conv1D(width, kernel_size, strides=strides, padding='same')(tensor)
        return LeakyReLU(0.2)(tensor)

    def _double_conv(tensor, width):
        """Two consecutive Conv1D + LeakyReLU stages at constant length."""
        return _conv_act(_conv_act(tensor, width), width)

    inputs = Input(shape=input_shape)
    level_widths = [filters * (2 ** level) for level in range(n_levels)]

    # --- Encoder Path ---
    skips = []
    x = inputs
    for width in level_widths:
        x = _double_conv(x, width)
        skips.append(x)                     # saved before downsampling
        x = _conv_act(x, width, strides=2)  # learnable downsample

    # --- Bottleneck ---
    x = _double_conv(x, filters * (2 ** n_levels))

    # --- Decoder Path (deepest level first) ---
    for width, skip in zip(reversed(level_widths), reversed(skips)):
        x = Conv1DTranspose(width, kernel_size, strides=2, padding='same')(x)
        x = LeakyReLU(0.2)(x)
        x = Concatenate()([x, skip])        # skip connection
        x = _double_conv(x, width)

    # Output layer: 'linear' activation for regression.
    outputs = Conv1D(1, 1, activation='linear')(x)

    return Model(inputs, outputs)

Here we define all the "hyperparameters" for our model and job.

SEQ_LENGTH: 256 is a good power-of-2, which works well with the 4 downsampling steps in the U-Net (256 -> 128 -> 64 -> 32 -> 16).

KERNEL_SIZE: A larger kernel (like 15) is common in Wave-U-Nets to capture a wider time-span in each convolution.

Paths: You must edit these paths to point to the correct location of your .zip files in Google Drive.

After setting the config, we call load_and_process to load all three datasets (train, validation, and test) into memory.

In [None]:
# === 4. Configuration & Data Loading ===

print("\n--- Starting Wave-U-Net PPG-to-ECG Model ---")

# --- Windowing and training hyperparameters ---
SEQ_LENGTH = 256    # samples per window
# NOTE: STEP is not passed to load_and_process below; windowing uses the
# default step of create_sequences_ppg_to_ecg (128, the same value).
STEP = 128
NUM_FEATURES = 1    # model input channels: PPG only
NUM_OUTPUTS = 1     # model output channels: ECG only
BATCH_SIZE = 64
EPOCHS = 20

# --- Wave-U-Net-specific hyperparameters ---
KERNEL_SIZE = 15    # convolution kernel size
FILTERS = 16        # channel count of the first encoder level

# --- Dataset archive locations ---
# !!! EDIT THESE PATHS to match your Google Drive layout !!!
train_zip_path = '/content/drive/MyDrive/11785FinalData/train.zip'
val_zip_path = '/content/drive/MyDrive/11785FinalData/val.zip'
test_zip_path = '/content/drive/MyDrive/11785FinalData/test.zip'

# --- Extract each split and convert it to (PPG, ECG) windows ---
X_train_seq, y_train_seq = load_and_process(
    train_zip_path, 'data/train', seq_length=SEQ_LENGTH
)
X_val_seq, y_val_seq = load_and_process(
    val_zip_path, 'data/val', seq_length=SEQ_LENGTH
)
X_test_seq, y_test_seq = load_and_process(
    test_zip_path, 'data/test', seq_length=SEQ_LENGTH
)

# --- Report what was loaded (only the training split gates success) ---
if X_train_seq.shape[0] == 0:
    print("--- ERROR: No training data was loaded. Check your paths and data. ---")
else:
    print("--- Data Loaded Successfully ---")
    for label, array in (
        ("Training data", X_train_seq),
        ("Training labels", y_train_seq),
        ("Validation data", X_val_seq),
        ("Test data", X_test_seq),
    ):
        print(f"{label} shape: {array.shape}")

This is the final step. We:

Wrap the main logic in an if statement so that the model is only built and trained if training data was actually loaded.

Build the model using our function from Section 3.

Compile the model. We use mean_squared_error as the loss function, which is standard for signal regression. We also monitor mean_absolute_error as a more interpretable metric.

Print model.summary() so you can see the architecture and parameter count.

Train the model using model.fit(), passing in our training and validation data. We use EarlyStopping to prevent overfitting.

Evaluate the final model on the unseen test set and print the results.

In [None]:
# === 5. Build, Train, and Evaluate ===

if X_train_seq.shape[0] > 0:
    # 3. Build and compile the Wave-U-Net model
    input_shape = (SEQ_LENGTH, NUM_FEATURES)

    model = build_wave_unet_model(
        input_shape,
        kernel_size=KERNEL_SIZE,
        filters=FILTERS
    )

    # MSE is the optimized loss; MAE is tracked as an extra metric.
    model.compile(optimizer='adam',
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])

    print("\n--- Model Summary ---")
    model.summary()

    # load_and_process returns empty arrays when a split could not be loaded.
    # Passing those to fit()/evaluate() would crash, so degrade gracefully.
    has_val_data = X_val_seq.shape[0] > 0
    has_test_data = X_test_seq.shape[0] > 0

    # 4. Train Model
    print("\n--- Training Wave-U-Net model... ---")
    if not has_val_data:
        print("--- WARNING: No validation data was loaded; "
              "early stopping will monitor the training loss instead. ---")

    # Stop after 3 epochs without improvement and roll back to the best weights.
    early_stopping = EarlyStopping(
        monitor='val_loss' if has_val_data else 'loss',
        patience=3,
        restore_best_weights=True
    )

    history = model.fit(
        X_train_seq, y_train_seq,
        validation_data=(X_val_seq, y_val_seq) if has_val_data else None,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[early_stopping],
        verbose=1
    )

    # 5. Evaluate on Test Set
    if has_test_data:
        print("\n--- Evaluating Wave-U-Net on test set... ---")
        # evaluate() returns [loss, *metrics] in compile order: [MSE, MAE].
        results = model.evaluate(X_test_seq, y_test_seq, batch_size=BATCH_SIZE)
        test_loss = results[0]
        test_mae = results[1]

        # 6. Report Results
        print("\n--- Wave-U-Net Model Test Results ---")
        print(f"Test Set MSE (Loss): {test_loss:.4f}")
        print(f"Test Set MAE:        {test_mae:.4f}")
        print("-------------------------------------")
    else:
        print("\n--- WARNING: No test data was loaded; skipping test-set evaluation. ---")

else:
    print("Skipping model training due to data loading error.")

As a final step, this block saves your trained model (weights and architecture) to your Google Drive. This allows you to reload it later for inference or further training without having to start from scratch.

In [None]:
# === 6. Save a Trained Model to Your Drive ===

# Target folder in Google Drive and the full path of the model file.
save_folder = '/content/drive/My Drive/MyProject'
model_save_path = os.path.join(save_folder, 'wave_unet_ppg_to_ecg_model.keras')

# Save the model. Folder creation happens inside the try block so that a
# Drive problem (e.g. not mounted) is reported like any other save failure
# instead of raising a raw traceback.
try:
    # Touch `model` first: if training was skipped this raises NameError
    # before any folder is created.
    trained_model = model
    os.makedirs(save_folder, exist_ok=True)
    trained_model.save(model_save_path)
    print(f"Model successfully saved to: {model_save_path}")
except NameError:
    print("Could not save model. 'model' variable is not defined.")
    print("This likely means the training step was skipped due to a data error.")
except Exception as e:
    # Top-level notebook boundary: report the failure rather than crash the cell.
    print(f"An error occurred while saving: {e}")