In [14]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
import neurokit2 as nk
import json

In [15]:
def load_data(train_path, test_path):
    """
    Read the train and test CSV files, using their "id" column as the index.

    Parameters:
        train_path (str): Path of the training CSV file.
        test_path (str): Path of the test CSV file.

    Returns:
        tuple: (train, test) DataFrames, both indexed by "id".
    """
    train_df, test_df = (
        pd.read_csv(csv_path, index_col="id") for csv_path in (train_path, test_path)
    )
    return train_df, test_df

# Read the raw train/test tables (both indexed by "id")
train_path, test_path = "data/train.csv", "data/test.csv"
train, test = load_data(train_path, test_path)

# Split the training table: the "y" column holds the labels,
# every remaining column is treated as signal data
train_signals = train.drop("y", axis=1)
train_labels = train["y"]

In [20]:
def process_signal(row, signal_columns, sampling_rate=300):
    """
    Detect and undo inversion for one signal row while keeping NaN samples in place.

    The non-NaN samples are compacted into a single array, passed to
    ``nk.ecg_invert``, and the returned samples are written back to the
    positions they came from. Positions that held NaN stay NaN. A row with
    no valid samples skips the detector and is reported as not inverted.

    Parameters:
        row (pd.Series): Row holding the signal; ``row.name`` is used as the signal ID.
        signal_columns (list): Labels of the entries in ``row`` that hold signal samples.
        sampling_rate (int): Sampling rate forwarded to ``nk.ecg_invert``.

    Returns:
        dict: ``"id"``, ``"inverted"`` (int-cast inversion flag) and one
        ``"x{i}"`` entry per sample position of the corrected signal.
    """
    samples = row[signal_columns].to_numpy(dtype="float32")
    not_nan = np.logical_not(np.isnan(samples))

    # Start from an all-NaN buffer so that gaps survive untouched
    output = np.full(samples.shape, np.nan, dtype=samples.dtype)
    flipped = False

    if not_nan.any():
        # Only the valid samples are handed to the inversion detector
        fixed, flipped = nk.ecg_invert(samples[not_nan], sampling_rate=sampling_rate, show=False)
        output[not_nan] = fixed

    # Flat record: metadata first, then one key per sample position
    record = {"id": row.name, "inverted": int(flipped)}
    for position, value in enumerate(output):
        record[f"x{position}"] = value
    return record

def detect_and_correct_inversion_parallel(signals, sampling_rate=300, n_jobs=-1):
    """
    Detect and correct inversions in parallel, treating rows as signals.

    Parameters:
        signals (pd.DataFrame): One signal per row. Columns whose name starts
            with "x" are used as the signal samples; the row index is used
            as the signal ID.
        sampling_rate (int): Sampling rate forwarded to ``process_signal``.
        n_jobs (int): Number of workers passed to ``joblib.Parallel``.

    Returns:
        tuple: ``(corrected_df, inversion_mask)`` where ``corrected_df`` has
        the columns "id", "x0", "x1", ... and ``inversion_mask`` has the
        columns "id" and "inverted". Both are empty (but keep these columns)
        when ``signals`` has no rows.
    """
    signal_columns = [col for col in signals.columns if col.startswith("x")]

    results = Parallel(n_jobs=n_jobs)(
        delayed(process_signal)(row, signal_columns, sampling_rate)
        for _, row in tqdm(signals.iterrows(), total=len(signals), desc="Detecting and Correcting Inversion")
    )

    # Spell out the output schema (it mirrors the keys emitted by
    # process_signal). With zero input rows `results` is an empty list, and a
    # bare pd.DataFrame(results) would have no columns at all, so the
    # ["id", "inverted"] selection below would raise KeyError.
    output_columns = ["id", "inverted"] + [f"x{i}" for i in range(len(signal_columns))]
    corrected_df = pd.DataFrame(results, columns=output_columns)

    inversion_mask = corrected_df[["id", "inverted"]]
    corrected_df = corrected_df.drop(columns=["inverted"])
    return corrected_df, inversion_mask


In [21]:
corrected_train, train_inversion_mask = detect_and_correct_inversion_parallel(train_signals.copy(), sampling_rate=300)

# corrected_train carries a fresh RangeIndex (the signal ID lives in the "id"
# column), whereas train_labels is indexed by "id". Inserting the Series
# directly would align on index, i.e. match row *position* against *id*, and
# silently misassign labels (or insert NaN) unless ids are exactly 0..n-1 in
# order. Attach the labels by position instead, after checking that the rows
# came back in the same order as the input.
if not np.array_equal(corrected_train["id"].to_numpy(), train_labels.index.to_numpy()):
    raise ValueError("Row order of corrected_train does not match train_labels")

# Save corrected data
corrected_train.insert(1, "y", train_labels.to_numpy())  # Ensure 'y' is at index 1
corrected_train.to_csv("data/train_inversion_nkecg.csv", index=False)
train_inversion_mask.to_csv("data/train_inversion_nkecg_mask.csv", index=False)


Detecting and Correcting Inversion: 100%|██████████| 5117/5117 [00:44<00:00, 113.87it/s]


In [22]:
# Run inversion detection/correction over every row of the test table
corrected_test, test_inversion_mask = detect_and_correct_inversion_parallel(
    test.copy(),
    sampling_rate=300,
)

# Write out the corrected signals and the per-signal inversion flags
test_inversion_mask.to_csv(path_or_buf="data/test_inversion_nkecg_mask.csv", index=False)
corrected_test.to_csv(path_or_buf="data/test_inversion_nkecg.csv", index=False)


Detecting and Correcting Inversion: 100%|██████████| 3411/3411 [00:30<00:00, 110.86it/s]
