In [None]:
import pandas as pd
import os
import numpy as np
import pickle
from tqdm import tqdm
import wfdb
from sklearn.metrics import fbeta_score, roc_auc_score, roc_curve, roc_curve, auc
from models.wavelet import WaveletModel
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

In [None]:
# Helpers for loading the raw signals and standardizing them (preprocessing)
def load_raw_data_ptbxl(df, sampling_rate, path):
    """Load the raw signals for every record in ``df``, using an on-disk cache.

    On the first call for a given sampling rate the records are read one by
    one with ``wfdb.rdsamp`` and the stacked signals are pickled to
    ``path + 'raw100.npy'`` or ``path + 'raw500.npy'``. Later calls load that
    cache file instead of re-reading the records.

    NOTE: the cache is keyed only by sampling rate, not by the contents of
    ``df``. If ``df`` changes, delete the cache file, otherwise the signals of
    the previous record list are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Record table. ``filename_lr`` is read when ``sampling_rate == 100`` and
        ``filename_hr`` when ``sampling_rate == 500``; each value is appended
        to ``path`` to form the record name given to ``wfdb.rdsamp``.
    sampling_rate : int
        Either 100 or 500.
    path : str
        Prefix for both the record files and the cache file. It is
        concatenated as-is, so it must end with a path separator.

    Returns
    -------
    numpy.ndarray
        The signal part of each record, in the order of ``df``.

    Raises
    ------
    ValueError
        If ``sampling_rate`` is neither 100 nor 500.
    """
    if sampling_rate == 100:
        filename_column, cache_name = 'filename_lr', 'raw100.npy'
    elif sampling_rate == 500:
        filename_column, cache_name = 'filename_hr', 'raw500.npy'
    else:
        # Previously this fell through to `return data` and failed with an
        # opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported sampling_rate {sampling_rate!r}; expected 100 or 500"
        )

    cache_file = path + cache_name
    if os.path.exists(cache_file):
        # SECURITY: the cache is a pickle despite its .npy suffix, so loading
        # it can execute arbitrary code. Only use cache files you created.
        return np.load(cache_file, allow_pickle=True)

    records = [wfdb.rdsamp(path + f) for f in tqdm(df[filename_column])]
    # rdsamp returns (signal, metadata); only the signal is kept.
    data = np.array([signal for signal, _meta in records])

    # Context manager guarantees the cache file is flushed and closed.
    with open(cache_file, 'wb') as cache_handle:
        pickle.dump(data, cache_handle, protocol=4)
    return data

def apply_standardizer(X, ss):
    """Transform every signal in ``X`` with the scaler ``ss``.

    Each signal is flattened into a single column, passed to
    ``ss.transform``, and reshaped back to its original shape, so one scalar
    scaling is applied to all values of the signal.

    Parameters
    ----------
    X : iterable of numpy.ndarray
        Signals to scale.
    ss : object
        Fitted scaler exposing ``transform`` on a column of values.

    Returns
    -------
    numpy.ndarray
        The scaled signals stacked into one array.
    """
    def _scale(signal):
        # flatten() copies, so the scaler never sees the caller's buffer.
        as_column = signal.flatten()[:, np.newaxis]
        return ss.transform(as_column).reshape(signal.shape)

    return np.array([_scale(signal) for signal in X])

def preprocess_signals(X_train, X_validation, X_test, outputfolder):
    """Standardize the three splits with a scaler fitted on the training split.

    A single ``StandardScaler`` is fitted on every value of ``X_train``
    (all signals stacked and flattened into one column), pickled to
    ``outputfolder + 'standard_scaler.pkl'``, and then applied to the train,
    validation and test signals.

    Parameters
    ----------
    X_train, X_validation, X_test : iterable of numpy.ndarray
        Signals of each split.
    outputfolder : str
        Prefix for the saved scaler; concatenated as-is, so it must end with
        a path separator.

    Returns
    -------
    tuple
        ``(X_train, X_validation, X_test)`` after standardization.
    """
    scaler = StandardScaler()
    # Fit on the training values only so no statistics leak from the
    # validation or test splits.
    training_values = np.vstack(X_train).flatten()[:, np.newaxis].astype(float)
    scaler.fit(training_values)

    # Persist the fitted scaler.
    scaler_path = outputfolder + 'standard_scaler.pkl'
    with open(scaler_path, 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)

    return tuple(
        apply_standardizer(split, scaler)
        for split in (X_train, X_validation, X_test)
    )
    

In [84]:
def train(X_train,y_train, X_val, y_val):
    """Create a ``WaveletModel`` sized to the training signals and fit it.

    The model is constructed with the fixed arguments ``'wm'``, ``1``,
    ``100`` and ``'outputs/'`` plus the shape of the first training signal.
    NOTE(review): these presumably are the model name, number of classes,
    sampling frequency and output folder — confirm against
    ``models.wavelet.WaveletModel``.

    Parameters
    ----------
    X_train, y_train
        Training signals and labels.
    X_val, y_val
        Validation signals and labels, passed to ``fit`` with the training data.

    Returns
    -------
    WaveletModel
        The fitted model.
    """
    # Every signal is assumed to share the shape of the first one.
    first_signal_shape = X_train[0].shape
    wavelet_model = WaveletModel('wm', 1, 100, 'outputs/', first_signal_shape)
    wavelet_model.fit(X_train, y_train, X_val, y_val)
    return wavelet_model

# Load the record table and the matching raw signals (sampling rate 100).
# The index is reset so row labels and row positions coincide, which the
# positional lookups below rely on.
df = pd.read_csv('data/mi_df.csv').reset_index(drop=True)
X = load_raw_data_ptbxl(df, 100, 'data/')

# Records without a BMI group form the train/validation pool; records with a
# BMI group are held out for per-group evaluation.
train_val_indicies = df.index[df['bmi_group'].isna()]
X_train_val = X[train_val_indicies.to_list()]
y_train_val = np.array(df['MI'].iloc[train_val_indicies])

# 80/20 split with a fixed seed so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.2,
    random_state=1,
)
model = train(X_train, y_train, X_val, y_val)

In [85]:
# Evaluate the trained model separately on each BMI group.
# The signals do not depend on the group, so they are loaded once here
# instead of once per loop iteration.
X = load_raw_data_ptbxl(df, 100, 'data/')
for group in [1, 2, 3]:
    test_indicies = df[df['bmi_group'] == group].index
    y_test = np.array(df.iloc[test_indicies]['MI'])
    X_test = X[test_indicies.to_list()]
    preds = model.predict(X_test)
    # Stored as `group_auc` so the `auc` function imported from
    # sklearn.metrics is not overwritten by a float.
    group_auc = round(roc_auc_score(y_test, preds, average='macro'), 5)
    print(f"Group {group}: auc = {group_auc}")

Group 1: auc = 0.44968
Group 2: auc = 0.46414
Group 3: auc = 0.52242
