In [7]:
# Import essential libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch import nn
from sklearn.metrics import confusion_matrix, classification_report
import torch.nn.functional as F
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics, preprocessing
from sklearn.preprocessing import MultiLabelBinarizer
import pywt

In [4]:
# Dataset, using SCRIPT CarpeDiem Dataset as example
df = pd.read_csv('...Data root...')

In [9]:
# static features and dynamic features and labels

df_static = df[['Patient_id','Age', 'Ethnicity','Gender','Race','Smoking_status','BMI','Admit_APS_score','Admit_SOFA_score']]
df_dynamic = df[['Patient_id','SOFA_score', 'Temperature',
       'Heart_rate', 'Systolic_blood_pressure', 'Diastolic_blood_pressure',
       'Mean_arterial_pressure', 'Respiratory_rate', 'Oxygen_saturation',
       'Urine_output', 'PEEP', 'FiO2', 'Plateau_Pressure', 'Lung_Compliance',
       'PEEP_changes', 'Respiratory_rate_changes', 'FiO2_changes', 'WBC_count',
       'Lymphocytes', 'Neutrophils', 'Hemoglobin', 'Platelets', 'Bicarbonate',
       'Creatinine', 'Albumin', 'Bilirubin', 'Procalcitonin']]
y_label = df[['Patient_id','Patient_category']]


df_static_result = df_static.drop_duplicates(subset='Patient_id', keep='first')
# select multi feature as new dataframe
multi = df_static_result[['Ethnicity', 'Gender', 'Race','Smoking_status']]
multi_nominal = multi.to_numpy()
multi_one_hot = MultiLabelBinarizer()
static_one_hot = multi_one_hot.fit_transform(multi_nominal)
numer_stat = df_static_result[['Age','BMI', 'Admit_APS_score', 'Admit_SOFA_score']]
multi_numer = numer_stat.to_numpy()
numericalOfStatic = preprocessing.normalize(multi_numer, norm='l2')
static_feature = np.concatenate((static_one_hot, numericalOfStatic), axis=1)

y_label_result = y_label.drop_duplicates(subset='Patient_id',keep='first')
enc = preprocessing.LabelEncoder()
y_label_final = enc.fit_transform(y_label_result['Patient_category'])



cols_to_normalize = df_dynamic.columns[1:]


normalized_data = preprocessing.normalize(df_dynamic[cols_to_normalize], norm='l2')


df_normalized = pd.concat([df_dynamic['Patient_id'], pd.DataFrame(normalized_data, columns=cols_to_normalize)], axis=1)

In [10]:
grouped_data = df_normalized.groupby('Patient_id').head(10)  

features_data = [[] for _ in range(26)]  

for idx, group in grouped_data.groupby('Patient_id'):  
    for i in range(26):  
        features_data[i].append(group.iloc[:10, i + 1].tolist())  



In [13]:
 
processed_features_data = [[] for _ in range(26)]  

for j in range(len(features_data)):  
    for i in range(len(features_data[j])):  
        wavelet = 'sym18'  
        level = 1  
        mode = 'symmetric'  
        coeffs = pywt.wavedec(features_data[j][i], wavelet, level=level, mode=mode)  
        processed_features_data[j].append(coeffs)  

del processed_features_data[-5] 


In [38]:
# Low frequency component (trend information)  
def trend_information(coe):  
    return [c[0] for c in coe]  

trend_info_list = [trend_information(feature) for feature in processed_features_data]  


cA_a, cA_b, cA_c, cA_d, cA_e, cA_f, cA_g, cA_h, cA_I, \
cA_j, cA_k, cA_l, cA_m, cA_n, cA_o, cA_p, cA_q, cA_r, \
cA_s, cA_t, cA_u, cA_w, cA_xx, cA_yy, cA_zz = trend_info_list  



print(len(cA_a))

295


In [31]:

def high_information(coe):  
    return [c[1:] for c in coe]  

processed_high_data = [high_information(feature) for feature in processed_features_data]  

 
processed_high_data_extracted = [[sublist[0] for sublist in feature] for feature in processed_high_data]  

 
cD3_a, cD3_b, cD3_c, cD3_d, cD3_e, cD3_f, cD3_g, cD3_h, cD3_I, \
cD3_j, cD3_k, cD3_l, cD3_m, cD3_n, cD3_o, cD3_p, cD3_q, cD3_r, \
cD3_s, cD3_t, cD3_u, cD3_w, cD3_xx, cD3_yy, cD3_zz = processed_high_data_extracted  

 
print(len(cD3_a[0]))

22


In [36]:
def first_order_difference(high_information_list):  
     
    return [  
        [high_information[i+1] - high_information[i] for i in range(len(high_information) - 1)]  
        for high_information in high_information_list  
    ]  


high_data_list = [  
    cD3_a, cD3_b, cD3_c, cD3_d, cD3_e, cD3_f, cD3_g, cD3_h, cD3_I, cD3_j,   
    cD3_k, cD3_l, cD3_m, cD3_n, cD3_o, cD3_p, cD3_q, cD3_r, cD3_s, cD3_t,   
    cD3_u, cD3_w, cD3_xx, cD3_yy, cD3_zz  
]  

 
diff_high_data_list = [first_order_difference(data) for data in high_data_list]  


diff_cat_all = np.concatenate([np.array(diff) for diff in diff_high_data_list], axis=1)  


print(diff_cat_all.shape)

(295, 525)


In [39]:
import numpy as np  

 
cA_list = [  
    cA_a, cA_b, cA_c, cA_d, cA_e, cA_f, cA_g, cA_h, cA_I, cA_j,   
    cA_k, cA_l, cA_m, cA_n, cA_o, cA_p, cA_q, cA_r, cA_s, cA_t,   
    cA_u, cA_w, cA_xx, cA_yy, cA_zz  
]  

cD3_list = [  
    cD3_a, cD3_b, cD3_c, cD3_d, cD3_e, cD3_f, cD3_g, cD3_h, cD3_I, cD3_j,   
    cD3_k, cD3_l, cD3_m, cD3_n, cD3_o, cD3_p, cD3_q, cD3_r, cD3_s, cD3_t,   
    cD3_u, cD3_w, cD3_xx, cD3_yy, cD3_zz  
]  

  
tensor_list = [np.stack([cA, cD3], axis=1) for cA, cD3 in zip(cA_list, cD3_list)]  

 
tensor_all = np.concatenate(tensor_list, axis=2)  

 
print(tensor_all.shape)

(295, 2, 550)


In [44]:
import torch
from torch.utils.data import Dataset

class MyDataset(Dataset):
    def __init__(self, dynamic_features, static_features, diff_cat_all, labels):
        self.dynamic_features = dynamic_features
        self.static_features = static_features

        # new
        self.diff_cat_all = diff_cat_all

        self.labels = labels

    def __getitem__(self, index):
        
        dynamic_features = self.dynamic_features[index]
        static_features = self.static_features[index]
        
        # new
        
        diff_cat_all = self.diff_cat_all[index]

        
        static_features = torch.tensor(static_features, dtype=torch.float32)

        
        labels = self.labels[index]

        
        sample = {'dynamic_features': dynamic_features, 'static_features': static_features, 'diff_cat_all': diff_cat_all, 'labels': labels}
        return sample

    def __len__(self):
        return len(self.labels)

In [45]:
import torch.nn as nn
import torch

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.dynamic_conv1 = nn.Conv2d(550, 16, kernel_size=1, stride=1, padding=0).double()
        self.dynamic_conv2 = nn.Conv2d(550, 16, kernel_size=1, stride=1, padding=0, dilation=(1, 2)).double()
        self.dynamic_conv3 = nn.Conv2d(550, 16, kernel_size=1, stride=1, padding=0, dilation=(1, 3)).double()
        self.static_linear1 = nn.Linear(16, 8).float()
        self.final_linear = nn.Linear(629, 4).float()
        self.norm = nn.BatchNorm1d(64).double()
        self.norm2 = nn.BatchNorm1d(32).double()
        self.tanh = nn.Tanh()

        # changes with attention
        self.diff_linear = nn.Linear(21, 21).float()
        self.diff_attention = nn.Linear(525, 525).float()



    def forward(self, dynamic_feature, static_feature, diff_cat_all):
        
        dynamic_feature_groups = dynamic_feature.unfold(1, 2, 12)

        
        cat_results = []
        for i in range(dynamic_feature_groups.shape[1]):
            group = dynamic_feature_groups[:, i, :, :]
            group = torch.unsqueeze(group, dim=3)  

            conv1_output = self.dynamic_conv1(group)
            conv2_output = self.dynamic_conv2(group)
            conv3_output = self.dynamic_conv3(group)
            cat_result = torch.cat([conv1_output, conv2_output, conv3_output], dim=1)
            cat_results.append(cat_result)

        #################################################
        diff_cat_all_group = diff_cat_all.unfold(1, 21, 21)
        weight = []
        for j in range(diff_cat_all_group.shape[1]):
            group_diff = diff_cat_all_group[:, j, :]
            # group_diff = torch.unsqueeze(group_diff, dim=3)
            group_1 = torch.softmax(self.diff_linear(group_diff).float(), dim=1).float()
            weight.append(group_1)
        #################################################

        
        cat_final = torch.cat(cat_results, dim=1)
        cat_weight = torch.cat(weight, dim=1)

        static_feature = static_feature.float()
        
        static_output = self.static_linear1(static_feature).float()

        # write attention here
        # weight = torch.softmax(self.diff_attention(diff_cat_all).float(), dim=1).float()


        
        combined_output = torch.cat([cat_final.view(cat_final.shape[0], -1), static_output, cat_weight], dim=1)
        combined_output = combined_output.float()
        final_output = self.final_linear(combined_output).float()

    
        return final_output


In [55]:
import torch
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_fscore_support


def k_fold_cross_validation(k, dataset, model, loss_fn, optimizer, num_epochs, batch_size):
    
    fold_size = len(dataset) // k
    folds = [torch.utils.data.Subset(dataset, range(i*fold_size, (i+1)*fold_size)) for i in range(k)]

    
    aurocs = []
    auprcs = []
    min_sens_p = []
    weight_sum = None 


    for i in range(k):
        print(f"Fold {i+1}/{k}")
        
        test_data = folds[i]
        train_data = torch.utils.data.ConcatDataset([fold for j, fold in enumerate(folds) if j != i])
        train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

        
        model.train()
        for epoch in range(num_epochs):
            for batch in train_loader:
                dynamic_features = batch['dynamic_features']
                static_features = batch['static_features']
                # new
                diff_cat_all = batch['diff_cat_all']
                labels = batch['labels']

                optimizer.zero_grad()
                # new
                outputs = model(dynamic_features, static_features, diff_cat_all)
                loss = loss_fn(outputs, labels)
                loss.backward()
                optimizer.step()






        
        model.eval()
        with torch.no_grad():
            dynamic_features = test_data.dataset.dynamic_features
            static_features = test_data.dataset.static_features

            # new
            diff_cat_all = test_data.dataset.diff_cat_all







            diff_cat_all_group = diff_cat_all.unfold(1, 21, 21)
            weight = [] 
            for j in range(diff_cat_all_group.shape[1]):
                group_diff = diff_cat_all_group[:, j, :]
                group_1 = torch.softmax(model.diff_linear(group_diff).float(), dim=1).float()
                weight.append(group_1)

            weight = torch.cat(weight, dim=1)
        
            if weight_sum is None:
                weight_sum = weight
            else:
                weight_sum += weight
            









            labels = test_data.dataset.labels

            outputs = model(dynamic_features, static_features, diff_cat_all)
            probs = torch.softmax(outputs.clone().detach().requires_grad_(True), dim=1).cpu().numpy()
           
            preds = np.argmax(probs, axis=1)
            
            labels = np.eye(len(np.unique(labels)))[labels.cpu().numpy()]

            auroc = roc_auc_score(labels, probs, multi_class='ovo')
            auprc = average_precision_score(labels, probs, average='weighted')

            p, r, f, s = precision_recall_fscore_support(np.argmax(labels, axis=1), preds, average='weighted')
            if s is not None and p is not None:
                min_sens_p.append(np.min([s, p]))
            else:
                min_sens_p.append(0.0)
            
            
            # min_sens_p.append(np.min([s, p]))

            print(f"Epoch {epoch+1}/{num_epochs}: AUROC={auroc:.4f}, AUPRC={auprc:.4f}")

        aurocs.append(auroc)
        auprcs.append(auprc)

    # Mean AUROC and AUPRC
    print(f"Mean AUROC={np.mean(aurocs):.4f}, Mean AUPRC={np.mean(auprcs):.4f}")

In [59]:
# Create Dataset
dataset = MyDataset(dynamic_features=torch.DoubleTensor(tensor_all), static_features=torch.DoubleTensor(static_feature), diff_cat_all=torch.FloatTensor(diff_cat_all), labels=torch.LongTensor(y_label_final))

# model, loss, and optimizer
model = MyModel()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# 10-fold cross validation
k = 10
num_epochs = 600
batch_size = 64
k_fold_cross_validation(k, dataset, model, loss_fn, optimizer, num_epochs, batch_size)

Fold 1/10
Epoch 600/600: AUROC=0.7804, AUPRC=0.6766
Fold 2/10
Epoch 600/600: AUROC=0.8231, AUPRC=0.7218
Fold 3/10
Epoch 600/600: AUROC=0.8583, AUPRC=0.7672
Fold 4/10
Epoch 600/600: AUROC=0.8739, AUPRC=0.7868
Fold 5/10
Epoch 600/600: AUROC=0.8863, AUPRC=0.8069
Fold 6/10
Epoch 600/600: AUROC=0.9081, AUPRC=0.8421
Fold 7/10
Epoch 600/600: AUROC=0.9225, AUPRC=0.8667
Fold 8/10
Epoch 600/600: AUROC=0.9363, AUPRC=0.8829
Fold 9/10
Epoch 600/600: AUROC=0.9493, AUPRC=0.9060
Fold 10/10
Epoch 600/600: AUROC=0.9572, AUPRC=0.9211
Mean AUROC=0.8895, Mean AUPRC=0.8178
