In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [2]:
# Load the validation set from CSV into a DataFrame.
# Later cells read its 'id' column, so the file must provide one.
val_data = pd.read_csv('validation.csv')

In [3]:
# Load the mortality predictions produced by Wen Rui's model.
# NOTE(review): patient ids are assigned to these rows positionally in a later
# cell, so the row order of this file presumably has to match the order in
# which ids first appear in validation.csv — confirm.
mortality = pd.read_csv('Validation_death_combined_3.csv')

In [6]:
# Use the predicted death status as a feature in place of the existing
# 'icu_death' column.
# Ids are attached to the predictions by position: one prediction row per
# distinct id, in order of first appearance in val_data.
# NOTE(review): this relies on the prediction file having been written in that
# same order — confirm.
mortality['id'] = val_data['id'].unique()

# Right-join so every prediction row is kept, discard the old label and the
# CSV's leftover index column, then give the prediction column ('0') the
# name 'icu_death'.
val_data = (
    val_data
    .merge(mortality, on='id', how='right')
    .drop(columns=['icu_death', 'Unnamed: 0'])
    .rename(columns={'0': 'icu_death'})
)

In [7]:
def data_preprocessing(df):
    """Encode the label, drop unused columns and impute missing values.

    Steps:
      1. Encode ``los_icu_class`` as 0 for ``'less than 3 days'`` and 1 for
         any other value.
      2. Drop the timestamp columns, the raw ``los_icu`` column and
         ``text_embeddings``.
      3. Fill missing values with the mean of the same column within the
         same ``id`` group; anything still missing (a column that is entirely
         NaN for an id) is filled with the column mean over the whole frame.

    The input frame is not modified, so the function can safely be called
    more than once on the same data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``los_icu_class`` and every column named in the
        drop step. All columns remaining after the drop must support
        ``mean()``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same row index as ``df``. The ``id`` column is
        not included, because ``groupby('id').transform`` returns only the
        non-grouping columns.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # change class into 0-1 format
    # Built with assign() on a new frame: assigning into `df` would mutate the
    # caller's data, and a second call would then re-encode every label as 1.
    encoded = df.assign(
        los_icu_class=df['los_icu_class'].ne('less than 3 days').astype('int64')
    )

    # drop redundant columns
    trimmed = encoded.drop(columns=['charttime','hosp_admittime','hosp_dischtime','icu_intime','icu_outtime','los_icu','text_embeddings'])

    # fill nan: per-id mean first, then the overall column mean as fallback
    df_full = trimmed.groupby('id').transform(lambda x: x.fillna(x.mean()))
    return df_full.fillna(df_full.mean())

In [8]:
def split_X_y(df, time_steps=25, scaler=None):
    """Split a preprocessed frame into windowed features and per-window labels.

    The float columns of the feature matrix are min-max scaled, then the rows
    are cut into consecutive, non-overlapping windows of ``time_steps`` rows.
    The label of a window is the ``los_icu_class`` value of its last row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``los_icu_class`` plus the feature columns. Its
        number of rows must be a multiple of ``time_steps``.
    time_steps : int, default 25
        Number of consecutive rows per window.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler to apply to the float columns. When omitted,
        a new ``MinMaxScaler(feature_range=(0, 1))`` is fitted on ``df``
        itself.
        NOTE(review): if the model was trained on features scaled with a
        scaler fitted on the training set, that scaler should presumably be
        passed here instead of refitting on the data being scored — confirm.

    Returns
    -------
    X_scaled : numpy.ndarray
        Array of shape ``(n_windows, time_steps, n_features)``.
    y : numpy.ndarray
        Array of shape ``(n_windows,)``.

    Raises
    ------
    ValueError
        If ``time_steps`` is not positive, or the row count is not a multiple
        of ``time_steps`` (the trailing partial window would otherwise give a
        ragged array and a feature/label length mismatch).
    """
    if time_steps <= 0:
        raise ValueError(f"time_steps must be a positive integer, got {time_steps}")

    X = df.drop(columns=['los_icu_class'])
    y = df['los_icu_class']

    if len(X) % time_steps != 0:
        raise ValueError(
            f"number of rows ({len(X)}) is not a multiple of time_steps ({time_steps})"
        )

    # numerical columns
    # Selected from X rather than df, so a float-typed label can never be
    # picked up and then fail the column lookup on X.
    num = X.select_dtypes(include=['float']).columns

    # min-max standardization
    X_scaled = X.copy()
    if len(num) > 0:
        if scaler is None:
            scaled_values = MinMaxScaler(feature_range=(0, 1)).fit_transform(X[num])
        else:
            scaled_values = scaler.transform(X[num])
        # Assign the raw array so the write-back is positional and does not
        # depend on index alignment.
        X_scaled[num] = np.asarray(scaled_values)

    # Cut into consecutive windows; rows are already in order, so a reshape
    # is equivalent to slicing out each block of `time_steps` rows.
    n_windows = len(X_scaled) // time_steps
    X_scaled = X_scaled.to_numpy().reshape(n_windows, time_steps, X_scaled.shape[1])

    # Positional selection of the last row of each window; label-based
    # `y[i]` would break on any index that is not 0..n-1. Copied so the
    # result does not alias the caller's column.
    y = y.to_numpy()[time_steps - 1::time_steps].copy()
    return X_scaled, y

In [9]:
# Load the previously saved Keras model from disk.
# NOTE(review): the file name suggests an LSTM length-of-stay classifier — confirm.
model = tf.keras.models.load_model('lstm_los.keras')

In [10]:
# Preprocess the merged validation frame and cut it into model-ready windows.
# NOTE: this rebinds val_data to the preprocessed frame, whose dropped columns
# are no longer present, so this cell cannot be re-run on its own — re-execute
# the loading and merge cells first.
val_data = data_preprocessing(val_data)
X_scaled, y = split_X_y(val_data)

# Score every window with the loaded model.
probs = model.predict(X_scaled)

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


In [11]:
# Write the predicted probabilities to a CSV file, without the row index.
# Only one column name is supplied, so probs must have exactly one column.
result = pd.DataFrame(probs, columns=['probs_of_los'])
result.to_csv('lstm_probs_of_los_valid.csv', index=False)