In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
# read the validation data
val_data = pd.read_csv('validation.csv')

In [3]:
# import the mortality prediction from Wen Rui's model
mortality = pd.read_csv('Validation_death_combined_3.csv')

In [4]:
# To input the predicted death status as feature
mortality['id'] = val_data['id'].unique()
val_data = pd.merge(val_data, mortality, on='id', how='right').drop(columns=['icu_death', 'Unnamed: 0', 'Survive_prob', 'Death_prob'])
val_data = val_data.rename(columns={'death': 'icu_death'})

In [5]:
def data_preprocessing(df):
    # change class into 0-1 format
    df['los_icu_class'] = df['los_icu_class'].apply(lambda x: 0 if x == 'less than 3 days' else 1)

    # drop redundant columns
    df = df.drop(columns=['charttime', 'hosp_admittime', 'hosp_dischtime', 'icu_intime', 'icu_outtime','los_icu','text_embeddings'])

    # fill nan
    df_full = df.groupby('id').transform(lambda x: x.fillna(x.mean()))
    df = df_full.fillna(df_full.mean())
    return df

In [6]:
def split_X_y(df):
    X = df.drop(columns=['los_icu_class'])
    y = df['los_icu_class']
    
    scaler = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler.fit_transform(X)

    time_steps = 25
    X_scaled = np.array([X_scaled[i:i + time_steps] for i in range(0, len(X_scaled), time_steps)])
    y = np.array([y[i] for i in range(time_steps - 1, len(y), time_steps)])

    # change the shape of data to fit the network requirement
    X_scaled = X_scaled.reshape((X_scaled.shape[0], X_scaled.shape[1], X_scaled.shape[2]))
    y = y.reshape(-1, 1)
    return X_scaled, y

In [7]:
model = tf.keras.models.load_model('sd_tcn_los.keras')

In [8]:
val_data = data_preprocessing(val_data)
X_scaled, y = split_X_y(val_data)

# get probability
probs = model.predict(X_scaled)

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [9]:
# output probablity as csv file
result = pd.DataFrame(probs, columns=['probs_of_los'])
result.to_csv('lstm_probs_of_los_valid.csv', index=False)

In [10]:
# evaluate the result
y_pred = model.predict(X_scaled)

# classify based on probability
y_pred_class = (y_pred > 0.5).astype(int)

# evaluate the result
precision = precision_score(y, y_pred_class)
recall = recall_score(y, y_pred_class)
f1 = f1_score(y, y_pred_class)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Precision: 0.48782911077993046
Recall: 0.4939637826961771
F1 Score: 0.49087728067983005
