In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Data Preprocessing

In [2]:
df_raw = pd.read_csv('train_test.csv')

In [3]:
select_cols = [col for col in df_raw.columns if col not in ('charttime','hosp_admittime','hosp_dischtime','icu_intime','icu_outtime','los_icu','text_embeddings','los_icu_class')]

In [4]:
df_full = df_raw[select_cols].groupby('id').transform(lambda x: x.fillna(x.mean()))

In [5]:
df = df_full.fillna(df_full.mean())

# Prepare data for training

In [6]:
feature_cols = df.columns.drop(['icu_death'])
target = 'icu_death'
X = df[feature_cols].values
y = df[target].values

scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X)

time_steps = 25
X_scaled = np.array([X_scaled[i:i + time_steps] for i in range(0, len(X_scaled), time_steps)])
y = np.array([y[i] for i in range(time_steps - 1, len(y), time_steps)])

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Modeling

In [7]:
# Construction
model = Sequential()
model.add(LSTM(units=25, return_sequences=False, input_shape=(time_steps, X_train.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [8]:
model.compile(optimizer='adam', loss='binary_crossentropy') # , metrics=[tf.keras.metrics.F1Score()]

In [9]:
# calculate weight of each classification
total_samples = len(y_train)
class_count = np.bincount(y_train)
class_weights = total_samples / (len(class_count) * class_count)
# change into dictionary format
class_weights_dict = dict(enumerate(class_weights))

In [10]:
class_weights_dict

{0: 0.5592341956484496, 1: 4.720535068691251}

In [11]:
# fit model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weights_dict)

Epoch 1/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 0.6740 - val_loss: 0.4235
Epoch 2/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.6042 - val_loss: 0.5527
Epoch 3/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5555 - val_loss: 0.5532
Epoch 4/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5480 - val_loss: 0.5574
Epoch 5/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5478 - val_loss: 0.5237
Epoch 6/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5340 - val_loss: 0.5324
Epoch 7/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5225 - val_loss: 0.6598
Epoch 8/50
[1m409/409[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 0.5313 - val_loss: 0.4771
Epoch 9/50
[1m409/409[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x287d2395790>

In [12]:
# save the trained model
model.save('lstm_mortality.keras')

In [13]:
# on testing set
y_test_pred = model.predict(X_test)

# classify based on probability
y_pred_class = (y_test_pred > 0.5).astype(int)

# evaluate the result
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Precision: 0.21682464454976302
Recall: 0.6019736842105263
F1 Score: 0.31881533101045295


In [14]:
# predict the whole X
y_pred = model.predict(X_scaled)

# classify based on probability
y_pred_class = (y_pred > 0.5).astype(int)

# evaluate the result
precision = precision_score(y, y_pred_class)
recall = recall_score(y, y_pred_class)
f1 = f1_score(y, y_pred_class)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Precision: 0.34360189573459715
Recall: 0.8595139300533492
F1 Score: 0.4909429490435077


In [15]:
result_df = pd.DataFrame(y_pred, columns=['predictions'])

# output death probability of all patients
result_df.to_csv('lstm_probs_of_death_traintest.csv', index=False)