In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
from sklearn.metrics import precision_score, recall_score, f1_score

# Data preparation

In [2]:
data = pd.read_csv('train_test.csv')

In [3]:
selected_cols = [col for col in data.columns if col not in ['charttime', 'hosp_admittime', 'hosp_dischtime', 'icu_intime', 'icu_outtime','los_icu','icu_death','text_embeddings','los_icu_class']]

In [4]:
# fill the nan
df_full = data[selected_cols].groupby('id').transform(lambda x: x.fillna(x.mean()))
df = df_full.fillna(df_full.mean())

In [5]:
# split X,y
X = df
y = data['icu_death']

# numerical columns
num = df.select_dtypes(include=['float']).columns
X_num = X[num]

# min-max standardization
scaler = MinMaxScaler(feature_range=(0, 1))
X_num_scaled = scaler.fit_transform(X_num)
X_num_scaled = pd.DataFrame(X_num_scaled, columns=num, index=X_num.index)
X_scaled = X.copy()
X_scaled[num] = X_num_scaled[num]

In [6]:
# set time step
time_steps = 25
X_scaled = np.array([X_scaled[i:i + time_steps] for i in range(0, len(X_scaled) - time_steps + 1, time_steps)])
y = np.array([y[i] for i in range(time_steps - 1, len(y), time_steps)])

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# change the shape of data to fit the network requirement
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))
y_train = y_train.reshape(-1, 1)

# Modeling

In [7]:
# get the time stamp and num of features
time_steps = X_train.shape[1]
num_features = X_train.shape[2]

# tcn model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu',dilation_rate=1, input_shape=(time_steps, num_features)),
    #MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'dilation_rate=2),
    #MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'dilation_rate=4)
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')  # output layer, bi-classification
])


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.F1Score()])


model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2)

  super().__init__(


Epoch 1/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - f1_score: 0.1873 - loss: 0.3557 - val_f1_score: 0.1779 - val_loss: 0.2952
Epoch 2/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.2029 - loss: 0.3127 - val_f1_score: 0.1779 - val_loss: 0.3000
Epoch 3/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1891 - loss: 0.2872 - val_f1_score: 0.1779 - val_loss: 0.2891
Epoch 4/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1977 - loss: 0.2868 - val_f1_score: 0.1779 - val_loss: 0.2835
Epoch 5/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - f1_score: 0.1901 - loss: 0.2774 - val_f1_score: 0.1779 - val_loss: 0.2865
Epoch 6/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1936 - loss: 0.2791 - val_f1_score: 0.1779 - val_loss: 0.2835
Epoch 7/50
[1m164/164[0m 

<keras.src.callbacks.history.History at 0x1658bbc1fa0>

In [8]:
# save the trained model
model.save('tcn_mortality.keras')

In [9]:
# on testing set
y_test_pred = model.predict(X_test)

# classify based on probability
y_pred_class = (y_test_pred > 0.5).astype(int)

# evaluate the result
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Precision: 0.3042071197411003
Recall: 0.3092105263157895
F1 Score: 0.3066884176182708


In [10]:
# predict the whole X and output probability
y_pred = model.predict(X_scaled)

result= pd.DataFrame(y_pred, columns=['probs'])
result.to_csv('tcn_probs_of_death_traintest.csv', index=False)

[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
