In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
from sklearn.metrics import precision_score, recall_score, f1_score

# Data preparation

In [2]:
data = pd.read_csv('train_test.csv')

In [3]:
selected_cols = [col for col in data.columns if col not in ['charttime', 'hosp_admittime', 'hosp_dischtime', 'icu_intime', 'icu_outtime','los_icu','icu_death','text_embeddings','los_icu_class']]

In [4]:
# fill the nan
df_full = data[selected_cols].groupby('id').transform(lambda x: x.fillna(x.mean()))
df = df_full.fillna(df_full.mean())

In [5]:
# standardization
scaler = StandardScaler()
df = scaler.fit_transform(df)

# split X,y
X = df
y = data['icu_death']

# min-max
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# set time step
time_steps = 25
X_scaled = np.array([X_scaled[i:i + time_steps] for i in range(0, len(X_scaled) - time_steps + 1, time_steps)])
y = np.array([y[i] for i in range(time_steps - 1, len(y), time_steps)])

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# change the shape of data to fit the network requirement
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], X_test.shape[2]))
y_train = y_train.reshape(-1, 1)

# Modeling

In [9]:
# get the time stamp and num of features
time_steps = X_train.shape[1]
num_features = X_train.shape[2]

# tcn model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, num_features)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')  # output layer, bi-classification
])


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.F1Score()])


model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2)

Epoch 1/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - f1_score: 0.1977 - loss: 0.3599 - val_f1_score: 0.1779 - val_loss: 0.2994
Epoch 2/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1930 - loss: 0.2997 - val_f1_score: 0.1779 - val_loss: 0.2861
Epoch 3/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1890 - loss: 0.2898 - val_f1_score: 0.1779 - val_loss: 0.2816
Epoch 4/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1958 - loss: 0.2859 - val_f1_score: 0.1779 - val_loss: 0.2833
Epoch 5/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - f1_score: 0.1988 - loss: 0.2825 - val_f1_score: 0.1779 - val_loss: 0.2800
Epoch 6/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - f1_score: 0.1909 - loss: 0.2762 - val_f1_score: 0.1779 - val_loss: 0.2844
Epoch 7/50
[1m164/164[0m 

<keras.src.callbacks.history.History at 0x17c05bc72b0>

In [10]:
# save the trained model
model.save('sd_tcn_death.keras')

In [11]:
# on testing set
y_test_pred = model.predict(X_test)

# classify based on probability
y_pred_class = (y_test_pred > 0.5).astype(int)

# evaluate the result
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Precision: 0.2862595419847328
Recall: 0.24671052631578946
F1 Score: 0.26501766784452296


In [12]:
# predict the whole X and output probability
y_pred = model.predict(X_scaled)

result= pd.DataFrame(y_pred, columns=['probs'])
result.to_csv('tcn_probs_of_death_traintest.csv', index=False)

[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
