In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten
from keras.optimizers import Adam
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

# Data Preparation

In [2]:
data = pd.read_csv('train_test.csv')

In [3]:
data['los_icu_class'] = data['los_icu_class'].apply(lambda x: 0 if x == 'less than 3 days' else 1)

In [4]:
drop_cols = ['charttime','hosp_admittime','hosp_dischtime','icu_intime','icu_outtime','los_icu','text_embeddings',
             'gender','admission_age','weight_admit','height','admission_type','charlson_score','atrial_fibrillation',
             'malignant_cancer','chf','ckd','cld','copd','diabetes','hypertension','ihd','stroke']
data = data.drop(columns=drop_cols)

In [5]:
# fill the nan
df_full = data.groupby('id').transform(lambda x: x.fillna(x.mean()))
df = df_full.fillna(df_full.mean())

In [6]:
X = df.drop(columns=['los_icu_class'])
y = df['los_icu_class']

time_steps = 25
X = np.array([X[i:i + time_steps] for i in range(0, len(X), time_steps)])
y = np.array([y[i] for i in range(time_steps - 1, len(y), time_steps)])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
X_train_2d = X_train.reshape(-1, X_train.shape[-1])
X_test_2d = X_test.reshape(-1, X_test.shape[-1])

In [8]:
# min-max standardization
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(X_train_2d)
X_train_2d_scaled = scaler.transform(X_train_2d)
X_test_2d_scaled = scaler.transform(X_test_2d)

In [9]:
X_train = np.array([X_train_2d_scaled[i:i + time_steps] for i in range(0, len(X_train_2d_scaled), time_steps)])
X_test = np.array([X_test_2d_scaled[i:i + time_steps] for i in range(0, len(X_test_2d_scaled), time_steps)])
y_train = y_train.reshape(-1, 1)

# Modeling

In [10]:
# get the time stamp and num of features
time_steps = X_train.shape[1]
num_features = X_train.shape[2]

# tcn model
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(time_steps, num_features)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')  # output layer, bi-classification
])


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.F1Score()])

model.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2)

  super().__init__(


Epoch 1/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - f1_score: 0.6441 - loss: 0.6922 - val_f1_score: 0.6445 - val_loss: 0.6841
Epoch 2/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - f1_score: 0.6392 - loss: 0.6860 - val_f1_score: 0.6445 - val_loss: 0.6821
Epoch 3/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - f1_score: 0.6462 - loss: 0.6824 - val_f1_score: 0.6445 - val_loss: 0.6807
Epoch 4/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - f1_score: 0.6444 - loss: 0.6797 - val_f1_score: 0.6445 - val_loss: 0.6800
Epoch 5/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - f1_score: 0.6390 - loss: 0.6776 - val_f1_score: 0.6445 - val_loss: 0.6788
Epoch 6/50
[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - f1_score: 0.6440 - loss: 0.6736 - val_f1_score: 0.6445 - val_loss: 0.6889
Epoch 7/50
[1m164/164[0m 

<keras.src.callbacks.history.History at 0x1d42bb17ca0>

In [11]:
# # save the trained model
# model.save('tcn_los.keras')

In [12]:
# on testing set
y_test_pred = model.predict(X_test)

# classify based on probability
y_pred_class = (y_test_pred > 0.5).astype(int)

# evaluate the result
auroc = roc_auc_score(y_test, y_pred_class)
precision = precision_score(y_test, y_pred_class)
recall = recall_score(y_test, y_pred_class)
f1 = f1_score(y_test, y_pred_class)

print("AUROC:", auroc)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
AUROC: 0.5537669216296771
Precision: 0.5278001611603546
Recall: 0.4384203480589023
F1 Score: 0.4789762340036563


In [13]:
# X_scaled = np.concatenate((X_train, X_test), axis=0)
# # predict the whole X and output probability
# y_pred = model.predict(X_scaled)

# result= pd.DataFrame(y_pred, columns=['probs'])
# result.to_csv('tcn_probs_of_los_traintest.csv', index=False)

[1m511/511[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
