In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [2]:
# load the validation data from the CSV file into a DataFrame
val_data = pd.read_csv(filepath_or_buffer='validation.csv')

In [3]:
def data_preprocessing(df):
    """Drop unused columns and impute missing values with column means.

    Missing entries are first replaced by the mean of the same column over
    the rows that share the same ``id``. Entries that are still missing after
    that (the whole group was empty for that column) are replaced by the
    column mean over the entire frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``id`` column and every column named in
        ``unused_columns``; ``drop`` raises ``KeyError`` otherwise.

    Returns
    -------
    pandas.DataFrame
        The imputed frame. It is built from ``groupby('id').transform``, so
        the ``id`` grouping column itself is not part of the result.
    """
    # columns that are not used downstream
    unused_columns = [
        'charttime',
        'hosp_admittime',
        'hosp_dischtime',
        'icu_intime',
        'icu_outtime',
        'los_icu',
        'text_embeddings',
        'los_icu_class',
    ]
    trimmed = df.drop(columns=unused_columns)

    def _fill_with_own_mean(values):
        # replace the gaps in `values` with the mean of `values` itself
        return values.fillna(values.mean())

    # first pass: impute inside each id group
    filled_per_id = trimmed.groupby('id').transform(_fill_with_own_mean)

    # second pass: anything still missing gets the overall column mean
    overall_means = filled_per_id.mean()
    return filled_per_id.fillna(overall_means)

In [4]:
def split_X_y(df, time_steps=25):
    """Scale the features and cut the frame into fixed-length windows.

    The ``icu_death`` column is the label; every other column is a feature.
    Float feature columns are min-max scaled to [0, 1] with a scaler fitted
    on ``df`` itself. Rows are then grouped, in their existing order, into
    consecutive non-overlapping windows of ``time_steps`` rows; trailing rows
    that do not fill a whole window are discarded. The label of a window is
    the label of its last row.

    NOTE(review): windows are cut purely by row position, so this presumably
    expects every stay to occupy exactly ``time_steps`` consecutive rows --
    confirm against the data, since no id column is consulted here.
    NOTE(review): the scaler is fitted on the frame passed in; if the model
    was trained with a scaler fitted on other data, confirm this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and an ``icu_death`` column.
    time_steps : int, optional
        Number of rows per window (default 25, the previously fixed value).

    Returns
    -------
    X_scaled : numpy.ndarray
        Array of shape ``(n_windows, time_steps, n_features)``.
    y : numpy.ndarray
        Array of shape ``(n_windows, 1)``.
        Both are empty (``n_windows == 0``) when ``df`` has fewer than
        ``time_steps`` rows.

    Raises
    ------
    ValueError
        If ``time_steps`` is not positive.
    """
    if time_steps <= 0:
        raise ValueError("time_steps must be a positive integer")

    X = df.drop(columns=['icu_death'])
    y = df['icu_death']

    # numerical columns: select them from X rather than df, otherwise a
    # float-typed 'icu_death' would be listed and X[num] would raise KeyError
    num = X.select_dtypes(include=['float']).columns

    # min-max standardization (skipped when there is nothing to fit on)
    X_scaled = X.copy()
    if len(num) > 0 and len(X) > 0:
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_num_scaled = scaler.fit_transform(X[num])
        X_num_scaled = pd.DataFrame(X_num_scaled, columns=num, index=X.index)
        X_scaled[num] = X_num_scaled[num]

    # cut into consecutive windows; the row-major reshape puts rows
    # k*time_steps .. (k+1)*time_steps - 1 into window k
    n_windows = len(X_scaled) // time_steps
    n_features = X_scaled.shape[1]
    values = X_scaled.to_numpy()[:n_windows * time_steps]
    X_scaled = values.reshape((n_windows, time_steps, n_features))

    # label of each window = label of its last row, taken by position so the
    # result does not depend on the frame's index labels
    y = y.to_numpy()[time_steps - 1::time_steps]
    y = y.reshape(-1, 1)
    return X_scaled, y

In [5]:
# restore the saved Keras model from its file
model = tf.keras.models.load_model(filepath='tcn_mortality.keras')

In [6]:
# Bind the preprocessed frame to a new name so the raw val_data stays intact.
# Reassigning val_data made this cell fail on a second run, because
# data_preprocessing would try to drop columns that were already removed.
val_processed = data_preprocessing(val_data)
X_scaled, y = split_X_y(val_processed)

# get probability
probs = model.predict(X_scaled)

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [7]:
# write the predicted probabilities to a CSV file, without the index column
result = pd.DataFrame(data=probs, columns=['probs_of_death'])
result.to_csv(path_or_buf='tcn_probs_of_death_valid.csv', index=False)