In [2]:
import numpy as np
import csv
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow import keras
import tensorflow as tf
# import keras_tuner as kt
import sklearn as sk
from sklearn.metrics import mean_squared_error

In [3]:
def read_data(filename):
    df = pd.read_csv(filename, dtype='float64')
    fieldnames = list(df.columns)
    data = np.array(df.to_dict(orient='records'))
    return data, fieldnames

In [4]:
data, fieldnames = read_data('dataset/rl_data_final_cont.csv')

In [5]:
print(f'All Fieldnames({len(fieldnames)}):', [i for i in enumerate(fieldnames)])

All Fieldnames(62): [(0, 'bloc'), (1, 'icustayid'), (2, 'charttime'), (3, 'gender'), (4, 'age'), (5, 'elixhauser'), (6, 're_admission'), (7, 'died_in_hosp'), (8, 'died_within_48h_of_out_time'), (9, 'mortality_90d'), (10, 'delay_end_of_record_and_discharge_or_death'), (11, 'Weight_kg'), (12, 'GCS'), (13, 'HR'), (14, 'SysBP'), (15, 'MeanBP'), (16, 'DiaBP'), (17, 'RR'), (18, 'SpO2'), (19, 'Temp_C'), (20, 'FiO2_1'), (21, 'Potassium'), (22, 'Sodium'), (23, 'Chloride'), (24, 'Glucose'), (25, 'BUN'), (26, 'Creatinine'), (27, 'Magnesium'), (28, 'Calcium'), (29, 'Ionised_Ca'), (30, 'CO2_mEqL'), (31, 'SGOT'), (32, 'SGPT'), (33, 'Total_bili'), (34, 'Albumin'), (35, 'Hb'), (36, 'WBC_count'), (37, 'Platelets_count'), (38, 'PTT'), (39, 'PT'), (40, 'INR'), (41, 'Arterial_pH'), (42, 'paO2'), (43, 'paCO2'), (44, 'Arterial_BE'), (45, 'Arterial_lactate'), (46, 'HCO3'), (47, 'mechvent'), (48, 'Shock_Index'), (49, 'PaO2_FiO2'), (50, 'median_dose_vaso'), (51, 'max_dose_vaso'), (52, 'input_total'), (53, 'inp

In [6]:
all_features = fieldnames[3:7] + fieldnames[11:50] + [fieldnames[56]] + fieldnames[57:59] + fieldnames[50:52] + fieldnames[59:61]
#features = ['Weight_kg', 'GCS', 'HR', 'SysBP', 'MeanBP', 'DiaBP', 'RR', 'SpO2', 'Temp_C', 'FiO2_1', 'Potassium', 'Sodium', 'Chloride', 'Glucose', 'BUN', 'Creatinine', 'Magnesium', 'Calcium', 'Ionised_Ca', 'CO2_mEqL', 'SGOT', 'SGPT', 'Total_bili', 'Albumin', 'Hb', 'WBC_count', 'Platelets_count', 'PTT', 'PT', 'INR', 'Arterial_pH', 'paO2', 'paCO2', 'Arterial_BE', 'Arterial_lactate', 'HCO3', 'mechvent', 'Shock_Index', 'PaO2_FiO2', 'cumulated_balance', 'SOFA', 'SIRS', 'median_dose_vaso', 'max_dose_vaso', 'vaso_input', 'iv_input']
print(f'All Features({len(all_features)}):', [i for i in enumerate(all_features)])

All Features(50): [(0, 'gender'), (1, 'age'), (2, 'elixhauser'), (3, 're_admission'), (4, 'Weight_kg'), (5, 'GCS'), (6, 'HR'), (7, 'SysBP'), (8, 'MeanBP'), (9, 'DiaBP'), (10, 'RR'), (11, 'SpO2'), (12, 'Temp_C'), (13, 'FiO2_1'), (14, 'Potassium'), (15, 'Sodium'), (16, 'Chloride'), (17, 'Glucose'), (18, 'BUN'), (19, 'Creatinine'), (20, 'Magnesium'), (21, 'Calcium'), (22, 'Ionised_Ca'), (23, 'CO2_mEqL'), (24, 'SGOT'), (25, 'SGPT'), (26, 'Total_bili'), (27, 'Albumin'), (28, 'Hb'), (29, 'WBC_count'), (30, 'Platelets_count'), (31, 'PTT'), (32, 'PT'), (33, 'INR'), (34, 'Arterial_pH'), (35, 'paO2'), (36, 'paCO2'), (37, 'Arterial_BE'), (38, 'Arterial_lactate'), (39, 'HCO3'), (40, 'mechvent'), (41, 'Shock_Index'), (42, 'PaO2_FiO2'), (43, 'cumulated_balance'), (44, 'SOFA'), (45, 'SIRS'), (46, 'median_dose_vaso'), (47, 'max_dose_vaso'), (48, 'vaso_input'), (49, 'iv_input')]


In [7]:
static_features = fieldnames[3:7]
print(f'Static Features({len(static_features)}):', [i for i in enumerate(static_features)])

Static Features(4): [(0, 'gender'), (1, 'age'), (2, 'elixhauser'), (3, 're_admission')]


In [8]:
labels = fieldnames[3:7] + fieldnames[11:50] + [fieldnames[56]] + fieldnames[57:59]
#labels = ['Weight_kg', 'GCS', 'HR', 'SysBP', 'MeanBP', 'DiaBP', 'RR', 'SpO2', 'Temp_C', 'FiO2_1', 'Potassium', 'Sodium', 'Chloride', 'Glucose', 'BUN', 'Creatinine', 'Magnesium', 'Calcium', 'Ionised_Ca', 'CO2_mEqL', 'SGOT', 'SGPT', 'Total_bili', 'Albumin', 'Hb', 'WBC_count', 'Platelets_count', 'PTT', 'PT', 'INR', 'Arterial_pH', 'paO2', 'paCO2', 'Arterial_BE', 'Arterial_lactate', 'HCO3', 'mechvent', 'Shock_Index', 'PaO2_FiO2', 'cumulated_balance', 'SOFA', 'SIRS']
print(f'labels({len(labels)}):', [i for i in enumerate(labels)])

labels(46): [(0, 'gender'), (1, 'age'), (2, 'elixhauser'), (3, 're_admission'), (4, 'Weight_kg'), (5, 'GCS'), (6, 'HR'), (7, 'SysBP'), (8, 'MeanBP'), (9, 'DiaBP'), (10, 'RR'), (11, 'SpO2'), (12, 'Temp_C'), (13, 'FiO2_1'), (14, 'Potassium'), (15, 'Sodium'), (16, 'Chloride'), (17, 'Glucose'), (18, 'BUN'), (19, 'Creatinine'), (20, 'Magnesium'), (21, 'Calcium'), (22, 'Ionised_Ca'), (23, 'CO2_mEqL'), (24, 'SGOT'), (25, 'SGPT'), (26, 'Total_bili'), (27, 'Albumin'), (28, 'Hb'), (29, 'WBC_count'), (30, 'Platelets_count'), (31, 'PTT'), (32, 'PT'), (33, 'INR'), (34, 'Arterial_pH'), (35, 'paO2'), (36, 'paCO2'), (37, 'Arterial_BE'), (38, 'Arterial_lactate'), (39, 'HCO3'), (40, 'mechvent'), (41, 'Shock_Index'), (42, 'PaO2_FiO2'), (43, 'cumulated_balance'), (44, 'SOFA'), (45, 'SIRS')]


In [9]:
# Just to indicate, not used.
action_names = fieldnames[50:52] + fieldnames[59:61]
print(f'Action Names({len(action_names)}):', [i for i in enumerate(action_names)])

Action Names(4): [(0, 'median_dose_vaso'), (1, 'max_dose_vaso'), (2, 'vaso_input'), (3, 'iv_input')]


In [10]:
def get_padded_features_and_labels(data):
    _x = []
    _y = []
    current_id = data[0]['icustayid']
    temp_x = []
    temp_y = []
    for i in range(len(data)-1):
        if data[i]['icustayid'] != data[i+1]['icustayid']:
            continue
        if data[i]['icustayid'] != current_id:
            current_id = data[i]['icustayid']
            _x.append(temp_x)
            _y.append(temp_y)
            temp_x = []
            temp_y = []
        temp_x.append([data[i][k] for k in all_features])
        temp_y.append([data[i+1][k] for k in labels])
    if temp_x != []:
        _x.append(temp_x)
        _y.append(temp_y)
    _x = tf.keras.preprocessing.sequence.pad_sequences(_x, padding='pre', dtype='float64', value=0)
    _y = tf.keras.preprocessing.sequence.pad_sequences(_y, padding='pre', dtype='float64', value=0)
    _x = np.array(_x)
    _y = np.array(_y)
    _x = _x.reshape(_x.shape[0]*_x.shape[1], _x.shape[2])
    _y = _y.reshape(_y.shape[0]*_y.shape[1], _y.shape[2])
    return _x, _y

def get_features_and_labels(data):
    _x = []
    _y = []
    for i in range(len(data)-1):
        if data[i]['icustayid'] != data[i+1]['icustayid']:
            continue
 
        _x.append([data[i][k] for k in all_features])
        _y.append([data[i+1][k] for k in labels])
    return np.array(_x), np.array(_y)

def get_comparison_testing_data(data):
    _X, _y = get_padded_features_and_labels(data)
    _, _X_te, _, _y_te = train_test_split(_X, _y, shuffle=False, test_size=0.2)
    X_te_comp = np.array([i for i in _X_te if not np.array_equal(i, np.zeros(50))])
    y_te_comp = np.array([i for i in _y_te if not np.array_equal(i, np.zeros(46))])
    return X_te_comp, y_te_comp


In [11]:
X, y = get_padded_features_and_labels(data)
print(X.shape, y.shape)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, shuffle=False, test_size=0.2)

X_te_comp, y_te_comp = get_comparison_testing_data(data)


# Normalize the features
scaler_X = sk.preprocessing.StandardScaler().fit(X)
X_tr_scaled = scaler_X.transform(X_tr)
X_te_scaled = scaler_X.transform(X_te)


# Normalize the labels
scaler_y = sk.preprocessing.StandardScaler().fit(y)
y_tr_scaled = scaler_y.transform(y_tr)
y_te_scaled = scaler_y.transform(y_te)


print(X_tr_scaled.shape, X_te_scaled.shape, y_tr_scaled.shape, y_te_scaled.shape)


(393072, 50) (393072, 46)
(314457, 50) (78615, 50) (314457, 46) (78615, 46)


In [12]:
# Extract Static features

X_tr_st_scaled = X_tr_scaled[:, :4]

X_te_st_scaled = X_te_scaled[:, :4]


print("X_tr_st_scaled shape:: ", X_tr_st_scaled.shape)
print("X_te_st_scaled shape:: ", X_te_st_scaled.shape)


X_tr_st_scaled shape::  (314457, 4)
X_te_st_scaled shape::  (78615, 4)


Hyperparameter Tuning

In [15]:
def model_builder(hp):
### Static Data Input layer (only static)
    static_inputs = tf.keras.Input(shape=(len(static_features)))

    ### Mixed Data Input Layer (both static and dynamic)
    mixed_inputs = tf.keras.Input(shape=(len(all_features)))

    ### Mixed Data Hidden layers
    hp_units1 = hp.Int('units1', min_value=64, max_value=1280, step=64)
    mixed_h1 = Dense(hp_units1)(mixed_inputs)
#     mixed_h2 = tf.keras.layers.Flatten()(mixed_h1)
    hp_units3 = hp.Int('units3', min_value=48, max_value=512, step=16)
    mixed_h3 = Dense(hp_units3)(mixed_h1)

    ### Main Output Layer
    static_outputs = (static_inputs)
    mixed_outputs = Dense(len(labels)-4)(mixed_h3)
    main_outputs = tf.keras.layers.concatenate([static_outputs, mixed_outputs])

    ### Model Creation
    model = tf.keras.Model(inputs=[static_inputs, mixed_inputs], outputs=main_outputs)

    ### Compile the model
    hp_lr = hp.Choice('learning_rate', values=[0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001])
    opt = keras.optimizers.Adam(learning_rate=hp_lr) #set=0.005, #default=0.001
    ls = keras.losses.MeanSquaredError()
    model.compile(optimizer=opt, loss=ls, metrics='mape')

    ### Model summary
#     model.summary()
    return model


In [16]:
tuner = kt.Hyperband(model_builder,
                     objective='val_loss',
                     max_epochs=50,
                     factor=3)
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             patience=5)
tuner.search([X_tr_st_scaled, X_tr_scaled], y_tr_scaled, epochs=50, batch_size=512,
                    shuffle=False,
                    validation_data=([X_te_st_scaled, X_te_scaled], y_te_scaled), 
                    callbacks=[earlystop])
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

Trial 90 Complete [00h 00m 32s]
val_loss: 0.13763993978500366

Best val_loss So Far: 0.13472634553909302
Total elapsed time: 00h 29m 08s


In [17]:
print(best_hps.get('units1'))
print(best_hps.get('units3'))
print(best_hps.get('learning_rate'))

448
480
0.0001


End Tuning

In [15]:
    ### Static Data Input layer (only static)
    static_inputs = tf.keras.Input(shape=(len(static_features)))

    ### Mixed Data Input Layer (both static and dynamic)
    mixed_inputs = tf.keras.Input(shape=(len(all_features)))

    ### Mixed Data Hidden layers
    mixed_h1 = Dense(448)(mixed_inputs)
#     mixed_h2 = tf.keras.layers.Flatten()(mixed_h1)
    
    mixed_h3 = Dense(480)(mixed_h1)

    ### Main Output Layer
    static_outputs = (static_inputs)
    mixed_outputs = Dense(len(labels)-4)(mixed_h3)
    main_outputs = tf.keras.layers.concatenate([static_outputs, mixed_outputs])

    ### Model Creation
    model = tf.keras.Model(inputs=[static_inputs, mixed_inputs], outputs=main_outputs)

    ### Compile the model
    opt = keras.optimizers.Adam(learning_rate=0.0001) #set=0.005, #default=0.001
    ls = keras.losses.MeanSquaredError()
    model.compile(optimizer=opt, loss=ls, metrics='mape')

    ### Model summary
    model.summary()


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 448)          22848       input_4[0][0]                    
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 480)          215520      dense_3[0][0]                    
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 4)]          0                                            
____________________________________________________________________________________________

In [16]:
X_tr_reshaped = X_tr_scaled#.reshape((X_tr_scaled.shape[0], 
                            #        1, X_tr_scaled.shape[1]))
X_te_reshaped = X_te_scaled#.reshape((X_te_scaled.shape[0], 
                            #        1, X_te_scaled.shape[1]))

#early stopping
#==============
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             mode='min',
                                             min_delta=0,
                                             patience=5,
                                             verbose=1)
#==============

history = model.fit([X_tr_st_scaled, X_tr_reshaped], y_tr_scaled, epochs=1000, batch_size=2048,
                    shuffle=False,
                    validation_data=([X_te_st_scaled, X_te_reshaped], y_te_scaled), 
                    verbose=1,
                    callbacks=[earlystop])


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 00017: early stopping


In [17]:
loss = model.evaluate([X_tr_st_scaled, X_tr_reshaped], y_tr_scaled, verbose=0)
print("Train loss:", loss)
loss = model.evaluate([X_te_st_scaled, X_te_reshaped], y_te_scaled, verbose=0)
print("Test loss:", loss)
# loss = model.evaluate([X_te_st_unpadded_scaled, X_te_unpadded_reshaped], y_te_unpadded_scaled, verbose=0)
# print("Test loss (unpadded):", loss)

Train loss: [0.1399281620979309, 149.74696350097656]
Test loss: [0.13474597036838531, 158.11798095703125]


In [18]:
def get_prediction_mse(model, X, y):
    _X = scaler_X.transform(X)
    _y = scaler_y.transform(y)
    _X_st = _X[:, :4]
#     _X = _X.reshape((_X.shape[0], 1, _X.shape[1]))
    prediction = model.predict([_X_st, _X])
#     prediction = scaler_y.inverse_transform(prediction)
    prediction[:, :4] = _X_st[:, :4]
    error = sk.metrics.mean_squared_error(prediction, _y)
    return error

In [19]:
print(get_prediction_mse(model, X_te_comp, y_te_comp))

0.2066437396008102


In [17]:
# start_ind = 20
# end_ind = 21
# prediction = model.predict([X_te_st_scaled[start_ind:end_ind], X_te_reshaped[start_ind:end_ind]])
# print(prediction.shape)

(1, 46)


In [18]:
# for i, j in list(zip(prediction, y_te[start_ind:end_ind])):
#     # scaler_y.inverse_transform() de-normalize the predicted values by the normalization scaler.
#     zipped = list(zip(scaler_y.inverse_transform([i])[0], j))
#     print("(prediction, actual label)")
#     print()
#     for z in range(len(labels)):
#         print(labels[z], ":: ", zipped[z])

(prediction, actual label)

gender ::  (8.673890050658883e-09, 0.0)
age ::  (28002.069690710494, 28002.0690625)
elixhauser ::  (6.000000112258407, 6.0)
re_admission ::  (-4.1935801775583315e-09, 0.0)
Weight_kg ::  (97.0377384381062, 67.7)
GCS ::  (6.7580315619060585, 15.0)
HR ::  (81.22872439072742, 81.8)
SysBP ::  (111.1549068605593, 121.2)
MeanBP ::  (75.23129144791707, 84.4)
DiaBP ::  (58.62683729080226, 66.0)
RR ::  (19.942423419509367, 17.8)
SpO2 ::  (95.48716691517072, 96.4)
Temp_C ::  (36.453570695345164, 36.4777777777778)
FiO2_1 ::  (0.48600067716983264, 0.4)
Potassium ::  (4.161803526502958, 4.2)
Sodium ::  (138.05271603330792, 137.0)
Chloride ::  (101.6840038116212, 102.0)
Glucose ::  (211.79461542622138, 195.6)
BUN ::  (20.655104033968524, 20.0)
Creatinine ::  (0.9178235622062855, 0.9)
Magnesium ::  (2.5971945898421414, 2.7)
Calcium ::  (8.583900585879597, 8.6)
Ionised_Ca ::  (1.1088589784881608, 1.2284210526315802)
CO2_mEqL ::  (28.942530229131755, 19.0)
SGOT ::  (130.35677

In [20]:
model.save('saved_models/stacked_dense_layers')

INFO:tensorflow:Assets written to: saved_models/stacked_dense_layers\assets


In [21]:
temp_model = tf.keras.models.load_model('saved_models/stacked_dense_layers')

In [22]:
loss = temp_model.evaluate([X_tr_st_scaled, X_tr_reshaped], y_tr_scaled, verbose=0)
print("Train loss:", loss)
loss = temp_model.evaluate([X_te_st_scaled, X_te_reshaped], y_te_scaled, verbose=0)
print("Test loss:", loss)
# loss = model.evaluate([X_te_st_unpadded_scaled, X_te_unpadded_reshaped], y_te_unpadded_scaled, verbose=0)
# print("Test loss (unpadded):", loss)

Train loss: [0.1399281620979309, 149.74696350097656]
Test loss: [0.13474597036838531, 158.11798095703125]


In [23]:
print(get_prediction_mse(temp_model, X_te_comp, y_te_comp))

0.2066437396008102
