In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
from utils.plotting import _plot_element
from database.MIMIC_cache_exp import MIMIC_cache_discretized_joint_exp_independent_measurement

In [2]:
def plot(plt_func, *args, **kwargs):
    plt.figure(figsize=[12,8])
    plt_func(*args)
    plt.grid(True)
    _plot_element(**kwargs)
    plt.show()

In [3]:
features = ["Albumin", "Bicarbonate", "Bilirubin", "Blood urea nitrogen", "CO2", "Calcium", 
            "Calcium ionized", "Capillary refill rate", "Chloride", "Cholesterol", "Creatinine",
            "Diastolic blood pressure", "Fraction inspired oxygen", "Glascow coma scale total",
            "Glucose", "Heart Rate", "Hemoglobin", "Lactate", "Magnesium", "Mean blood pressure",
            "Oxygen saturation", "Partial pressure of carbon dioxide", "Partial pressure of oxygen",
            "Partial thromboplastin time", "Platelets", "Potassium", "Prothrombin time", "Respiratory rate",
            "Systolic blood pressure", "Temperature", "Urine output", "Weight", "White blood cell count",
            "pH", "noise_0", "noise_1", "noise_2", "noise_3", "noise_4", "noise_5"]

In [25]:
agent_action_freq = [0.069205, 0.329928, 0.177276, 0.0, 0.166872, 0.058717, 0.086532,
                     0.048083, 0.006214, 0.018036, 0.000021, 0.054839, 0.092412, 0.075356,
                     0.100794, 0.996956, 0.139995, 0.009737, 0.002043, 0.158344, 0.000375, 
                     0.175087, 0.044371, 0.126963, 0.0, 0.286495, 0.0, 0.99756, 0.815238, 0.019225, 
                     0.066223, 0.181468, 0.0, 0.0, 0.002606, 0.004608, 0.00196, 0.0, 0.255593, 0.057299]

In [26]:
df = pd.DataFrame({'feature': features, 'agent_action_freq': agent_action_freq})

In [6]:
cashe_dir = '../RL_exp_cache/1024-15mins-24hrs-joint-indep-measurement-rnn-all-pataient/'

In [7]:
config = tf.ConfigProto()
config.gpu_options.allow_growth = True


with tf.Session(config=config) as sess:
        mimic_exp = MIMIC_cache_discretized_joint_exp_independent_measurement(cache_dir=cashe_dir)

        test_loader = mimic_exp.gen_test_experience(sess, batch_size=1000000, shuffle=False)
        for idx, the_dict in enumerate(test_loader):
            data_dict = the_dict
            break


In [8]:
data_dict.keys()

dict_keys(['cur_state', 'next_state', 'gain_per_action', 'prob_gain_per_action', 'std_gain_per_action', 'cur_action', 'next_action', 'labels', 'patient_inds', 'the_steps', 'total_steps', 'gain_joint', 'prob_joint', 'std_joint', 'prob_null', 'std_null', 'prev_prob_joint', 'prev_std_joint'])

In [27]:
# Calculate phy action freq
print(data_dict['cur_action'].shape)
df['phy_action_freq'] = np.mean(data_dict['cur_action'], axis=0)

(558038, 40)


In [22]:
len(np.mean(data_dict['cur_action'], axis=0))

40

In [28]:
df = df.sort_values(['phy_action_freq'], ascending=False)
df['phy_action_rank'] = np.arange(len(features))
df = df.sort_values(['agent_action_freq'], ascending=False)
df['agent_action_rank'] = np.arange(len(features))

In [29]:
df

Unnamed: 0,feature,agent_action_freq,phy_action_freq,phy_action_rank,agent_action_rank
27,Respiratory rate,0.99756,0.220523,1,0
15,Heart Rate,0.996956,0.224076,0,1
28,Systolic blood pressure,0.815238,0.212301,2,2
1,Bicarbonate,0.329928,0.012101,23,3
25,Potassium,0.286495,0.01487,20,4
38,noise_4,0.255593,0.050414,10,5
31,Weight,0.181468,0.005677,31,6
2,Bilirubin,0.177276,0.002953,35,7
21,Partial pressure of carbon dioxide,0.175087,0.005817,30,8
4,CO2,0.166872,0.005862,29,9


In [13]:
exp_df = pd.DataFrame({k: data_dict[k] for k in ['patient_inds', 'labels', 'the_steps', 'total_steps']})
patient_final_status  = exp_df.groupby('patient_inds')['labels'].apply(lambda x: max(x))

live_patient_inds = patient_final_status[patient_final_status != 1]
dead_patient_inds = patient_final_status[patient_final_status == 1]

exp_df_live = exp_df.join(live_patient_inds, on='patient_inds', how='right', rsuffix='_final')
exp_df_dead = exp_df.join(dead_patient_inds, on='patient_inds', how='right', rsuffix='_final')

phy_action_freq_live = np.mean(data_dict['cur_action'][exp_df_live.index], axis=0)
phy_action_freq_dead = np.mean(data_dict['cur_action'][exp_df_dead.index], axis=0)


In [14]:
print('number of patient lived')
len(live_patient_inds)

number of patient lived


5554

In [15]:
print('number of patient died')
sum(dead_patient_inds)

number of patient died


522

In [16]:
action_freq_live_and_dead = pd.DataFrame({'phy_action_freq_live': phy_action_freq_live,
                                         'phy_action_freq_dead': phy_action_freq_dead,
                                         'feature_name': features})
action_freq_live_and_dead['precentage_increase'] = (action_freq_live_and_dead['phy_action_freq_dead'] - action_freq_live_and_dead['phy_action_freq_live'])/ action_freq_live_and_dead['phy_action_freq_live']

In [17]:
action_freq_live_and_dead = action_freq_live_and_dead.sort_values(['phy_action_freq_live'], ascending=False)
action_freq_live_and_dead['phy_action_rank_live'] = np.arange(len(features))

action_freq_live_and_dead = action_freq_live_and_dead.sort_values(['phy_action_freq_dead'], ascending=False)
action_freq_live_and_dead['phy_action_rank_dead'] = np.arange(len(features))



In [18]:
action_freq_live_and_dead

Unnamed: 0,phy_action_freq_live,phy_action_freq_dead,feature_name,precentage_increase,phy_action_rank_live,phy_action_rank_dead
15,0.221221,0.254447,Heart Rate,0.150193,0,0
27,0.217662,0.250944,Respiratory rate,0.152903,1,1
28,0.20974,0.239538,Systolic blood pressure,0.142071,2,2
11,0.209579,0.2391,Diastolic blood pressure,0.140857,3,3
19,0.207264,0.236973,Mean blood pressure,0.14334,5,4
20,0.207776,0.216164,Oxygen saturation,0.040371,4,5
30,0.116166,0.119331,Urine output,0.027243,6,6
29,0.060877,0.068016,Temperature,0.117279,7,7
35,0.049351,0.062282,noise_1,0.262024,10,8
36,0.049188,0.062053,noise_2,0.261536,12,9


In [39]:
less_than = exp_df_dead.loc[exp_df_dead['the_steps'] < exp_df_dead['total_steps'] / 2]
bigger = exp_df_dead.loc[(exp_df_dead['the_steps'] > exp_df_dead['total_steps'] / 2)]# & (exp_df_dead['the_steps'] < exp_df_dead['total_steps'] * 3 / 5)]

phy_action_freq_less = np.mean(data_dict['cur_action'][less_than.index], axis=0)
phy_action_freq_bigger = np.mean(data_dict['cur_action'][bigger.index], axis=0)

In [40]:
action_freq_less_and_bigger = pd.DataFrame({'phy_action_freq_less': phy_action_freq_less,
                                         'phy_action_freq_bigger': phy_action_freq_bigger,
                                         'feature_name': features})
action_freq_less_and_bigger['precentage_increase'] = (action_freq_less_and_bigger['phy_action_freq_bigger'] - action_freq_less_and_bigger['phy_action_freq_less'])/ action_freq_less_and_bigger['phy_action_freq_less']

action_freq_less_and_bigger = action_freq_less_and_bigger.sort_values(['phy_action_freq_less'], ascending=False)
action_freq_less_and_bigger['phy_action_rank_less'] = np.arange(len(features))

action_freq_less_and_bigger = action_freq_less_and_bigger.sort_values(['phy_action_freq_bigger'], ascending=False)
action_freq_less_and_bigger['phy_action_rank_bigger'] = np.arange(len(features))



In [41]:
action_freq_less_and_bigger

Unnamed: 0,phy_action_freq_less,phy_action_freq_bigger,feature_name,precentage_increase,phy_action_rank_less,phy_action_rank_bigger
15,0.260562,0.248339,Heart Rate,-0.046909,0,0
27,0.258062,0.243867,Respiratory rate,-0.055005,1,1
28,0.251729,0.226959,Systolic blood pressure,-0.098399,2,2
11,0.251479,0.22632,Diastolic blood pressure,-0.100043,3,3
19,0.249313,0.224233,Mean blood pressure,-0.100593,4,4
20,0.238563,0.192845,Oxygen saturation,-0.191641,5,5
30,0.138072,0.100213,Urine output,-0.274197,6,6
35,0.062745,0.061712,noise_1,-0.016458,11,7
36,0.06387,0.060094,noise_2,-0.05912,10,8
39,0.061495,0.059881,noise_5,-0.026248,12,9
