In [1]:
import numpy as np
import pandas as pd
import psycopg2

import matplotlib.pyplot as plt
import seaborn as sns

import matplotlib.style
# Apply the 'ggplot' style sheet to the figures drawn in this notebook.
matplotlib.style.use('ggplot')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Settings used by the connection cell below.
dbname = 'mimic'            # database to connect to
sqluser = 'mimic'           # user the connection is opened as
schema_name = 'mimiciii'    # schema placed on the search path after connecting

# No cursor exists yet; the connection cell tests this value to decide
# whether an earlier session has to be closed first.
cur = None

In [3]:
# When this cell is re-run, close the cursor and connection left over from
# the previous run before opening new ones.
if cur:
    cur.close()
    con.close()

# Open a fresh connection and a cursor on it.
con = psycopg2.connect(user=sqluser, dbname=dbname)
cur = con.cursor()

# Point the session's search path at the configured schema.
search_path_sql = 'SET search_path to ' + schema_name
cur.execute(search_path_sql)

In [4]:
# Read every row and column of icu_features into a DataFrame and preview
# the first five rows.
query = """
select * from icu_features;
"""
res = pd.read_sql_query(sql=query, con=con)
res.head(5)

Unnamed: 0,icustay_id,hadm_id,subject_id,age,gender,height,weight,bmi,ethnicity,insurance,...,ea_tv_pulm_htn,ea_tv_tr,ea_lv_cavity,ea_lv_diastolic,ea_lv_systolic,ea_lv_wall,ea_rv_cavity,ea_rv_diastolic_fluid,ea_rv_systolic,ea_rv_wall
0,287547,189332,109,9146 days 19:49:27,F,152.4,19.368394,8.339186,BLACK/AFRICAN AMERICAN,Medicaid,...,2.0,0.0,0.0,,0.0,3.0,0.0,,0.0,1.0
1,254245,192123,111,24368 days 06:56:33,F,,57.560001,,WHITE,Medicare,...,,,,,1.0,,0.0,,0.0,
2,224803,130684,225,11035 days 00:58:13,M,,69.800003,,WHITE,Medicaid,...,,,0.0,,0.0,0.0,,,,
3,215044,116935,249,27285 days 20:06:02,F,,72.0,,WHITE,Medicare,...,1.0,1.0,0.0,,1.0,,,,,
4,257572,111199,281,21906 days 04:45:22,F,,83.650002,,BLACK/AFRICAN AMERICAN,Government,...,2.0,0.0,0.0,,0.0,0.0,0.0,,0.0,


Check that icustay_id is unique. 

In [5]:
# True when no icustay_id value occurs on more than one row.
res['icustay_id'].is_unique

True

+ had sepsis

In [6]:
# Row counts for each value of the filter_angus_sepsis flag.
res.filter_angus_sepsis.value_counts()

0    44112
1    17420
Name: filter_angus_sepsis, dtype: int64

+ on vasopressors

In [7]:
# Row counts for each value of the filter_vaso flag.
res.filter_vaso.value_counts()

False    47716
True     13816
Name: filter_vaso, dtype: int64

+ first care unit or last care unit was MICU

In [12]:
# Distribution of stays over the first care unit.
res.ic_first_careunit.value_counts()

MICU     21088
CSRU      9312
SICU      8891
NICU      8100
CCU       7726
TSICU     6415
Name: ic_first_careunit, dtype: int64

In [13]:
# Distribution of stays over the last care unit.
res.ic_last_careunit.value_counts()

MICU     21310
CSRU      9392
SICU      9213
NICU      8100
CCU       7406
TSICU     6111
Name: ic_last_careunit, dtype: int64

+ first or last care unit = MICU, SICU, TSICU

In [63]:
# Count the stays whose first OR last care unit is MICU, SICU or TSICU.
# `isin` is the vectorised membership test; it replaces a per-row Python
# lambda and already returns a boolean Series, so no pd.Series() wrapper
# is needed before value_counts().
target_careunits = ['MICU', 'SICU', 'TSICU']
(res['ic_first_careunit'].isin(target_careunits)
 | res['ic_last_careunit'].isin(target_careunits)).value_counts()

True     37210
False    24322
dtype: int64

+ service type

In [64]:
# (rows, columns) of the stays with st_micu equal to 1.
res[res['st_micu'] == 1].shape

(18855, 113)

In [65]:
# (rows, columns) of the stays with st_nsicu equal to 1.
res[res['st_nsicu'] == 1].shape

(4626, 113)

In [66]:
# (rows, columns) of the stays with st_sicu equal to 1.
res[res['st_sicu'] == 1].shape

(9755, 113)

In [67]:
# (rows, columns) of the stays where at least one of the three service-type
# flags (st_micu, st_nsicu, st_sicu) equals 1.
on_micu = res['st_micu'] == 1
on_nsicu = res['st_nsicu'] == 1
on_sicu = res['st_sicu'] == 1
res[on_micu | on_nsicu | on_sicu].shape

(26773, 113)

+ service type = {micu, nsicu, sicu} or first care unit = {MICU, SICU, TSICU} or last care unit = {MICU, SICU, TSICU}

In [74]:
# Boolean mask over `res`: True when the first or last care unit is MICU,
# SICU or TSICU, or when any of the st_micu / st_nsicu / st_sicu flags is 1.
# `isin` is the vectorised membership test and replaces a per-row lambda.
target_careunits = ['MICU', 'SICU', 'TSICU']
filter_careunit = (
    res['ic_first_careunit'].isin(target_careunits)
    | res['ic_last_careunit'].isin(target_careunits)
    | (res.st_micu == 1)
    | (res.st_nsicu == 1)
    | (res.st_sicu == 1)
)
# The mask is already a Series, so it can be tallied directly.
filter_careunit.value_counts()

True     38421
False    23111
dtype: int64

+ had sepsis
+ (and/or) was on vasopressors

In [75]:
# Number of stays flagged by both filter_vaso and filter_angus_sepsis.
vaso_and_sepsis = res['filter_vaso'] & res['filter_angus_sepsis']
len(res.loc[vaso_and_sepsis])

6182

In [76]:
# Number of stays flagged by filter_vaso, filter_angus_sepsis, or both.
vaso_or_sepsis = res['filter_vaso'] | res['filter_angus_sepsis']
len(res.loc[vaso_or_sepsis])

25054

+ had sepsis or was on vasopressors
+ service type = {micu, nsicu, sicu} or first care unit = {MICU, SICU, TSICU} or last care unit = {MICU, SICU, TSICU}

In [77]:
# Stays flagged for vasopressors or sepsis that also pass the care-unit mask.
vaso_or_sepsis = res['filter_vaso'] | res['filter_angus_sepsis']
len(res.loc[vaso_or_sepsis & filter_careunit])

16885

+ had sepsis or was on vasopressors
+ service type = {micu, nsicu, sicu} or first care unit = {MICU, SICU, TSICU} or last care unit = {MICU, SICU, TSICU}
+ no cardiogenic shock

In [78]:
# Vasopressor-or-sepsis stays in the selected care units, excluding rows
# where filter_hard_cardiogenic is set.
vaso_or_sepsis = res['filter_vaso'] | res['filter_angus_sepsis']
not_cardiogenic = ~res['filter_hard_cardiogenic']
len(res.loc[vaso_or_sepsis & filter_careunit & not_cardiogenic])

15801

+ had sepsis or was on vasopressors
+ service type = {micu, nsicu, sicu} or first care unit = {MICU, SICU, TSICU} or last care unit = {MICU, SICU, TSICU}
+ no cardiogenic shock
+ not on chronic dialysis
+ was adult

In [79]:
# Build the full inclusion mask: (vasopressors or sepsis) AND care-unit mask
# AND NOT filter_hard_cardiogenic AND NOT filter_chronic_dialysis AND
# filter_adult. The terms are combined in the same left-to-right order as
# listed here.
vaso_or_sepsis = res['filter_vaso'] | res['filter_angus_sepsis']
cohort_mask = (
    vaso_or_sepsis
    & filter_careunit
    & ~res['filter_hard_cardiogenic']
    & ~res['filter_chronic_dialysis']
    & res['filter_adult']
)

# Keep the matching stays and report how many there are.
res_ = res.loc[cohort_mask]
len(res_)

14392

Number of unique subject ids

In [80]:
# Number of distinct subject_id values among the selected stays.
distinct_subjects = res_.subject_id.unique()
len(distinct_subjects)

11024

In [81]:
# Step 1: order stays by intime, newest first, and keep the top row of each
# (subject_id, filter_echo) group.
latest_per_echo_flag = (
    res_.sort_values('intime', ascending=False)
    .groupby(['subject_id', 'filter_echo'])
    .head(1)
)

# Step 2: put rows with the larger filter_echo value first and keep a single
# row per subject_id, so that row wins whenever a subject has both.
# NOTE(review): presumably filter_echo marks stays that had an echo - confirm.
final = (
    latest_per_echo_flag.sort_values('filter_echo', ascending=False)
    .groupby('subject_id')
    .head(1)
)
final.shape

(11024, 113)

Look at fluids

In [82]:
# Summary statistics for every column whose name contains 'fb'.
final.filter(like='fb').describe()

Unnamed: 0,fb_day1_input_ml,fb_day1_output_ml,fb_day1_balance_ml,fb_day2_input_ml,fb_day2_output_ml,fb_day2_balance_ml,fb_day3_input_ml,fb_day3_output_ml,fb_day3_balance_ml
count,10366.0,10366.0,10366.0,8799.0,8799.0,8799.0,6846.0,6846.0,6846.0
mean,3310.400302,2398.819786,911.580516,1865.667959,2061.576083,757.478673,1430.068455,2219.933216,114.499588
std,3109.050763,2881.052934,3888.250736,1802.702934,2594.204631,6025.865315,1432.876042,3019.216099,8654.881796
min,1.1,0.0,-122515.833344,0.333332,0.0,-235268.933342,2.733334,0.0,-334065.792215
25%,1273.262495,1080.25,-886.338403,561.606352,990.0,-1827.727243,374.39867,1067.25,-3113.125078
50%,2480.0,1850.0,452.345826,1425.0,1685.0,303.0,1018.242681,1840.0,-304.538929
75%,4368.192828,2972.75,2272.975001,2545.89046,2665.0,2953.0,2010.187511,2900.0,3163.5
max,38939.0,123200.0,35357.0,22113.447687,130775.0,39227.0,15360.5,161612.0,45132.314719
