In [1]:
# !pip install awswrangler
# !pip install tableone
# !pip install tqdm

In [2]:
import awswrangler as wr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from tableone import TableOne

In [3]:
creatinine = wr.athena.read_sql_query("SELECT * from kdigo_creatinine", database="mimiciv")
crrt = wr.athena.read_sql_query("SELECT * from crrt", database="mimiciv")
uo = wr.athena.read_sql_query("SELECT * from urine_output", database="mimiciv")
aki = wr.athena.read_sql_query("SELECT * from kdigo_aki", database="mimiciv")
icustays = wr.athena.read_sql_query("SELECT * from icustays", database="mimiciv")
demo = wr.athena.read_sql_query("SELECT * from patients", database="mimiciv")
admissions = wr.athena.read_sql_query("SELECT * from admissions", database="mimiciv")
diag = wr.athena.read_sql_query("SELECT * from diagnoses_icd", database="mimiciv")
dnr = wr.athena.read_sql_query("SELECT hadm_id from chartevents where itemid=22378 or itemid=228687", database="mimiciv")
kdigo_creat = wr.athena.read_sql_query("SELECT * from kdigo_creatinine", database="mimiciv")
kdigo_uo = wr.athena.read_sql_query("SELECT * from kdigo_uo", database="mimiciv")

In [4]:
aki.shape

(4103559, 13)

In [5]:
aki.nunique()

subject_id               53569
hadm_id                  69639
stay_id                  76943
charttime              1684027
creat_low_past_7day        280
creat_low_past_48hr        290
creat                      317
aki_stage_creat              4
uo_rt_6hr               174772
uo_rt_12hr              143392
uo_rt_24hr              134629
aki_stage_uo                 1
aki_stage                    4
dtype: int64

In [6]:
aki['aki_stage'].value_counts()

0    3977653
1      76799
3      24624
2      24483
Name: aki_stage, dtype: Int64

In [7]:
# Merge on hadm_id and not subject_id to get the exact encounters.
aki = aki.merge(admissions, on=['hadm_id', 'subject_id'])
aki = aki.merge(demo)

In [8]:
aki.shape

(4103559, 31)

## Inclusion / Exclusion Criteria 
1. Patients > 18
2. 1st ICU stay
3. Measurement of creatinine and urine ouput within 24 hours
4. aki stage 2 
5. not a CEER ICD 10-N18
6. No DNR

In [9]:
# Filter aki for stage 2
aki2 = aki[aki['aki_stage'] != 0].reset_index(drop=True)

In [10]:
aki2.shape

(125906, 31)

In [11]:
# Patients > 18
aki_18 = aki2[aki2['anchor_age'] >= 18]

In [12]:
aki_18.shape

(125906, 31)

In [13]:
# Get the first ICU stay.
def get_first_icu_stay(df):
    result = pd.DataFrame()
    for pat in tqdm(df['subject_id'].unique()):
        tmp = df[df['subject_id'] == pat].reset_index(drop=True)
        min_stay = tmp[tmp['admittime'] == min(tmp['admittime'])]
        result = result.append(min_stay)
        
    return result

In [14]:
aki_first = get_first_icu_stay(aki_18)

100%|██████████| 17667/17667 [12:13<00:00, 24.10it/s]


In [15]:
aki_first.shape

(104358, 31)

In [16]:
# Find people with chronic kidney diease. 
# Drop all encounters after the diagnosis
chronic_kidney = diag[diag['icd_code'].str.contains('n18', case=False)]
chronic_kidney_time = chronic_kidney.merge(admissions)
chronic_kidney_time = chronic_kidney_time[['subject_id', 'hadm_id', 'admittime']]
chronic_kidney_time.rename({'admittime' : 'ca_admittime'}, axis=1, inplace=True)
chronic_kidney_time.head()

Unnamed: 0,subject_id,hadm_id,ca_admittime
0,10003400,23559586,2137-08-04 00:07:00
1,10003637,28484061,2149-05-13 15:41:00
2,10070201,24609173,2140-08-15 18:08:00
3,10070539,21243910,2179-12-04 18:12:00
4,10070594,20956461,2174-02-03 21:09:00


In [17]:
# Check this.
to_drop = aki_first.merge(chronic_kidney_time, on=['subject_id'])
to_drop = to_drop[to_drop['ca_admittime'] >= to_drop['admittime']]

In [18]:
to_drop

Unnamed: 0,subject_id,hadm_id_x,stay_id,charttime,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,...,edregtime,edouttime,hospital_expire_flag,gender,anchor_age,anchor_year,anchor_year_group,dod,hadm_id_y,ca_admittime
0,13774759,24895324,32355634,2207-08-21 18:30:00,0.9,0.9,1.4,1,,,...,2207-08-12 15:55:00,2207-08-12 20:32:00,0,F,49,2202,2011 - 2013,,20132626,2207-12-02 00:00:00
1,13774759,24895324,32355634,2207-08-21 18:30:00,0.9,0.9,1.4,1,,,...,2207-08-12 15:55:00,2207-08-12 20:32:00,0,F,49,2202,2011 - 2013,,20611153,2208-06-30 11:49:00
2,13774759,24895324,32355634,2207-08-21 18:30:00,0.9,0.9,1.4,1,,,...,2207-08-12 15:55:00,2207-08-12 20:32:00,0,F,49,2202,2011 - 2013,,23494931,2209-09-01 16:27:00
3,13774759,24895324,32355634,2207-08-21 18:30:00,0.9,0.9,1.4,1,,,...,2207-08-12 15:55:00,2207-08-12 20:32:00,0,F,49,2202,2011 - 2013,,24895324,2207-08-12 19:09:00
4,13774759,24895324,32355634,2207-08-21 18:30:00,0.9,0.9,1.4,1,,,...,2207-08-12 15:55:00,2207-08-12 20:32:00,0,F,49,2202,2011 - 2013,,25710791,2207-09-15 02:25:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69159,17306044,26040460,38290791,2152-06-11 02:03:00,2.5,2.5,2.9,1,,,...,2152-06-09 19:17:00,2152-06-09 21:42:00,0,M,78,2152,2014 - 2016,2154-04-19,21367111,2154-02-04 19:08:00
69160,17306044,26040460,38290791,2152-06-11 02:03:00,2.5,2.5,2.9,1,,,...,2152-06-09 19:17:00,2152-06-09 21:42:00,0,M,78,2152,2014 - 2016,2154-04-19,26040460,2152-06-09 20:08:00
69161,17306044,26040460,38290791,2152-06-11 14:02:00,2.5,2.5,3.0,1,,,...,2152-06-09 19:17:00,2152-06-09 21:42:00,0,M,78,2152,2014 - 2016,2154-04-19,20392805,2152-10-18 07:15:00
69162,17306044,26040460,38290791,2152-06-11 14:02:00,2.5,2.5,3.0,1,,,...,2152-06-09 19:17:00,2152-06-09 21:42:00,0,M,78,2152,2014 - 2016,2154-04-19,21367111,2154-02-04 19:08:00


In [19]:
aki_no_ck = aki_first[~aki_first['hadm_id'].isin(to_drop['hadm_id_x'].values)]
aki_no_ck.shape

(80767, 31)

In [20]:
# DNR/DNI Item ID 22378 and 228687 in chart events.

In [21]:
aki_no_ck.shape

(80767, 31)

In [22]:
aki_no_dnr = aki_no_ck[~aki_no_ck.isin(dnr['hadm_id'].values)]
aki_no_dnr.shape

(80767, 31)

In [23]:
aki_no_dnr.shape

(80767, 31)

In [24]:
kdigo_creat.rename({'charttime' : 'creattime'}, axis=1, inplace=True)

In [25]:
def creat_24(y):
    tmp = kdigo_creat[kdigo_creat['hadm_id'] == y['hadm_id']]
    if tmp.shape[0] == 0:
        return 0
    tmp['24'] = tmp.apply(lambda x: 1 if x['creattime'] >= y['admittime'] and x['creattime'] <= (y['admittime'] + np.timedelta64(1, 'D')) else 0, axis=1)
    if max(tmp['24']) == 1:
        return 1
    else: 
        return 0
    
def uo_24(y):
    tmp = kdigo_uo[kdigo_uo['stay_id'] == y['stay_id']]
    if tmp.shape[0] == 0:
        return 0
    
    if max(pd.isnull(tmp['urineoutput_24hr'])) == 1:
        return 0
    else:
        return 1

In [None]:
# 24 hour urine output and model.
aki_no_dnr['creat_24'] = aki_no_dnr.apply(creat_24, axis=1)
aki_no_dnr['urine_24'] = aki_no_dnr.apply(uo_24, axis=1)

In [None]:
aki_no_dnr['creat_24'].value_counts()

In [None]:
aki_no_dnr['urine_24'].value_counts()

In [None]:
aki_creat_uo_24 = aki_no_dnr[(aki_no_dnr['urine_24'] == 1) & (aki_no_dnr['creat_24'] == 1)]

In [None]:
aki_creat_uo_24 = aki_creat_uo_24.reset_index(drop=True)
aki_creat_uo_24.shape

In [None]:
cols = ['gender', 'anchor_age', 'race', 'hospital_expire_flag']
categorical = ['gender', 'race', 'hospital_expire_flag']
tab = TableOne(aki_creat_uo_24, columns=cols, categorical=categorical, groupby='aki_stage', pval=True)