In [1]:
# !pip install awswrangler
# !pip install tableone
# !pip install tqdm

In [2]:
#Set environment variables for your notebook
import os 
project_id = 'mimic-369422'
os.environ['GOOGLE_CLOUD_PROJECT'] = project_id

import pydata_google_auth
credentials = pydata_google_auth.get_user_credentials(
    ['https://www.googleapis.com/auth/bigquery'],use_local_webserver=False
)

from google.cloud import bigquery

bigquery_client = bigquery.Client()#Write Query on BQ



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from tableone import TableOne

In [4]:
def run_query(query):
    return pd.io.gbq.read_gbq(
      query,
      project_id=project_id,
      credentials=credentials,
      configuration={'query': {
          'useLegacySql': False
      }})

In [5]:
creatinine = run_query("""SELECT * FROM `physionet-data.mimiciv_derived.kdigo_creatinine`""")
crrt = run_query("""SELECT * FROM `physionet-data.mimiciv_derived.crrt`""")
uo = run_query("""SELECT * FROM `physionet-data.mimiciv_derived.kdigo_uo`""")
aki = run_query("""SELECT * FROM `physionet-data.mimiciv_derived.kdigo_stages`""")

In [6]:
icustays = run_query("""SELECT * FROM `physionet-data.mimiciv_icu.icustays`""")
demo = run_query("""SELECT * FROM `physionet-data.mimiciv_hosp.patients`""")
admissions = run_query("""SELECT * FROM `physionet-data.mimiciv_hosp.admissions`""")
diag = run_query("""SELECT * FROM `physionet-data.mimiciv_hosp.diagnoses_icd`""")
dnr = run_query("""SELECT hadm_id from `physionet-data.mimiciv_icu.chartevents` where itemid=22378 or itemid=228687""")

In [7]:
aki.shape

(4103559, 13)

In [8]:
aki.nunique()

subject_id               53569
hadm_id                  69639
stay_id                  76943
charttime              1684027
creat_low_past_7day        282
creat_low_past_48hr        291
creat                      321
aki_stage_creat              4
uo_rt_6hr                83659
uo_rt_12hr               77383
uo_rt_24hr               73792
aki_stage_uo                 4
aki_stage                    4
dtype: int64

In [9]:
aki['aki_stage'].value_counts()

0    3251195
2     400254
3     230228
1     221882
Name: aki_stage, dtype: Int64

In [10]:
# Merge on hadm_id and not subject_id to get the exact encounters.
aki = aki.merge(admissions, on=['hadm_id', 'subject_id'])
aki = aki.merge(icustays, on=['hadm_id', 'subject_id', 'stay_id'])
aki = aki.merge(demo)

In [11]:
aki.shape

(4103559, 36)

In [12]:
aki.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,creat_low_past_7day,creat_low_past_48hr,creat,aki_stage_creat,uo_rt_6hr,uo_rt_12hr,...,first_careunit,last_careunit,intime,outtime,los,gender,anchor_age,anchor_year,anchor_year_group,dod
0,17275599,29372610,30633205,2158-11-11 14:23:00,1.3,1.8,2.8,2.0,,,...,Surgical Intensive Care Unit (SICU),Medical Intensive Care Unit (MICU),2158-11-07 07:40:55,2158-11-15 09:38:12,8.081447,M,65,2158,2017 - 2019,2158-11-15
1,17275599,29372610,30633205,2158-11-09 02:14:00,1.3,1.3,1.3,0.0,,,...,Surgical Intensive Care Unit (SICU),Medical Intensive Care Unit (MICU),2158-11-07 07:40:55,2158-11-15 09:38:12,8.081447,M,65,2158,2017 - 2019,2158-11-15
2,17275599,29372610,30633205,2158-11-09 02:00:00,,,,,0.5497,0.5374,...,Surgical Intensive Care Unit (SICU),Medical Intensive Care Unit (MICU),2158-11-07 07:40:55,2158-11-15 09:38:12,8.081447,M,65,2158,2017 - 2019,2158-11-15
3,17275599,29372610,30633205,2158-11-07 13:00:00,,,,,0.7047,0.7047,...,Surgical Intensive Care Unit (SICU),Medical Intensive Care Unit (MICU),2158-11-07 07:40:55,2158-11-15 09:38:12,8.081447,M,65,2158,2017 - 2019,2158-11-15
4,17275599,29372610,30633205,2158-11-15 04:00:00,,,,,3.012,0.6903,...,Surgical Intensive Care Unit (SICU),Medical Intensive Care Unit (MICU),2158-11-07 07:40:55,2158-11-15 09:38:12,8.081447,M,65,2158,2017 - 2019,2158-11-15


## Inclusion / Exclusion Criteria 
1. Patients > 18
2. 1st ICU stay
3. Measurement of creatinine and urine ouput within 24 hours
4. aki stage 2 
5. not a CEER ICD 10-N18
6. No DNR

In [13]:
# Filter aki for stage 2
aki2 = aki[aki['aki_stage'] != 0].reset_index(drop=True)

In [14]:
aki2.shape

(852364, 36)

In [15]:
# Patients > 18
aki_18 = aki2[aki2['anchor_age'] >= 18]

In [16]:
aki_18.shape

(852364, 36)

In [19]:
# Get the first ICU stay.
def get_first_icu_stay(df):
    min_stay = df.groupby(by='subject_id')['intime'].min().reset_index()
    result = df.merge(min_stay)
    return result

In [20]:
aki_first = get_first_icu_stay(aki_18)

In [21]:
aki_first.shape

(615481, 36)

In [24]:
# Find people with chronic kidney diease. 
# Drop all encounters after the diagnosis
chronic_kidney = diag[diag['icd_code'].str.contains('n18', case=False)]
chronic_kidney_time = chronic_kidney.merge(admissions)
chronic_kidney_time = chronic_kidney_time[['subject_id', 'hadm_id', 'admittime']]
chronic_kidney_time.rename({'admittime' : 'ca_admittime'}, axis=1, inplace=True)
chronic_kidney_time.head()

Unnamed: 0,subject_id,hadm_id,ca_admittime
0,10079700,24239751,2115-09-14 22:01:00
1,10108435,24349938,2192-04-12 00:07:00
2,10109956,26022059,2184-05-05 00:00:00
3,10131638,23938729,2165-02-21 23:03:00
4,10155734,20778459,2133-12-23 21:34:00


In [25]:
# Check this.
to_drop = aki_first.merge(chronic_kidney_time, on=['subject_id'])
to_drop = to_drop[to_drop['ca_admittime'] >= to_drop['admittime']]

In [29]:
aki_no_ck = aki_first[~aki_first['hadm_id'].isin(to_drop['hadm_id_x'].values)]
aki_no_ck.shape

(519251, 36)

In [30]:
# DNR/DNI Item ID 22378 and 228687 in chart events.

In [31]:
aki_no_ck.shape

(519251, 36)

In [32]:
aki_no_dnr = aki_no_ck[~aki_no_ck.isin(dnr['hadm_id'].values)]
aki_no_dnr.shape

(519251, 36)

In [33]:
aki_no_dnr.shape

(519251, 36)

In [34]:
creatinine.rename({'charttime' : 'creattime'}, axis=1, inplace=True)

In [39]:
def creat_24(y):
    tmp = creatinine[creatinine['hadm_id'] == y['hadm_id']]
    if tmp.shape[0] == 0:
        return 0
    tmp['48'] = tmp.apply(lambda x: 1 if x['creattime'] >= y['admittime'] and x['creattime'] <= (y['admittime'] + np.timedelta64(2, 'D')) else 0, axis=1)
    if max(tmp['48']) == 1:
        return 1
    else: 
        return 0
    
def uo_24(y):
    tmp = uo[uo['stay_id'] == y['stay_id']]
    if tmp.shape[0] == 0:
        return 0
    
    if max(pd.isnull(tmp['urineoutput_24hr'])) == 1:
        return 0
    else:
        return 1

In [37]:
# 24 hour urine output and model.
aki_no_dnr['creat_24'] = aki_no_dnr.apply(creat_24, axis=1)

KeyError: 'urineoutput_48hr'

In [40]:
aki_no_dnr['urine_24'] = aki_no_dnr.apply(uo_24, axis=1)

In [41]:
aki_no_dnr['creat_24'].value_counts()

1    476673
0     42578
Name: creat_24, dtype: int64

In [42]:
aki_no_dnr['urine_24'].value_counts()

1    518245
0      1006
Name: urine_24, dtype: int64

In [43]:
aki_creat_uo_24 = aki_no_dnr[(aki_no_dnr['urine_24'] == 1) & (aki_no_dnr['creat_24'] == 1)]

In [44]:
aki_creat_uo_24 = aki_creat_uo_24.reset_index(drop=True)
aki_creat_uo_24.shape

(475839, 38)

In [45]:
aki_creat_uo_24.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 475839 entries, 0 to 475838
Data columns (total 38 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   subject_id            475839 non-null  Int64         
 1   hadm_id               475839 non-null  Int64         
 2   stay_id               475839 non-null  Int64         
 3   charttime             475839 non-null  datetime64[ns]
 4   creat_low_past_7day   57525 non-null   float64       
 5   creat_low_past_48hr   57074 non-null   float64       
 6   creat                 57527 non-null   float64       
 7   aki_stage_creat       57527 non-null   Int64         
 8   uo_rt_6hr             421908 non-null  object        
 9   uo_rt_12hr            421908 non-null  object        
 10  uo_rt_24hr            421908 non-null  object        
 11  aki_stage_uo          421908 non-null  Int64         
 12  aki_stage             475839 non-null  Int64         
 13 

In [46]:
aki_creat_uo_24['anchor_age'] = aki_creat_uo_24['anchor_age'].astype(float)

In [47]:
cols = ['gender', 'anchor_age', 'race', 'hospital_expire_flag']
categorical = ['gender', 'race', 'hospital_expire_flag']
tab = TableOne(aki_creat_uo_24, columns=cols, categorical=categorical, groupby='aki_stage', pval=True)

  df['percent'] = df['freq'].div(df.freq.sum(level=0),
  df['percent'] = df['freq'].div(df.freq.sum(level=0),
  df['percent'] = df['freq'].div(df.freq.sum(level=0),
  df['percent'] = df['freq'].div(df.freq.sum(level=0),


In [50]:
tab

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by aki_stage,Grouped by aki_stage,Grouped by aki_stage,Grouped by aki_stage,Grouped by aki_stage,Grouped by aki_stage
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,1,2,3,P-Value
n,,,475839,125858,236924,113057,
"gender, n (%)",F,0.0,214664 (45.1),53401 (42.4),109325 (46.1),51938 (45.9),<0.001
"gender, n (%)",M,,261175 (54.9),72457 (57.6),127599 (53.9),61119 (54.1),
"anchor_age, mean (SD)",,0.0,65.7 (15.7),65.8 (15.6),66.5 (15.6),63.9 (16.1),<0.001
"race, n (%)",AMERICAN INDIAN/ALASKA NATIVE,0.0,685 (0.1),173 (0.1),436 (0.2),76 (0.1),<0.001
"race, n (%)",ASIAN,,3618 (0.8),1125 (0.9),1678 (0.7),815 (0.7),
"race, n (%)",ASIAN - ASIAN INDIAN,,1267 (0.3),204 (0.2),393 (0.2),670 (0.6),
"race, n (%)",ASIAN - CHINESE,,2013 (0.4),773 (0.6),899 (0.4),341 (0.3),
"race, n (%)",ASIAN - KOREAN,,263 (0.1),90 (0.1),116 (0.0),57 (0.1),
"race, n (%)",ASIAN - SOUTH EAST ASIAN,,600 (0.1),252 (0.2),251 (0.1),97 (0.1),


In [49]:
aki_creat_uo_24.to_csv('initial_cohort_no_covariates.csv', index=False)