In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the diagnosis-code table; the next cell reads its 'icd_code', 'subject_id',
# 'hadm_id' and 'long_title' columns.
# NOTE(review): unpickling can execute arbitrary code -- only load this file if it
# comes from a trusted source.
df_icd = pd.read_pickle('mimic_iv_extract/df_icd_codes_with_description.pkl')

# Construct a DataFrame of the number of patients and hospitalisations for each CKD ICD-9 code

In [4]:
# Build a per-code frequency table for chronic kidney disease (codes prefixed '585'):
# one row per icd_code with the number of distinct patients, the number of distinct
# hospital admissions, and the code's description.

# Subset the CKD rows by code prefix. na=False treats rows whose icd_code is missing
# as non-matches, so the mask is strictly boolean and indexing cannot fail on NaN.
# NOTE(review): the filter relies on the prefix alone and does not check any ICD
# version field -- confirm that no non-ICD-9 code in df_icd can start with '585'.
df_icd_585 = df_icd[df_icd['icd_code'].str.startswith('585', na=False)]

# Number of distinct patients (subject_id) recorded with each CKD code.
df_icd_585_patients = df_icd_585.groupby('icd_code')['subject_id'].nunique().reset_index()

# Number of distinct hospital admissions (hadm_id) recorded with each CKD code.
df_icd_585_hosp = df_icd_585.groupby('icd_code')['hadm_id'].nunique().reset_index()

# Combine both counts on the code. Columns are renamed by name rather than by
# position, so the labels stay attached to the right counts regardless of order.
df_icd_585_patients_hosp = pd.merge(
    df_icd_585_patients,
    df_icd_585_hosp,
    on='icd_code',
    how='outer',
).rename(columns={
    'subject_id': 'No. of patients',
    'hadm_id': 'No. of hospital entries',
})

# Attach the description of each code. The lookup holds the distinct
# (icd_code, long_title) pairs, so a code with several titles yields several rows.
df_icd_585_patients_hosp = pd.merge(
    df_icd_585_patients_hosp,
    df_icd_585[['icd_code', 'long_title']].drop_duplicates(),
    on='icd_code',
    how='left',
)

df_icd_585_patients_hosp

Unnamed: 0,icd_code,No. of patients,No. of hospital entries,long_title
0,5851,89,98,"Chronic kidney disease, Stage I"
1,5852,827,1105,"Chronic kidney disease, Stage II (mild)"
2,5853,4312,6996,"Chronic kidney disease, Stage III (moderate)"
3,5854,1587,2836,"Chronic kidney disease, Stage IV (severe)"
4,5855,475,682,"Chronic kidney disease, Stage V"
5,5856,2441,8882,End stage renal disease
6,5859,9032,16983,"Chronic kidney disease, unspecified"


In [5]:
# Save the CKD code frequency table as an Excel workbook.
# index=False leaves the DataFrame's row index out of the written sheet.
df_icd_585_patients_hosp.to_excel('mimic_iv_extract/ckd_stage_frequency.xlsx', index=False)