In [1]:
import pandas as pd
import numpy as np

# Import MIMIC-IV Data Extracts

In [2]:
# Load the four pre-extracted MIMIC-IV tables from pickle files located
# relative to the current working directory.
# NOTE: unpickling can execute arbitrary code, so these files must come
# from a trusted source.
df_patients, df_icd, df_lab_events, df_ckd_lab_items = map(
    pd.read_pickle,
    (
        'df_all_patients.pkl',
        'df_icd_codes_with_description.pkl',
        'df_lab_events.pkl',
        'df_ckd_lab_items.pkl',
    ),
)

# Retrieve Number of Unique Patients and Hospitalisations

In [3]:
# Overall cohort size: distinct patients (subject_id) and distinct hospital
# admissions (hadm_id) present in the ICD diagnosis extract.
no_of_unique_patients = df_icd['subject_id'].nunique()
no_of_unique_hospitalisations = df_icd['hadm_id'].nunique()
print(
    f"There are {no_of_unique_patients:,} unique patients and "
    f"{no_of_unique_hospitalisations:,} unique hospitalisations in MIMIC-IV related to ICD-9 data."
)


There are 180,640 unique patients and 430,852 unique hospitalisations in MIMIC-IV related to ICD-9 data.


# Retrieve Number of CKD Patients and Hospitalisations

In [4]:
# Chronic kidney disease (CKD) is ICD-9 category 585
# (585.1, 585.2, 585.3, 585.4, 585.5, 585.6, 585.9).
# Keep every diagnosis row whose code starts with "585". na=False makes a
# missing icd_code count as "no match"; without it the mask would contain NaN
# for such rows and the boolean indexing could fail.
df_icd_585 = df_icd[df_icd['icd_code'].str.startswith('585', na=False)]

# Distinct CKD patients and distinct admissions that carry a CKD code.
no_of_unique_CKD_patients = df_icd_585.subject_id.nunique()
no_of_unique_CKD_hospitalisations = df_icd_585.hadm_id.nunique()
print(
    f"There are {no_of_unique_CKD_patients:,} unique CKD patients and "
    f"{no_of_unique_CKD_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 14,012 unique CKD patients and 37,423 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Diabetes Patients and Hospitalisations

In [5]:
# Diabetes mellitus is ICD-9 category 250 (250.0 through 250.9).
# Keep every diagnosis row whose code starts with "250". na=False makes a
# missing icd_code count as "no match"; without it the mask would contain NaN
# for such rows and the boolean indexing could fail.
df_icd_250 = df_icd[df_icd['icd_code'].str.startswith('250', na=False)]

# Distinct diabetes patients and distinct admissions that carry a diabetes code.
no_of_unique_diab_patients = df_icd_250.subject_id.nunique()
no_of_unique_diab_hospitalisations = df_icd_250.hadm_id.nunique()
print(
    f"There are {no_of_unique_diab_patients:,} unique diabetes patients and "
    f"{no_of_unique_diab_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 24,378 unique diabetes patients and 64,144 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Hypertension Patients and Hospitalisations

In [6]:
# Hypertension is ICD-9 categories 401-405.
# Build the prefixes '401'..'405' from the range, then keep every diagnosis row
# whose code starts with one of them. na=False makes a missing icd_code count
# as "no match"; without it the mask would contain NaN for such rows and the
# boolean indexing could fail.
df_icd_401_405 = df_icd[df_icd['icd_code'].str.startswith(
    tuple(str(code) for code in range(401, 406)), na=False)]

# Distinct hypertension patients and distinct admissions with a hypertension code.
no_of_unique_hypertension_patients = df_icd_401_405.subject_id.nunique()
no_of_unique_hypertension_hospitalisations = df_icd_401_405.hadm_id.nunique()
print(
    f"There are {no_of_unique_hypertension_patients:,} unique hypertension patients and "
    f"{no_of_unique_hypertension_hospitalisations:,} unique "
    "hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes."
)


There are 58,953 unique hypertension patients and 134,505 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Heart Failure Patients and Hospitalisations

In [7]:
# Heart failure is ICD-9 category 428 (428.x).
# Keep every diagnosis row whose code starts with "428". na=False makes a
# missing icd_code count as "no match"; without it the mask would contain NaN
# for such rows and the boolean indexing could fail.
df_icd_428 = df_icd[df_icd['icd_code'].str.startswith('428', na=False)]

# Distinct heart failure patients and distinct admissions with a heart failure code.
no_of_unique_heart_failure_patients = df_icd_428.subject_id.nunique()
no_of_unique_heart_failure_hospitalisations = df_icd_428.hadm_id.nunique()
print(
    f"There are {no_of_unique_heart_failure_patients:,} unique heart failure patients and "
    f"{no_of_unique_heart_failure_hospitalisations:,} "
    "unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes."
)


There are 14,794 unique heart failure patients and 35,560 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Ischemic Heart Disease Patients and Hospitalisations

In [8]:
# Ischemic heart disease is ICD-9 categories 410-414.
# Build the prefixes '410'..'414' from the range, then keep every diagnosis row
# whose code starts with one of them. na=False makes a missing icd_code count
# as "no match"; without it the mask would contain NaN for such rows and the
# boolean indexing could fail.
df_icd_410_414 = df_icd[df_icd['icd_code'].str.startswith(
    tuple(str(code) for code in range(410, 415)), na=False)]

# Distinct ischemic heart disease patients and distinct admissions with such a code.
no_of_unique_ischemic_heart_disease_patients = df_icd_410_414.subject_id.nunique()
no_of_unique_ischemic_heart_disease_hospitalisations = df_icd_410_414.hadm_id.nunique()
print(
    f"There are {no_of_unique_ischemic_heart_disease_patients:,} unique ischemic heart disease patients with "
    f"{no_of_unique_ischemic_heart_disease_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 23,468 unique ischemic heart disease patients with 51,705 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Cerebrovascular Disease Patients and Hospitalisations

In [9]:
# Cerebrovascular disease is ICD-9 categories 430-438.
# Build the prefixes '430'..'438' from the range, then keep every diagnosis row
# whose code starts with one of them. na=False makes a missing icd_code count
# as "no match"; without it the mask would contain NaN for such rows and the
# boolean indexing could fail.
df_icd_430_438 = df_icd[df_icd['icd_code'].str.startswith(
    tuple(str(code) for code in range(430, 439)), na=False)]

# Distinct cerebrovascular disease patients and distinct admissions with such a code.
# NOTE(review): the variable names and the printed message spell the condition
# "cererbrovascular"; they are kept unchanged here because the summary-table
# cell refers to these variable names.
no_of_unique_cererbrovascular_disease_patients = df_icd_430_438.subject_id.nunique()
no_of_unique_cererbrovascular_disease_hospitalisations = df_icd_430_438.hadm_id.nunique()
print(
    f"There are {no_of_unique_cererbrovascular_disease_patients:,} unique cererbrovascular disease patients with "
    f"{no_of_unique_cererbrovascular_disease_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 11,302 unique cererbrovascular disease patients with 16,732 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Peripheral Vascular Disease (PVD) Patients and Hospitalisations

In [10]:
# Peripheral vascular disease (PVD) is identified here by ICD-9 categories
# 440, 441 and 443.
# Keep every diagnosis row whose code starts with one of those prefixes.
# na=False makes a missing icd_code count as "no match"; without it the mask
# would contain NaN for such rows and the boolean indexing could fail.
df_icd_440_441_443 = df_icd[df_icd['icd_code'].str.startswith(
    ('440', '441', '443'), na=False)]

# Distinct PVD patients and distinct admissions that carry a PVD code.
no_of_unique_PVD_patients = df_icd_440_441_443.subject_id.nunique()
no_of_unique_PVD_hospitalisations = df_icd_440_441_443.hadm_id.nunique()
print(
    f"There are {no_of_unique_PVD_patients:,} unique PVD patients and "
    f"{no_of_unique_PVD_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 8,498 unique PVD patients and 18,016 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Cardiac Dysrhythmia Patients and Hospitalisations

In [11]:
# Cardiac dysrhythmias are ICD-9 category 427.
# Keep every diagnosis row whose code starts with "427". na=False makes a
# missing icd_code count as "no match"; without it the mask would contain NaN
# for such rows and the boolean indexing could fail.
df_icd_427 = df_icd[df_icd['icd_code'].str.startswith('427', na=False)]

# Distinct cardiac dysrhythmia patients and distinct admissions with such a code.
# NOTE(review): the variable names and the printed message use the spelling
# "disarrhythmias"; they are kept unchanged here because the summary-table
# cell refers to these variable names.
no_of_unique_cardiac_disarrhythmias_patients = df_icd_427.subject_id.nunique()
no_of_unique_cardiac_disarrhythmias_hospitalisations = df_icd_427.hadm_id.nunique()
print(
    f"There are {no_of_unique_cardiac_disarrhythmias_patients:,} unique cardiac disarrhythmias patients with "
    f"{no_of_unique_cardiac_disarrhythmias_hospitalisations:,} unique hospitalisations in MIMIC-IV, "
    "as classified by the doctor using ICD-9 codes."
)


There are 24,201 unique cardiac disarrhythmias patients with 46,413 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Anemia Patients and Hospitalisations

In [12]:
# Anemia is ICD-9 categories 280-285.
# Build the prefixes '280'..'285' from the range, then keep every diagnosis row
# whose code starts with one of them. na=False makes a missing icd_code count
# as "no match"; without it the mask would contain NaN for such rows and the
# boolean indexing could fail.
df_icd_280_285 = df_icd[df_icd['icd_code'].str.startswith(
    tuple(str(code) for code in range(280, 286)), na=False)]

# Distinct anemia patients and distinct admissions that carry an anemia code.
no_of_unique_anemia_patients = df_icd_280_285.subject_id.nunique()
no_of_unique_anemia_hospitalisations = df_icd_280_285.hadm_id.nunique()
print(
    f"There are {no_of_unique_anemia_patients:,} unique anemia patients and "
    f"{no_of_unique_anemia_hospitalisations:,} unique hospitalisations "
    "in MIMIC-IV, as classified by the doctor using ICD-9 codes."
)


There are 32,399 unique anemia patients and 59,369 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Retrieve Number of Thyroid Disease Patients and Hospitalisations

In [13]:
# Thyroid disease is ICD-9 categories 240-246.
# Build the prefixes '240'..'246' from the range, then keep every diagnosis row
# whose code starts with one of them. na=False makes a missing icd_code count
# as "no match"; without it the mask would contain NaN for such rows and the
# boolean indexing could fail.
df_icd_240_246 = df_icd[df_icd['icd_code'].str.startswith(
    tuple(str(code) for code in range(240, 247)), na=False)]

# Distinct thyroid disease patients and distinct admissions with such a code.
no_of_unique_thyroid_disease_patients = df_icd_240_246.subject_id.nunique()
no_of_unique_thyroid_disease_hospitalisations = df_icd_240_246.hadm_id.nunique()
print(
    f"There are {no_of_unique_thyroid_disease_patients:,} unique thyroid disease patients and "
    f"{no_of_unique_thyroid_disease_hospitalisations:,} "
    "unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes."
)


There are 15,831 unique thyroid disease patients and 33,087 unique hospitalisations in MIMIC-IV, as classified by the doctor using ICD-9 codes.


# Construct Dataframe of Disease and Hospitalisation Frequency

In [14]:
# Table 1: number of distinct patients and distinct hospitalisations per disease.
#
# Each row is (label, patient count, hospitalisation count), so a label always
# sits next to its own two counts. The table is ordered by patient count,
# largest first, and then given a fresh 0..n-1 index.
# NOTE(review): the labels 'Cererbrovascular Disease' and 'Cardiac Disarrhythmias'
# are kept exactly as they were because they end up in the displayed table.
df_diseases = (
    pd.DataFrame(
        [
            ('CKD', no_of_unique_CKD_patients, no_of_unique_CKD_hospitalisations),
            ('Diabetes', no_of_unique_diab_patients, no_of_unique_diab_hospitalisations),
            ('Hypertension', no_of_unique_hypertension_patients,
             no_of_unique_hypertension_hospitalisations),
            ('Heart Failure', no_of_unique_heart_failure_patients,
             no_of_unique_heart_failure_hospitalisations),
            ('Ischemic Heart Disease', no_of_unique_ischemic_heart_disease_patients,
             no_of_unique_ischemic_heart_disease_hospitalisations),
            ('Cererbrovascular Disease', no_of_unique_cererbrovascular_disease_patients,
             no_of_unique_cererbrovascular_disease_hospitalisations),
            ('PVD', no_of_unique_PVD_patients, no_of_unique_PVD_hospitalisations),
            ('Cardiac Disarrhythmias', no_of_unique_cardiac_disarrhythmias_patients,
             no_of_unique_cardiac_disarrhythmias_hospitalisations),
            ('Anemia', no_of_unique_anemia_patients, no_of_unique_anemia_hospitalisations),
            ('Thyroid Disease', no_of_unique_thyroid_disease_patients,
             no_of_unique_thyroid_disease_hospitalisations),
        ],
        columns=['Disease', 'Patient Frequency', 'Hospitalisation Frequency'],
    )
    .sort_values(by='Patient Frequency', ascending=False)
    .reset_index(drop=True)
)
df_diseases


Unnamed: 0,Disease,Patient Frequency,Hospitalisation Frequency
0,Hypertension,58953,134505
1,Anemia,32399,59369
2,Diabetes,24378,64144
3,Cardiac Disarrhythmias,24201,46413
4,Ischemic Heart Disease,23468,51705
5,Thyroid Disease,15831,33087
6,Heart Failure,14794,35560
7,CKD,14012,37423
8,Cererbrovascular Disease,11302,16732
9,PVD,8498,18016


In [15]:
# Save the disease frequency table as an Excel workbook, without the index column.
df_diseases.to_excel(excel_writer='df_diseases_frequency.xlsx', index=False)