In [26]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from datetime import datetime

# Load the data
# Each CSV under demo_data/ is read into its own DataFrame. The paths are
# relative, so the kernel's working directory must contain `demo_data/`.
# NOTE(review): the file names look like the MIMIC-III demo tables; confirm
# the data source and version.
# `diagnosis_labes` (sic) keeps its spelling because other cells refer to the
# lookup table by that name.
patients, admissions, diagnosis, icu_stays, diagnosis_labes = (
    pd.read_csv(csv_path)
    for csv_path in (
        'demo_data/PATIENTS.csv',
        'demo_data/ADMISSIONS.csv',
        'demo_data/DIAGNOSES_ICD.csv',
        'demo_data/ICUSTAYS.csv',
        'demo_data/D_ICD_DIAGNOSES.csv',
    )
)


In [27]:
# Helper functions
def get_icd9_description(icd9_code, labels=None):
    """Return the long-form description of an ICD-9 diagnosis code.

    Parameters
    ----------
    icd9_code
        Code to look up; compared with ``==`` against the code column.
    labels : pandas.DataFrame, optional
        Lookup table holding an ICD-9 code column and a long-title column.
        Defaults to the notebook-level ``diagnosis_labes`` frame.

    Returns
    -------
    The long title of the first row whose code equals ``icd9_code``.

    Raises
    ------
    KeyError
        If the table has no ``icd9_code`` or ``long_title`` column
        (matched case-insensitively).
    IndexError
        If no row matches ``icd9_code``.
    """
    if labels is None:
        labels = diagnosis_labes

    # The other cells of this notebook address columns in lowercase
    # ('icd9_code', 'dob', ...) whereas this helper used uppercase names.
    # Resolving the columns case-insensitively works with either header style.
    columns_by_lower = {str(col).lower(): col for col in labels.columns}
    missing = [name for name in ('icd9_code', 'long_title') if name not in columns_by_lower]
    if missing:
        raise KeyError(f"diagnosis label table is missing column(s): {missing}")
    code_col = columns_by_lower['icd9_code']
    title_col = columns_by_lower['long_title']

    matches = labels.loc[labels[code_col] == icd9_code, title_col]
    if matches.empty:
        # Same exception type as the original `.values[0]` on an empty
        # result, but with a message that names the offending code.
        raise IndexError(f"ICD-9 code {icd9_code!r} not found in the diagnosis label table")
    return matches.iloc[0]

def calculate_age(dob, dod):
    """Return the age in completed calendar years between two timestamps.

    Parameters
    ----------
    dob, dod : str or datetime
        Start (birth) and end timestamps. Strings must use the
        ``'%Y-%m-%d %H:%M:%S'`` format; ``datetime`` objects are used as-is.

    Returns
    -------
    int
        Number of birthdays that have occurred on or before ``dod``.

    Raises
    ------
    ValueError
        If a string argument does not match the expected format.
    """
    def _as_datetime(value):
        # Accept already-parsed timestamps as well as the string format.
        if isinstance(value, datetime):
            return value
        return datetime.strptime(value, '%Y-%m-%d %H:%M:%S')

    dob_date = _as_datetime(dob)
    dod_date = _as_datetime(dod)

    # Count calendar years instead of `days // 365`: a fixed 365-day year
    # ignores leap days, so the old formula reported the next age a few days
    # before the actual birthday.
    age = dod_date.year - dob_date.year
    if (dod_date.month, dod_date.day) < (dob_date.month, dob_date.day):
        age -= 1  # this year's birthday has not been reached yet
    return age

In [28]:
import plotly.express as px

# Example: Extracting age distribution of patients
# NOTE(review): age is measured against the wall-clock date, so this figure
# changes with the day the notebook is run. If the dataset's dates are shifted
# for de-identification, a reference taken from the data itself (e.g. an
# admission time) would be more meaningful. Confirm before relying on it.
patients['dob'] = pd.to_datetime(patients['dob'])
today = pd.to_datetime('today')

# Count completed calendar years rather than `days // 365`: a fixed 365-day
# year ignores leap days and overstates age shortly before a birthday.
birth_month = patients['dob'].dt.month
birth_day = patients['dob'].dt.day
birthday_pending = (birth_month > today.month) | (
    (birth_month == today.month) & (birth_day > today.day)
)
patients['age'] = today.year - patients['dob'].dt.year - birthday_pending.astype(int)

fig = px.histogram(patients, x='age', nbins=20, title='Age Distribution of Patients')
fig.update_layout(xaxis_title='Age', yaxis_title='Frequency')
fig.show()

In [29]:
# Example: Extracting gender distribution
# Tally patients per gender value and lay the result out as a two-column
# frame ('gender', 'count') that is passed straight to px.bar.
gender_distribution = (
    patients['gender']
    .value_counts()
    .rename_axis('gender')
    .reset_index(name='count')
)
fig = px.bar(
    gender_distribution,
    x='gender',
    y='count',
    title='Gender Distribution of Patients',
).update_layout(xaxis_title='Gender', yaxis_title='Frequency')
fig.show()

In [30]:
# Example: Extracting most common diagnoses
# Ten most frequent ICD-9 codes, most frequent first, as ('icd9_code', 'count').
common_diagnoses = diagnosis['icd9_code'].value_counts().head(10).reset_index()
common_diagnoses.columns = ['icd9_code', 'count']
fig = px.bar(common_diagnoses, x='icd9_code', y='count', title='Most Common Diagnoses')
# ICD-9 codes are identifiers, not quantities. Plotly auto-detects the axis
# type and may treat numeric-looking codes as numbers, which places the bars
# at their numeric value instead of side by side. Force a categorical axis so
# every code gets one evenly spaced bar in count order.
fig.update_layout(
    xaxis_title='ICD-9 Code',
    yaxis_title='Frequency',
    xaxis_type='category',
)
fig.show()

In [31]:
# Example: Extracting length of stay in ICU
# Parse both timestamp columns, then express each stay's duration
# (outtime - intime) in hours for the histogram.
for ts_col in ('intime', 'outtime'):
    icu_stays[ts_col] = pd.to_datetime(icu_stays[ts_col])

stay_duration = icu_stays['outtime'] - icu_stays['intime']
icu_stays['length_of_stay'] = stay_duration.dt.total_seconds() / 3600  # seconds -> hours

fig = px.histogram(
    icu_stays,
    x='length_of_stay',
    nbins=20,
    title='Length of Stay in ICU',
).update_layout(xaxis_title='Hours', yaxis_title='Frequency')
fig.show()