In [7]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
import os
import sys

sys.path.insert(0, os.path.abspath(".."))
from dotenv import load_dotenv
import seaborn as sns
import pandas as pd
import numpy as np

from db import PostgresDB
from schemas import Proband
from cohorts import Cohort
from matplotlib import pyplot as plt

In [9]:
load_dotenv()

True

In [10]:
db = PostgresDB(
    db_name=os.getenv("DB_NAME"),
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
)

Connected to database


## Compare Two Hospital Stays

In [11]:
cohort = Cohort(db=db)

In [18]:
cohort.get_random_cohort(size=10)
cohort.initialize_data()
cohort.remove_encounter_with_missing_data()
cohort.similarity_encounters = cohort.similarity_encounters
# filter patients that have all categories

In [13]:
import pandas as pd

def create_encounter_table(encounters):
    encounters = [e.dict() for e in encounters]
    # Create an empty dataframe
    df_list = []

    # Loop through each encounter and add a row to the dataframe
    for enc in encounters:
        # Get the relevant information from the encounter dictionary
        hadm_id = enc['hadm_id']
        age = enc['demographics']['age']
        gender = enc['demographics']['gender']
        ethnicity = enc['demographics']['ethnicity']
        diagnoses = ', '.join(sorted([d['code'] for d in enc['diagnoses']]))
        labevents = ', '.join(sorted([f"{l['item_id']}: {l['value']}" for l in enc['labevents']]))
        
        # Create a sub-table for the vitalsigns column
        vitalsigns_dict = {}
        for vs in enc['vitalsigns']:
            vs_name = vs['name']
            vs_value = vs['value']
            vitalsigns_dict[vs_name] = vs_value
        vitalsigns = pd.Series(vitalsigns_dict).sort_index().to_frame().T
        vitalsigns.columns = [f"{c}_vitalsigns" for c in vitalsigns.columns]

        inputevents = ', '.join(sorted([str(i['item_id']) for i in enc['inputevents']]))

        # Add a row to the dataframe
        df_list.append([hadm_id, age, gender, ethnicity, diagnoses, labevents, inputevents])
        df_list[-1].extend(vitalsigns.values.tolist()[0])

    # Create a pandas dataframe from the list of rows
    columns = ['hadm_id', 'age', 'gender', 'ethnicity', 'diagnoses', 'labevents', 'inputevents']
    columns.extend([f"{c}_vitalsigns" for c in vitalsigns_dict.keys()])
    df = pd.DataFrame(df_list, columns=columns)

    # Order the columns and rows alphabetically
    # df = df.reindex(sorted(df.columns), axis=1)
    df = df.sort_values(by='hadm_id')
    df = df.set_index("hadm_id")

    return df


In [14]:
import itertools


def display_encounter_similarities(encounters):
    encounters = [e.dict() for e in encounters]
    for i, j in itertools.combinations(range(len(encounters)), 2):
        enc_i = encounters[i]
        enc_j = encounters[j]
        print(
            f"Comparing encounter {enc_i['hadm_id']} with encounter {enc_j['hadm_id']}:"
        )

        # Age difference
        age_i = enc_i["demographics"]["age"]
        age_j = enc_j["demographics"]["age"]
        print(f"Age difference: {abs(age_i - age_j)} years")

        # Gender comparison
        gender_i = enc_i["demographics"]["gender"]
        gender_j = enc_j["demographics"]["gender"]
        if gender_i == gender_j:
            print(f"Gender: {gender_i}")
        else:
            print(f"Gender difference")

        # Ethnicity comparison
        ethnicity_i = enc_i["demographics"]["ethnicity"]
        ethnicity_j = enc_j["demographics"]["ethnicity"]
        if ethnicity_i == ethnicity_j:
            print(f"Ethnicity: {ethnicity_i}")
        else:
            print("Ethnicity difference")

        # Common diagnoses
        diag_i = set([diag["code"] for diag in enc_i["diagnoses"]])
        diag_j = set([diag["code"] for diag in enc_j["diagnoses"]])
        common_diagnoses = diag_i.intersection(diag_j)
        if common_diagnoses:
            print(f"Common diagnoses codes: {', '.join(common_diagnoses)}")
        else:
            print("No common diagnoses codes")

        # Common labevents
        lab_i = {(lab["item_id"], lab["value"]) for lab in enc_i["labevents"]}
        lab_j = {(lab["item_id"], lab["value"]) for lab in enc_j["labevents"]}
        common_labevents = lab_i.intersection(lab_j)
        if common_labevents:
            print("Common labevent item ids:")
            for item_id, value in common_labevents:
                val_i = next(
                    lab["value"]
                    for lab in enc_i["labevents"]
                    if lab["item_id"] == item_id
                )
                val_j = next(
                    lab["value"]
                    for lab in enc_j["labevents"]
                    if lab["item_id"] == item_id
                )
                print(f"Item {item_id}: {abs(val_i - val_j)}")
        else:
            print("No common labevent item ids")

        # Common inputevents
        input_i = {inp["item_id"] for inp in enc_i["inputevents"]}
        input_j = {inp["item_id"] for inp in enc_j["inputevents"]}
        common_input = input_i.intersection(input_j)
        if common_input:
            print(
                f"Common inputevent item ids: {', '.join(map(str, list(common_input)))}"
            )
        else:
            print("No common inputevent item ids")

        # Vitalsigns difference
        vitals_i = {vital["id"]: vital["value"] for vital in enc_i["vitalsigns"]}
        vitals_j = {vital["id"]: vital["value"] for vital in enc_j["vitalsigns"]}
        common_vitals = set(vitals_i.keys()).intersection(set(vitals_j.keys()))
        if common_vitals:
            print("Common vitalsigns:")
            for vital_name in sorted(common_vitals):
                val_i = vitals_i[vital_name]
                val_j = vitals_j[vital_name]
                print(f"{vital_name}: {abs(val_i - val_j)}")
        else:
            print("No common vitalsigns")

        print()  # Blank line to separate encounters


In [15]:
create_encounter_table(cohort.similarity_encounters[:2])

Unnamed: 0_level_0,age,gender,ethnicity,diagnoses,labevents,inputevents,heart_rate_vitalsigns,sbp_ni_vitalsigns,dbp_ni_vitalsigns,mbp_ni_vitalsigns,resp_rate_vitalsigns,temperature_vitalsigns,spo2_vitalsigns,glucose_vitalsigns
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
21764330,67,M,WHITE,"24290, 2724, 30500, 3051, 4019, 412, 41401, 42...","50862: 3.2, 50868: 14.5, 50882: 26.25, 50893: ...","220949, 222011, 222011, 225158, 225798, 226363...",48.75,108.0,58.771429,59.0,19.514286,94.0,98.371429,37.085
24625361,41,M,WHITE,"A400, A4102, B182, B1920, B950, B9562, D649, D...","50802: 1.0, 50804: 26.0, 50813: 1.950000000000...","220949, 220949, 220949, 220949, 220949, 220949...",71.666667,119.625,95.767442,81.02381,21.139535,115.095238,96.093023,36.896667


In [16]:
display_encounter_similarities(cohort.similarity_encounters[:2])

Comparing encounter 21764330 with encounter 24625361:
Age difference: 26 years
Gender: M
Ethnicity: WHITE
No common diagnoses codes
Common labevent item ids:
Item 51476: 0.0
Common inputevent item ids: 226452, 220949, 225158, 225798
Common vitalsigns:
dbp_ni: 22.91666666666667
glucose: 11.625
heart_rate: 36.99601328903654
mbp_ni: 22.02380952380952
resp_rate: 1.6252491694352145
sbp_ni: 21.095238095238102
spo2: 2.27840531561462
temperature: 0.18833333333333258



In [21]:
sims = cohort.compare_encounters(scale_by_distribution=True, normalize_categories=False)

Finished encounter 21764330
Finished encounter 24625361
Finished encounter 27370800
Finished encounter 20532621
Finished encounter 27921263
Finished encounter 22904714
Finished encounter 25415625
Finished encounter 25366699
Finished encounter 21658149
Finished encounter 25155096
Finished encounter 22130791
Finished encounter 20664607
Finished encounter 29068519
Finished encounter 26717744
Finished encounter 24061120
Finished encounter 29056733
Finished encounter 24080968
Finished encounter 28186950
Finished encounter 21842619
Finished encounter 27779433


In [22]:
sims

[{'encounter_a': 21764330,
  'encounter_b': 21764330,
  'similarity': {'demographics_sim': 1.0,
   'diagnoses_sim': 1.0,
   'labevents_sim': 1.0,
   'vitalsigns_sim': 1.0,
   'inputevents_sim': 1.0}},
 {'encounter_a': 21764330,
  'encounter_b': 24625361,
  'similarity': {'demographics_sim': 0.8706467661691543,
   'diagnoses_sim': 0.2977161225312944,
   'labevents_sim': 0.16867670541402444,
   'vitalsigns_sim': 0.08978839848911802,
   'inputevents_sim': 0.2222222222222222}},
 {'encounter_a': 21764330,
  'encounter_b': 27370800,
  'similarity': {'demographics_sim': 0.9857142857142858,
   'diagnoses_sim': 0.4276505891172504,
   'labevents_sim': 0.17749869785740813,
   'vitalsigns_sim': 0.14817885867700778,
   'inputevents_sim': 0.12195121951219512}},
 {'encounter_a': 21764330,
  'encounter_b': 20532621,
  'similarity': {'demographics_sim': 0.8606965174129353,
   'diagnoses_sim': 0.11297823627789069,
   'labevents_sim': 0.21606636748839605,
   'vitalsigns_sim': 0.06111897115211481,
   'inp