In [23]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
import os
import sys

sys.path.insert(0, os.path.abspath(".."))
from dotenv import load_dotenv
import seaborn as sns
import pandas as pd
import numpy as np

from db import PostgresDB
from schemas import Proband
from cohorts import Cohort
from matplotlib import pyplot as plt

In [25]:
load_dotenv()

True

In [26]:
db = PostgresDB(
    db_name=os.getenv("DB_NAME"),
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
)

Connected to database


## Compare Two Hospital Stays

In [27]:
cohort = Cohort(db=db)

In [28]:
cohort.get_random_cohort(size=10)
cohort.initialize_data()
cohort.remove_encounter_with_missing_category()
cohort.similarity_encounters = cohort.similarity_encounters
# filter patients that have all categories

In [29]:
import pandas as pd

def create_encounter_table(encounters):
    encounters = [e.dict() for e in encounters]
    # Create an empty dataframe
    df_list = []

    # Loop through each encounter and add a row to the dataframe
    for enc in encounters:
        # Get the relevant information from the encounter dictionary
        hadm_id = enc['hadm_id']
        age = enc['demographics']['age']
        gender = enc['demographics']['gender']
        ethnicity = enc['demographics']['ethnicity']
        diagnoses = ', '.join(sorted([d['code'] for d in enc['diagnoses']]))
        labevents = ', '.join(sorted([f"{l['item_id']}: {l['value']}" for l in enc['labevents']]))
        
        # Create a sub-table for the vitalsigns column
        vitalsigns_dict = {}
        for vs in enc['vitalsigns']:
            vs_name = vs['name']
            vs_value = vs['value']
            vitalsigns_dict[vs_name] = vs_value
        vitalsigns = pd.Series(vitalsigns_dict).sort_index().to_frame().T
        vitalsigns.columns = [f"{c}_vitalsigns" for c in vitalsigns.columns]

        inputevents = ', '.join(sorted([str(i['item_id']) for i in enc['inputevents']]))

        # Add a row to the dataframe
        df_list.append([hadm_id, age, gender, ethnicity, diagnoses, labevents, inputevents])
        df_list[-1].extend(vitalsigns.values.tolist()[0])

    # Create a pandas dataframe from the list of rows
    columns = ['hadm_id', 'age', 'gender', 'ethnicity', 'diagnoses', 'labevents', 'inputevents']
    columns.extend([f"{c}_vitalsigns" for c in vitalsigns_dict.keys()])
    df = pd.DataFrame(df_list, columns=columns)

    # Order the columns and rows alphabetically
    # df = df.reindex(sorted(df.columns), axis=1)
    df = df.sort_values(by='hadm_id')
    df = df.set_index("hadm_id")

    return df


In [30]:
import itertools


def display_encounter_similarities(encounters):
    encounters = [e.dict() for e in encounters]
    for i, j in itertools.combinations(range(len(encounters)), 2):
        enc_i = encounters[i]
        enc_j = encounters[j]
        print(
            f"Comparing encounter {enc_i['hadm_id']} with encounter {enc_j['hadm_id']}:"
        )

        # Age difference
        age_i = enc_i["demographics"]["age"]
        age_j = enc_j["demographics"]["age"]
        print(f"Age difference: {abs(age_i - age_j)} years")

        # Gender comparison
        gender_i = enc_i["demographics"]["gender"]
        gender_j = enc_j["demographics"]["gender"]
        if gender_i == gender_j:
            print(f"Gender: {gender_i}")
        else:
            print(f"Gender difference")

        # Ethnicity comparison
        ethnicity_i = enc_i["demographics"]["ethnicity"]
        ethnicity_j = enc_j["demographics"]["ethnicity"]
        if ethnicity_i == ethnicity_j:
            print(f"Ethnicity: {ethnicity_i}")
        else:
            print("Ethnicity difference")

        # Common diagnoses
        diag_i = set([diag["code"] for diag in enc_i["diagnoses"]])
        diag_j = set([diag["code"] for diag in enc_j["diagnoses"]])
        common_diagnoses = diag_i.intersection(diag_j)
        if common_diagnoses:
            print(f"Common diagnoses codes: {', '.join(common_diagnoses)}")
        else:
            print("No common diagnoses codes")

        # Common labevents
        lab_i = {(lab["item_id"], lab["value"]) for lab in enc_i["labevents"]}
        lab_j = {(lab["item_id"], lab["value"]) for lab in enc_j["labevents"]}
        common_labevents = lab_i.intersection(lab_j)
        if common_labevents:
            print("Common labevent item ids:")
            for item_id, value in common_labevents:
                val_i = next(
                    lab["value"]
                    for lab in enc_i["labevents"]
                    if lab["item_id"] == item_id
                )
                val_j = next(
                    lab["value"]
                    for lab in enc_j["labevents"]
                    if lab["item_id"] == item_id
                )
                print(f"Item {item_id}: {abs(val_i - val_j)}")
        else:
            print("No common labevent item ids")

        # Common inputevents
        input_i = {inp["item_id"] for inp in enc_i["inputevents"]}
        input_j = {inp["item_id"] for inp in enc_j["inputevents"]}
        common_input = input_i.intersection(input_j)
        if common_input:
            print(
                f"Common inputevent item ids: {', '.join(map(str, list(common_input)))}"
            )
        else:
            print("No common inputevent item ids")

        # Vitalsigns difference
        vitals_i = {vital["id"]: vital["value"] for vital in enc_i["vitalsigns"]}
        vitals_j = {vital["id"]: vital["value"] for vital in enc_j["vitalsigns"]}
        common_vitals = set(vitals_i.keys()).intersection(set(vitals_j.keys()))
        if common_vitals:
            print("Common vitalsigns:")
            for vital_name in sorted(common_vitals):
                val_i = vitals_i[vital_name]
                val_j = vitals_j[vital_name]
                print(f"{vital_name}: {abs(val_i - val_j)}")
        else:
            print("No common vitalsigns")

        print()  # Blank line to separate encounters


In [31]:
create_encounter_table(cohort.similarity_encounters[:2])

Unnamed: 0_level_0,age,gender,ethnicity,diagnoses,labevents,inputevents,heart_rate_vitalsigns,sbp_ni_vitalsigns,dbp_ni_vitalsigns,mbp_ni_vitalsigns,resp_rate_vitalsigns,temperature_vitalsigns,spo2_vitalsigns,glucose_vitalsigns
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
21055302,87,M,WHITE,"E1122, E1151, E785, G8191, I129, I6389, I6522,...","50802: -4.0, 50804: 20.0, 50806: 112.0, 50808:...","225158, 225158",58.625,134.5,56.0,81.25,14.333333,141.25,96.555556,36.86
24103112,77,M,WHITE,"A419, B258, D509, D638, D696, E46, E869, E873,...","50802: 3.0, 50804: 27.82608695652174, 50806: 9...","220862, 220862, 220862, 220862, 220862, 220864...",58.636364,160.219512,80.61245,70.386364,23.054108,109.242424,97.554217,36.957838


In [32]:
display_encounter_similarities(cohort.similarity_encounters[:2])

Comparing encounter 21055302 with encounter 24103112:
Age difference: 10 years
Gender: M
Ethnicity: WHITE
Common diagnoses codes: N400
Common labevent item ids:
Item 51492: 0.0
Common inputevent item ids: 225158
Common vitalsigns:
dbp_ni: 0.011363636363633134
glucose: 25.71951219512195
heart_rate: 24.61244979919678
mbp_ni: 10.86363636363636
resp_rate: 8.720774883099532
sbp_ni: 32.00757575757575
spo2: 0.9986613119143186
temperature: 0.09783783783783662



In [33]:
sims = cohort.compare_encounters(scale_by_distribution=True, normalize_categories=False, aggregate_method="mean")

Finished encounter 21055302
Finished encounter 24103112
Finished encounter 22733649
Finished encounter 20593279
Finished encounter 29517557
Finished encounter 22648572
Finished encounter 27852107
Finished encounter 22370573
Finished encounter 25811835
Finished encounter 20150600


In [35]:
sims

[{'encounter_a': 21055302,
  'encounter_b': 21055302,
  'similarity': 1.0000000000000002},
 {'encounter_a': 21055302,
  'encounter_b': 24103112,
  'similarity': 0.25813833316843937},
 {'encounter_a': 21055302,
  'encounter_b': 22733649,
  'similarity': 0.1619521130312452},
 {'encounter_a': 21055302,
  'encounter_b': 20593279,
  'similarity': 0.23160637668410217},
 {'encounter_a': 21055302,
  'encounter_b': 29517557,
  'similarity': 0.3146701554057554},
 {'encounter_a': 21055302,
  'encounter_b': 22648572,
  'similarity': 0.18214242869113753},
 {'encounter_a': 21055302,
  'encounter_b': 27852107,
  'similarity': 0.17811942713751855},
 {'encounter_a': 21055302,
  'encounter_b': 22370573,
  'similarity': 0.26317848730263427},
 {'encounter_a': 21055302,
  'encounter_b': 25811835,
  'similarity': 0.25295260853212204},
 {'encounter_a': 21055302,
  'encounter_b': 20150600,
  'similarity': 0.21727291682835115},
 {'encounter_a': 24103112,
  'encounter_b': 21055302,
  'similarity': 0.25813833316