In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

sys.path.insert(0, os.path.abspath(".."))
from dotenv import load_dotenv
import seaborn as sns
import pandas as pd
import numpy as np

from db import PostgresDB
from schemas import Proband
from cohorts import Cohort
from matplotlib import pyplot as plt

In [3]:
load_dotenv()

True

In [4]:
db = PostgresDB(
    db_name=os.getenv("DB_NAME"),
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
)

Connected to database


## Compare Two Patients

In [5]:
cohort = Cohort(db=db)

In [6]:
cohort.get_random_cohort(size=10)
cohort.initialize_data()
cohort.remove_encounter_with_missing_category()
cohort.similarity_encounters = cohort.similarity_encounters[:2]
# filter patients that have all categories

In [8]:
import pandas as pd

def create_encounter_table(encounters):
    encounters = [e.dict() for e in encounters]
    # Create an empty dataframe
    df_list = []

    # Loop through each encounter and add a row to the dataframe
    for enc in encounters:
        # Get the relevant information from the encounter dictionary
        hadm_id = enc['hadm_id']
        age = enc['demographics']['age']
        gender = enc['demographics']['gender']
        ethnicity = enc['demographics']['ethnicity']
        diagnoses = ', '.join(sorted([d['code'] for d in enc['diagnoses']]))
        labevents = ', '.join(sorted([f"{l['item_id']}: {l['value']}" for l in enc['labevents']]))
        
        # Create a sub-table for the vitalsigns column
        vitalsigns_dict = {}
        for vs in enc['vitalsigns']:
            vs_name = vs['name']
            vs_value = vs['value']
            vitalsigns_dict[vs_name] = vs_value
        vitalsigns = pd.Series(vitalsigns_dict).sort_index().to_frame().T
        vitalsigns.columns = [f"{c}_vitalsigns" for c in vitalsigns.columns]

        inputevents = ', '.join(sorted([str(i['item_id']) for i in enc['inputevents']]))

        # Add a row to the dataframe
        df_list.append([hadm_id, age, gender, ethnicity, diagnoses, labevents, inputevents])
        df_list[-1].extend(vitalsigns.values.tolist()[0])

    # Create a pandas dataframe from the list of rows
    columns = ['hadm_id', 'age', 'gender', 'ethnicity', 'diagnoses', 'labevents', 'inputevents']
    columns.extend([f"{c}_vitalsigns" for c in vitalsigns_dict.keys()])
    df = pd.DataFrame(df_list, columns=columns)

    # Order the columns and rows alphabetically
    # df = df.reindex(sorted(df.columns), axis=1)
    df = df.sort_values(by='hadm_id')
    df = df.set_index("hadm_id")

    return df


In [9]:
import itertools


def display_encounter_similarities(encounters):
    encounters = [e.dict() for e in encounters]
    for i, j in itertools.combinations(range(len(encounters)), 2):
        enc_i = encounters[i]
        enc_j = encounters[j]
        print(
            f"Comparing encounter {enc_i['hadm_id']} with encounter {enc_j['hadm_id']}:"
        )

        # Age difference
        age_i = enc_i["demographics"]["age"]
        age_j = enc_j["demographics"]["age"]
        print(f"Age difference: {abs(age_i - age_j)} years")

        # Gender comparison
        gender_i = enc_i["demographics"]["gender"]
        gender_j = enc_j["demographics"]["gender"]
        if gender_i == gender_j:
            print(f"Gender: {gender_i}")
        else:
            print(f"Gender difference")

        # Ethnicity comparison
        ethnicity_i = enc_i["demographics"]["ethnicity"]
        ethnicity_j = enc_j["demographics"]["ethnicity"]
        if ethnicity_i == ethnicity_j:
            print(f"Ethnicity: {ethnicity_i}")
        else:
            print("Ethnicity difference")

        # Common diagnoses
        diag_i = set([diag["code"] for diag in enc_i["diagnoses"]])
        diag_j = set([diag["code"] for diag in enc_j["diagnoses"]])
        common_diagnoses = diag_i.intersection(diag_j)
        if common_diagnoses:
            print(f"Common diagnoses codes: {', '.join(common_diagnoses)}")
        else:
            print("No common diagnoses codes")

        # Common labevents
        lab_i = {(lab["item_id"], lab["value"]) for lab in enc_i["labevents"]}
        lab_j = {(lab["item_id"], lab["value"]) for lab in enc_j["labevents"]}
        common_labevents = lab_i.intersection(lab_j)
        if common_labevents:
            print("Common labevent item ids:")
            for item_id, value in common_labevents:
                val_i = next(
                    lab["value"]
                    for lab in enc_i["labevents"]
                    if lab["item_id"] == item_id
                )
                val_j = next(
                    lab["value"]
                    for lab in enc_j["labevents"]
                    if lab["item_id"] == item_id
                )
                print(f"Item {item_id}: {abs(val_i - val_j)}")
        else:
            print("No common labevent item ids")

        # Common inputevents
        input_i = {inp["item_id"] for inp in enc_i["inputevents"]}
        input_j = {inp["item_id"] for inp in enc_j["inputevents"]}
        common_input = input_i.intersection(input_j)
        if common_input:
            print(
                f"Common inputevent item ids: {', '.join(map(str, list(common_input)))}"
            )
        else:
            print("No common inputevent item ids")

        # Vitalsigns difference
        vitals_i = {vital["id"]: vital["value"] for vital in enc_i["vitalsigns"]}
        vitals_j = {vital["id"]: vital["value"] for vital in enc_j["vitalsigns"]}
        common_vitals = set(vitals_i.keys()).intersection(set(vitals_j.keys()))
        if common_vitals:
            print("Common vitalsigns:")
            for vital_name in sorted(common_vitals):
                val_i = vitals_i[vital_name]
                val_j = vitals_j[vital_name]
                print(f"{vital_name}: {abs(val_i - val_j)}")
        else:
            print("No common vitalsigns")

        print()  # Blank line to separate encounters


In [10]:
create_encounter_table(cohort.similarity_encounters[:2])

Unnamed: 0_level_0,age,gender,ethnicity,diagnoses,labevents,inputevents,heart_rate_vitalsigns,sbp_ni_vitalsigns,dbp_ni_vitalsigns,mbp_ni_vitalsigns,resp_rate_vitalsigns,temperature_vitalsigns,spo2_vitalsigns,glucose_vitalsigns
hadm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
24888864,91,M,WHITE,"185, 2252, 2948, 36250, 4019, 4271, 85220, E88...","50861: 19.0, 50863: 95.0, 50868: 12.0, 50878: ...","223258, 225823, 225823, 226452, 226452, 226452...",59.866667,168.666667,64.6,77.466667,18.133333,133.6,97.066667,36.29
28257726,70,M,ASIAN - CHINESE,"2761, 32723, 34290, 3484, 34889, 37887, 4019, ...","50861: 44.0, 50862: 3.6, 50863: 62.0, 50867: 2...","220949, 220949, 221828, 221828, 222042, 222042...",70.0,136.0,67.458333,82.0,15.041667,128.0,96.375,36.922


In [11]:
display_encounter_similarities(cohort.similarity_encounters[:2])

Comparing encounter 24888864 with encounter 28257726:
Age difference: 21 years
Gender: M
Ethnicity difference
Common diagnoses codes: 4019
No common labevent item ids
Common inputevent item ids: 226452
Common vitalsigns:
dbp_ni: 10.133333333333333
glucose: 32.66666666666666
heart_rate: 2.8583333333333343
mbp_ni: 4.533333333333331
resp_rate: 3.091666666666667
sbp_ni: 5.599999999999994
spo2: 0.6916666666666629
temperature: 0.6319999999999979



In [12]:
cohort.compare_encounters(scale_by_distribution=True, normalize_categories=False)

Finished encounter 24888864
Finished encounter 28257726


[{'encounter_a': 24888864,
  'encounter_b': 24888864,
  'similarity': {'demographics_sim': 1.0,
   'diagnoses_sim': 1.0,
   'labevents_sim': 1.0,
   'vitalsigns_sim': 1.0,
   'inputevents_sim': 1.0}},
 {'encounter_a': 24888864,
  'encounter_b': 28257726,
  'similarity': {'demographics_sim': 0.8846153846153846,
   'diagnoses_sim': 0.1688287055250619,
   'labevents_sim': 0.22168996782046146,
   'vitalsigns_sim': 0.16870514849304458,
   'inputevents_sim': 0.09090909090909091}},
 {'encounter_a': 28257726,
  'encounter_b': 24888864,
  'similarity': {'demographics_sim': 0.8846153846153846,
   'diagnoses_sim': 0.1688287055250619,
   'labevents_sim': 0.22168996782046146,
   'vitalsigns_sim': 0.16870514849304458,
   'inputevents_sim': 0.09090909090909091}},
 {'encounter_a': 28257726,
  'encounter_b': 28257726,
  'similarity': {'demographics_sim': 1.0,
   'diagnoses_sim': 1.0,
   'labevents_sim': 1.0,
   'vitalsigns_sim': 1.0,
   'inputevents_sim': 1.0}}]