In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
import os
import sys

sys.path.insert(0, os.path.abspath(".."))
from dotenv import load_dotenv
import seaborn as sns
import pandas as pd
import numpy as np

from db import PostgresDB
from schemas import Proband
from cohorts import Cohort
from matplotlib import pyplot as plt


In [17]:
# Load settings from a .env file into the process environment; the DB_*
# values read with os.getenv when connecting are expected to come from here.
load_dotenv()


True

In [18]:
# Connection parameters come from the environment (DB_NAME, DB_HOST, DB_PORT,
# DB_USER). os.getenv yields None for any variable that is not set.
connection_settings = {
    "db_name": os.getenv("DB_NAME"),
    "host": os.getenv("DB_HOST"),
    "port": os.getenv("DB_PORT"),
    "user": os.getenv("DB_USER"),
}
db = PostgresDB(**connection_settings)


Connected to database


In [19]:
# Cohort definition: rows of mimiciv_derived.sepsis3 matched to their ICU stay
# on stay_id, returning the subject id and the hospital admission id.
# NOTE(review): the query does not de-duplicate, so an admission with more than
# one matching stay would be returned more than once - confirm that
# Cohort.from_query copes with repeated hadm_id values.
sepsis3_query = """
        SELECT sep.subject_id, sta.hadm_id
        FROM mimiciv_derived.sepsis3 sep, mimiciv_icu.icustays sta
        WHERE sep.stay_id = sta.stay_id; 
    """
cohort = Cohort.from_query(db=db, query=sepsis3_query, name="sepsis3")


In [20]:
# Prepare the cohort's data before any comparison is run.
# NOTE(review): with_tfidf_diagnoses=True presumably switches on TF-IDF
# weighting of the diagnoses - confirm against Cohort.initialize_data.
cohort.initialize_data(with_tfidf_diagnoses=True)


In [21]:
# Compare the cohort's encounters with each other. The result is consumed
# further down as an iterable of records carrying "encounter_a", "encounter_b"
# and a "similarity" mapping with one score per category.
# NOTE(review): the exact effect of scale_by_distribution and
# normalize_categories is defined in Cohort.compare_encounters - verify there.
similarity_scores = cohort.compare_encounters(
    scale_by_distribution=True, normalize_categories=True
)


Finished encounter 26184834
Finished encounter 23581541
Finished encounter 20345487
Finished encounter 23822395
Finished encounter 28994087
Finished encounter 22725460
Finished encounter 20321825
Finished encounter 23473524
Finished encounter 28662225
Finished encounter 21329021
Normalizing categories by scaling to range 0..1


In [22]:
def _insert_similarity(cur, table, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert one pairwise similarity row into ``table``.

    Shared implementation behind the ``insert_*_sim`` wrappers, which differ
    only in the table they write to.

    Args:
        cur: Database cursor exposing ``execute(query, params)``.
        table: Name of the target table. Must be a trusted, hard-coded
            identifier because it is interpolated into the statement text.
        hadm_id_a: Admission id of the first encounter.
        hadm_id_b: Admission id of the second encounter.
        similarity_score: Value stored in ``raw_similarity_value``.
        cohort_name: Value stored in ``cohort_name``.
    """
    # A table name cannot be sent as a bound parameter, so it is formatted into
    # the SQL text; the row values still go through placeholders.
    query = """
        INSERT INTO {table} (hadm_id_a, hadm_id_b, raw_similarity_value, cohort_name)
        VALUES (%s, %s, %s, %s);
    """.format(table=table)
    cur.execute(query, (hadm_id_a, hadm_id_b, similarity_score, cohort_name))


def insert_demographics_sim(cur, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert a demographics similarity row into ``demographics_similarity``."""
    _insert_similarity(
        cur, "demographics_similarity", hadm_id_a, hadm_id_b, similarity_score, cohort_name
    )


def insert_icd_sim(cur, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert an ICD diagnoses similarity row into ``icd_diagnoses_similarity``."""
    _insert_similarity(
        cur, "icd_diagnoses_similarity", hadm_id_a, hadm_id_b, similarity_score, cohort_name
    )


def insert_lab_sim(cur, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert a lab events similarity row into ``labevents_similarity``."""
    _insert_similarity(
        cur, "labevents_similarity", hadm_id_a, hadm_id_b, similarity_score, cohort_name
    )


def insert_vitalsign_sim(cur, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert a vital signs similarity row into ``vitalsigns_similarity``."""
    _insert_similarity(
        cur, "vitalsigns_similarity", hadm_id_a, hadm_id_b, similarity_score, cohort_name
    )


def insert_inputevents_sim(cur, hadm_id_a, hadm_id_b, similarity_score, cohort_name):
    """Insert an input events similarity row into ``inputevents_similarity``."""
    _insert_similarity(
        cur, "inputevents_similarity", hadm_id_a, hadm_id_b, similarity_score, cohort_name
    )


In [23]:
# Write every computed similarity to its per-category table inside a single
# transaction: commit only when the whole batch succeeded, roll back otherwise.
cur = db.conn.cursor()
try:
    for item in similarity_scores:
        encounter_a = item["encounter_a"]
        encounter_b = item["encounter_b"]
        if encounter_a == encounter_b:
            # An encounter compared with itself is not stored.
            continue

        scores = item["similarity"]
        demographics_sim = scores["demographics_sim"]
        diagnoses_sim = scores["diagnoses_sim"]
        labevents_sim = scores["labevents_sim"]
        vitalsigns_sim = scores["vitalsigns_sim"]
        inputevents_sim = scores["inputevents_sim"]

        insert_demographics_sim(
            cur, encounter_a, encounter_b, demographics_sim, cohort.name
        )
        insert_icd_sim(cur, encounter_a, encounter_b, diagnoses_sim, cohort.name)
        insert_lab_sim(cur, encounter_a, encounter_b, labevents_sim, cohort.name)
        insert_vitalsign_sim(cur, encounter_a, encounter_b, vitalsigns_sim, cohort.name)
        insert_inputevents_sim(
            cur, encounter_a, encounter_b, inputevents_sim, cohort.name
        )

    db.conn.commit()
except Exception:
    # Discard the partial batch so the connection is not left inside a failed
    # transaction, then let the original error surface in the notebook.
    db.conn.rollback()
    raise
finally:
    # The cursor is released before the connection it belongs to, and also
    # when an insert fails.
    cur.close()

db.close()


In [24]:
# NOTE(review): the previous cell already ends with db.close(); this repeat is
# only harmless if PostgresDB.close() tolerates being called twice - confirm,
# or drop one of the two calls.
db.close()
