- Immunocompromised

- Antibiotics

- Previous HAIs

- Stay Duration

- Other general HAI Features


### Setup Code

In [89]:
# Import libraries
from datetime import timedelta
import os

import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from IPython.display import display, HTML, Image
%matplotlib inline

# Notebook-wide plot appearance: ggplot theme with a base font size of 20.
plt.style.use('ggplot')
plt.rcParams['font.size'] = 20

# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery
# Authenticate the Colab session before any BigQuery call is made.
auth.authenticate_user()

# Project used for BigQuery reads; also exported through the environment.
project_id = 'ml-health-application'   ### Please change project_id
os.environ["GOOGLE_CLOUD_PROJECT"] = project_id


def run_query(query, project_id=project_id):
  """Run a standard-SQL BigQuery query and return the result as a DataFrame.

  Args:
    query: SQL text to execute.
    project_id: Google Cloud project passed to the reader. The default is the
      value the module-level `project_id` had when this cell was executed.

  Returns:
    Whatever `pd.io.gbq.read_gbq` returns for the query.
  """
  read_options = {'project_id': project_id, 'dialect': 'standard'}
  return pd.io.gbq.read_gbq(query, **read_options)


# Dataset name (use mimic_demo for the demo data).
# NOTE(review): the queries visible in this part of the notebook hard-code
# `physionet-data.mimiciv_*` table paths and do not read this variable.
dataset = 'mimiciv'

In [131]:
# utils
from enum import Enum

class Type(Enum):
    """Selects which CTE join `get_patient_query` adds to the base patient query."""
    # Join the `ICDCodes` CTE on icd_code.
    ICDCodes = 1
    # Join the `ICUStay` CTE on hadm_id and subject_id.
    ICUStay = 2
    # Join the `AdmidFirst` CTE where its hadm_id is lower than the diagnosis row's hadm_id.
    AdmidFirst = 3

def get_patient_query(terms, limit=True):
  """Build the patient-level query over diagnoses, admissions and patients.

  The query selects every column from `diagnoses_icd` (alias `d`) joined to
  `admissions` (alias `a`) and `patients` (alias `p`). Each entry of `terms`
  inserts one extra INNER JOIN against a CTE named `ICDCodes`, `ICUStay` or
  `AdmidFirst`; the caller must define those CTEs (for example with
  `generate_sql_with_ctes`).

  Args:
    terms: iterable of `Type` members; joins are emitted in the given order.
    limit: when true, "LIMIT 1000" is appended to the query.

  Returns:
    The SQL text (without any WITH clause).

  Raises:
    KeyError: if an entry of `terms` is not a known `Type` member.
  """
  # NOTE(review): the ICDCodes join matches on icd_code only (not icd_version).
  join_clause_by_type = {
      Type.ICDCodes: "INNER JOIN ICDCodes c\n  ON c.icd_code = d.icd_code",
      Type.ICUStay: ("INNER JOIN ICUStay stay\n"
                     "  ON d.hadm_id = stay.hadm_id and d.subject_id = stay.subject_id"),
      Type.AdmidFirst: "INNER JOIN AdmidFirst ad\n  ON ad.hadm_id < d.hadm_id",
  }
  selected_joins = '\n'.join(join_clause_by_type[term] for term in terms)
  limit_clause = "LIMIT 1000" if limit else ""

  query_lines = [
      "SELECT *",
      "  FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d",
      "  " + selected_joins,
      "  INNER JOIN `physionet-data.mimiciv_hosp.admissions` a",
      "    ON d.hadm_id = a.hadm_id",
      "  INNER JOIN `physionet-data.mimiciv_hosp.patients` p",
      "    ON d.subject_id = p.subject_id",
      "  " + limit_clause,
  ]
  return '\n'.join(query_lines)

# print(get_patient_query([Type.ICDCodes, Type.ICUStay]))

def generate_sql_with_ctes(cte_dict, main_query):
    """Prefix `main_query` with a single WITH clause built from `cte_dict`.

    Args:
        cte_dict: mapping of CTE name -> SQL text of that CTE. CTEs are emitted
            in the mapping's iteration order.
        main_query: the query that references the CTEs.

    Returns:
        "WITH\\n<name> AS (<sql>),\\n...\\n<main_query>", or `main_query`
        unchanged when `cte_dict` is empty (a bare "WITH" would be invalid SQL).
    """
    if not cte_dict:
        return main_query

    # One "name AS (query)" entry per CTE, comma-separated under a single WITH.
    cte_definitions = ',\n'.join(
        f"{name} AS ({query})" for name, query in cte_dict.items()
    )
    return f"WITH\n{cte_definitions}\n{main_query}"

def format_icd_codes(icd_codes):
    """Render ICD codes as a comma-separated list of quoted, dot-free literals.

    Each code has its '.' characters removed and is written with Python's
    `repr`, e.g. ["999.31", "N39.0"] -> "'99931', 'N390'". An empty input
    yields an empty string.
    """
    undotted_codes = (code.replace('.', '') for code in icd_codes)
    return ', '.join(repr(code) for code in undotted_codes)

def generate_sql_with_icd_codes(icd_codes, limit=True):
    """Build a query for diagnosis rows whose ICD code is in `icd_codes`.

    The query selects every column of `diagnoses_icd` joined to `admissions`
    and `patients`, so the result repeats `subject_id`/`hadm_id` columns.

    Args:
        icd_codes: iterable of ICD code strings; dots are stripped by
            `format_icd_codes` before the codes are placed in the IN list.
        limit: when true, "LIMIT 1000" is appended to the query.

    Returns:
        The SQL text.

    Raises:
        ValueError: if `icd_codes` is empty (the IN list would be `()`).
    """
    formatted_icd_codes = format_icd_codes(icd_codes)
    if not formatted_icd_codes:
        raise ValueError("icd_codes must contain at least one ICD code")

    icd_diagnose_query = """SELECT *
    FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciv_hosp.admissions` a
      ON d.hadm_id = a.hadm_id
    INNER JOIN `physionet-data.mimiciv_hosp.patients` p
      ON d.subject_id = p.subject_id
    WHERE d.icd_code in ({})
    {}"""
    return icd_diagnose_query.format(formatted_icd_codes, "LIMIT 1000" if limit else "")

def add_like_query(query, terms, label="long_title"):
    """Append a case-insensitive substring filter to `query`.

    Produces `<query> WHERE lower(<label>) LIKE '%t1%' OR lower(<label>) LIKE '%t2%' ...`.

    Args:
        query: SQL text without a WHERE clause.
        terms: iterable of substrings to search for. Each term is lower-cased,
            because it is compared against `lower(label)`.
        label: column (or expression) to search in.

    Returns:
        The SQL text with the WHERE clause appended.

    Raises:
        ValueError: if `terms` is empty (the WHERE clause would be dangling).
    """
    def _like_literal(term):
        # Lower-case to match lower(label); escape backslashes and single
        # quotes so the term cannot terminate the SQL string literal.
        # LIKE wildcards (% and _) are passed through unchanged.
        lowered = str(term).lower()
        return lowered.replace("\\", "\\\\").replace("'", "\\'")

    conditions = [f"lower({label}) LIKE '%{_like_literal(term)}%'" for term in terms]
    if not conditions:
        raise ValueError("terms must contain at least one search term")

    return f"{query} WHERE {' OR '.join(conditions)}"

# Compromised Immune System

In [100]:
# Urinary-abnormality search terms; defined here but not used by this cell.
urinary_abnorm_terms = [
    "obstruction",
    "urinary retention",
    "congenital anomaly",
    "vesicoureteral reflux",
    "bladder diverticulum",
]

# Immune-compromising conditions (e.g. leukemia, HIV, diabetes): patients with
# these conditions, or receiving immunosuppressants, are more susceptible to
# infections, including CAUTIs.
# The terms are matched as substrings of the diagnosis long_title, so related
# titles match too (e.g. "Family history of diabetes mellitus" appears in the
# ICU-stay output further down).
immune_compromised_terms = [
    "hiv", "aids", "diabetes", "organ transplant", "leukemia", "lymphoma",
    "cancer", "chronic kidney disease", "end-stage renal disease", "splenectomy",
    "bone marrow transplant", "malnutrition", "congenital immune deficiencies"
]

# ICD codes whose title contains any of the terms above.
immune_compromised_diagnoses = add_like_query(
    "SELECT icd_code, long_title FROM `physionet-data.mimiciv_hosp.d_icd_diagnoses`",
    immune_compromised_terms,
)

# Expose those codes to the patient query as the `ICDCodes` CTE.
immune_compromised_cte = {"ICDCodes": immune_compromised_diagnoses}

immune_compromised_patients = generate_sql_with_ctes(
    immune_compromised_cte,
    get_patient_query([Type.ICDCodes]),
)
run_query(immune_compromised_patients)

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,icd_code_1,long_title,subject_id_1,hadm_id_1,admittime,...,race,edregtime,edouttime,hospital_expire_flag,subject_id_2,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10003400,20214994,30,40390,9,40390,"Hypertensive chronic kidney disease, unspecifi...",10003400,20214994,2137-02-24 10:00:00,...,BLACK/AFRICAN AMERICAN,NaT,NaT,0,10003400,F,72,2134,2011 - 2013,2137-09-02
1,10024331,25768667,26,28521,9,28521,Anemia in chronic kidney disease,10024331,25768667,2144-09-07 15:17:00,...,WHITE - RUSSIAN,NaT,NaT,0,10024331,M,72,2140,2008 - 2010,2145-01-23
2,10031358,24522342,33,25072,9,25072,Diabetes with peripheral circulatory disorders...,10031358,24522342,2158-03-10 18:06:00,...,WHITE,2158-03-10 13:12:00,2158-03-10 19:51:00,0,10031358,M,58,2152,2008 - 2010,NaT
3,10031358,24522342,35,40390,9,40390,"Hypertensive chronic kidney disease, unspecifi...",10031358,24522342,2158-03-10 18:06:00,...,WHITE,2158-03-10 13:12:00,2158-03-10 19:51:00,0,10031358,M,58,2152,2008 - 2010,NaT
4,10031358,24522342,36,5859,9,5859,"Chronic kidney disease, unspecified",10031358,24522342,2158-03-10 18:06:00,...,WHITE,2158-03-10 13:12:00,2158-03-10 19:51:00,0,10031358,M,58,2152,2008 - 2010,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,16021373,23184382,30,25050,9,25050,"Diabetes with ophthalmic manifestations, type ...",16021373,23184382,2187-10-30 08:00:00,...,WHITE,NaT,NaT,0,16021373,F,72,2187,2014 - 2016,NaT
996,16032226,20056878,29,25062,9,25062,"Diabetes with neurological manifestations, typ...",16032226,20056878,2185-07-01 19:04:00,...,WHITE,2185-07-01 15:21:00,2185-07-01 20:48:00,0,16032226,M,73,2182,2008 - 2010,2185-10-07
997,16032226,20056878,30,40310,9,40310,"Hypertensive chronic kidney disease, benign, w...",16032226,20056878,2185-07-01 19:04:00,...,WHITE,2185-07-01 15:21:00,2185-07-01 20:48:00,0,16032226,M,73,2182,2008 - 2010,2185-10-07
998,16043614,26710319,26,25003,9,25003,Diabetes mellitus without mention of complicat...,16043614,26710319,2188-04-29 12:59:00,...,WHITE,2188-04-29 10:04:00,2188-04-29 15:40:00,0,16043614,M,49,2182,2008 - 2010,2188-07-22


# Antibiotics

The following commonly used immunosuppressant drugs could potentially increase the risk of HAIs (note that this section covers immunosuppressants rather than antibiotics):

1. Corticosteroids: These are a class of drugs that include:

* Prednisone
* Methylprednisolone (Medrol)
* Hydrocortisone
* Dexamethasone

2. Calcineurin Inhibitors: These drugs block the action of calcineurin, which plays a role in the activation of T cells.

* Cyclosporine (Neoral, Sandimmune, Gengraf)
* Tacrolimus (Prograf)

3. mTOR Inhibitors (Mammalian Target of Rapamycin): These drugs block the action of mTOR, a protein involved in cell multiplication.

* Sirolimus (Rapamune)
* Everolimus (Zortress)

4. Antiproliferative/Antimetabolite Agents: These prevent the synthesis of DNA and thus prevent the proliferation of cells.

* Mycophenolate mofetil (CellCept)
* Mycophenolate sodium (Myfortic)
* Azathioprine (Imuran, Azasan)
* Methotrexate

5. Biologic Agents: These are antibodies or related proteins that target specific parts of the immune system.

* Infliximab (Remicade)
* Adalimumab (Humira)
* Rituximab (Rituxan, MabThera)
* Abatacept (Orencia)
* Etanercept (Enbrel)
* Ustekinumab (Stelara)

6. Antithymocyte Globulin (ATG):

* Thymoglobulin
* Atgam

7. Other Agents:

* Cyclophosphamide
* Chlorambucil


In [30]:
# Patients who received an immunosuppressive prescription.
# Drug-name fragments matched as substrings of the lower-cased `drug` column.
immunosuppressive_prescriptions = [
    "prednisone",
    "cyclosporine",
    "tacrolimus",
    "mycophenolate",
    "azathioprine",
    "sirolimus",
    "rapamycin",
    "methotrexate",
    "rituximab",
    "basiliximab",
    "antithymocyte globulin",
]

# One row per distinct (patient, admission, drug) combination.
precriptions_query = (
    "SELECT DISTINCT subject_id, hadm_id, drug "
    "FROM `physionet-data.mimiciv_hosp.prescriptions`"
)
immunosuppressive_prescription_patients = add_like_query(
    precriptions_query,
    immunosuppressive_prescriptions,
    "drug",
)
run_query(immunosuppressive_prescription_patients)

Unnamed: 0,subject_id,hadm_id,drug
0,15354831,28353406,CycloSPORINE (Neoral) MODIFIED
1,19970563,22810064,PredniSONE
2,19517597,25065292,PredniSONE
3,12481952,29113809,Tacrolimus
4,14526420,23641073,Tacrolimus
...,...,...,...
62682,15884728,21277394,Methotrexate
62683,13680570,26561572,Methotrexate
62684,13680570,25449638,Methotrexate
62685,16934035,20526633,Methotrexate


# Previous HAIs

**ICD codes related to common HAIs**

ICD-9 codes:

1. **Central Line-Associated Bloodstream Infections (CLABSIs)**:
   - 999.31: Other and unspecified infection due to central venous catheter
   - 999.32: Bloodstream infection due to central venous catheter

2. **Catheter-Associated Urinary Tract Infections (CAUTIs)**:
   - 996.64: Infection and inflammatory reaction due to indwelling urinary catheter
   - 599.0: Urinary tract infection, site not specified

3. **Ventilator Associated Pneumonia (VAP)**:
   - VAP and related nosocomial pneumonia can be represented by:
     - 997.31: Ventilator-associated pneumonia (However, note that this is a complication code and does not necessarily identify the causative agent or indicate that it was hospital-acquired.)
     - 482.41, 482.42, etc.: Pneumonia due to specific bacteria, which might be used in conjunction with clinical judgment to identify potential VAP cases.

4. **Other potential HAIs**:
   - 996.62: Infection due to other vascular device, implant, and graft
   - 999.3: Other and unspecified infection due to medical care

ICD-10 codes:

1. **Central Line-Associated Bloodstream Infections (CLABSIs)**:
   - T80.211A: Bloodstream infection due to central venous catheter
   - T80.212A: Local infection due to central venous catheter
   - T80.213A: Bloodstream infection following infusion, transfusion, and therapeutic injection

2. **Catheter-Associated Urinary Tract Infections (CAUTIs)**:
   - N99.511: Infection due to indwelling urinary catheter
   - N39.0: Urinary tract infection, site not specified

3. **Ventilator Associated Pneumonia (VAP)**:
   - J95.851: Ventilator associated pneumonia
   - J15.9: Bacterial pneumonia, unspecified

4. **Other potential HAIs**:
   - T81.4XXA: Infection following a procedure
   - T88.0XXA: Infection following immunization
   - T80.8XXA: Other complications following infusion, transfusion, and therapeutic injection




In [134]:
# ICD-9 codes for the HAIs listed above (dotted form; dots are stripped later).
icd_9_list = [
    "999.31", "999.32",            # CLABSI
    "996.64", "599.0",             # CAUTI
    "997.31", "482.41", "482.42",  # VAP
    "996.62", "999.3",             # other potential HAIs
]
# ICD-10 codes for the same categories.
icd_10_list = [
    "T80.211A", "T80.212A", "T80.213A",  # CLABSI
    "N99.511", "N39.0",                  # CAUTI
    "J95.851", "J15.9",                  # VAP
    "T81.4XXA", "T88.0XXA", "T80.8XXA",  # other potential HAIs
]

hai_icd_codes = icd_10_list + icd_9_list

# Code/title lookup restricted to the HAI codes; reused later as an `ICDCodes` CTE.
hai_icd_diagnoses = (
    "SELECT icd_code, long_title FROM `physionet-data.mimiciv_hosp.d_icd_diagnoses` "
    f"where icd_code in ({format_icd_codes(hai_icd_codes)})"
)

# Patients with any HAI ICD code.
run_query(generate_sql_with_icd_codes(hai_icd_codes))

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,subject_id_1,hadm_id_1,admittime,dischtime,deathtime,...,race,edregtime,edouttime,hospital_expire_flag,subject_id_2,gender,anchor_age,anchor_year,anchor_year_group,dod
0,14527596,28165014,1,99931,9,14527596,28165014,2165-11-28 19:02:00,2165-12-07 17:23:00,NaT,...,BLACK/CARIBBEAN ISLAND,NaT,NaT,0,14527596,F,54,2163,2008 - 2010,NaT
1,15902500,27166666,1,5990,9,15902500,27166666,2124-10-26 19:37:00,2124-10-28 16:30:00,NaT,...,BLACK/CAPE VERDEAN,NaT,NaT,0,15902500,F,39,2122,2008 - 2010,NaT
2,16625434,25780525,1,99662,9,16625434,25780525,2135-09-05 17:38:00,2135-10-14 15:08:00,NaT,...,WHITE,NaT,NaT,0,16625434,M,48,2135,2008 - 2010,NaT
3,16625434,25780525,7,5990,9,16625434,25780525,2135-09-05 17:38:00,2135-10-14 15:08:00,NaT,...,WHITE,NaT,NaT,0,16625434,M,48,2135,2008 - 2010,NaT
4,10336412,24756293,3,5990,9,10336412,24756293,2192-09-07 13:51:00,2192-09-08 17:26:00,NaT,...,BLACK/AFRICAN AMERICAN,2192-09-07 09:24:00,2192-09-07 14:49:00,0,10336412,M,69,2190,2008 - 2010,2195-02-23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,17900533,20463954,8,5990,9,17900533,20463954,2148-12-24 03:03:00,2149-01-15 18:00:00,NaT,...,UNKNOWN,NaT,NaT,0,17900533,M,80,2148,2008 - 2010,2149-02-21
996,17966508,28775682,9,99731,9,17966508,28775682,2131-09-18 21:56:00,2131-10-06 16:46:00,NaT,...,UNKNOWN,NaT,NaT,0,17966508,F,68,2131,2011 - 2013,2131-10-12
997,18054935,29200153,11,N390,10,18054935,29200153,2164-11-06 20:47:00,2164-12-03 15:43:00,NaT,...,WHITE - OTHER EUROPEAN,2164-11-06 15:52:00,2164-11-06 23:30:00,0,18054935,F,71,2160,2011 - 2013,NaT
998,18161587,23587598,5,99731,9,18161587,23587598,2170-09-12 22:57:00,2170-09-27 16:11:00,NaT,...,UNKNOWN,NaT,NaT,0,18161587,F,46,2170,2011 - 2013,NaT


In [None]:
# HAI admissions that precede an immune-compromising admission of the same patient.

immune_compromised_icd_codes = run_query(immune_compromised_diagnoses)["icd_code"]

# Project only the columns the self-join needs. generate_sql_with_icd_codes()
# cannot be embedded as a subquery here: its `SELECT *` over three joined tables
# yields several `subject_id`/`hadm_id` columns, which makes
# `prev_hai_pat.subject_id` ambiguous, and its default `LIMIT 1000` truncates
# each side before the join.
admissions_with_codes_sql = """SELECT DISTINCT d.subject_id, d.hadm_id, a.admittime
  FROM `physionet-data.mimiciv_hosp.diagnoses_icd` d
  INNER JOIN `physionet-data.mimiciv_hosp.admissions` a
    ON d.hadm_id = a.hadm_id
  WHERE d.icd_code IN ({})"""

# NOTE(review): the original filter was `prev_hai_pat.hadm_id > pat.hadm_id`.
# Admission ids are not assumed to be chronological, so admission time is used
# instead, and the HAI admission is required to come first ("previous" HAI).
# Confirm this matches the intended cohort.
query = """
SELECT DISTINCT prev_hai_pat.*
FROM ({}) prev_hai_pat
INNER JOIN ({}) pat
ON prev_hai_pat.subject_id = pat.subject_id
WHERE prev_hai_pat.admittime < pat.admittime
""".format(
    admissions_with_codes_sql.format(format_icd_codes(hai_icd_codes)),
    admissions_with_codes_sql.format(format_icd_codes(immune_compromised_icd_codes)),
)
# Uncomment to inspect or execute the query:
# print(query)
# run_query(query)
# TODO: derive immune_compromised_patients_with_prev and
# immunosuppressive_prescription_patients_with_prev from this query.

# Stay Duration

In [127]:
# ICU length of stay (`los`) for admissions with an immune-compromising diagnosis.
# The join is per ICU stay, so an admission with several ICU stays yields one
# row per stay for each matching diagnosis.
icu_stay_query = "SELECT subject_id, hadm_id, los FROM `physionet-data.mimiciv_icu.icustays`"

patients_with_los_cte = {
    "ICUStay": icu_stay_query,
    "ICDCodes": immune_compromised_diagnoses,
}

patients_with_los = generate_sql_with_ctes(
    patients_with_los_cte,
    get_patient_query([Type.ICUStay, Type.ICDCodes]),
)
run_query(patients_with_los)


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,subject_id_1,hadm_id_1,los,icd_code_1,long_title,...,race,edregtime,edouttime,hospital_expire_flag,subject_id_3,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10003400,20214994,30,40390,9,10003400,20214994,12.911308,40390,"Hypertensive chronic kidney disease, unspecifi...",...,BLACK/AFRICAN AMERICAN,NaT,NaT,0,10003400,F,72,2134,2011 - 2013,2137-09-02
1,10035168,26166196,31,V180,9,10035168,26166196,8.809178,V180,Family history of diabetes mellitus,...,WHITE,NaT,NaT,1,10035168,F,56,2144,2008 - 2010,2147-01-07
2,10038081,20755971,29,5859,9,10038081,20755971,3.698519,5859,"Chronic kidney disease, unspecified",...,UNKNOWN,NaT,NaT,1,10038081,F,63,2115,2014 - 2016,2115-10-12
3,10038081,20755971,30,40390,9,10038081,20755971,3.698519,40390,"Hypertensive chronic kidney disease, unspecifi...",...,UNKNOWN,NaT,NaT,1,10038081,F,63,2115,2014 - 2016,2115-10-12
4,10098215,22394571,32,5859,9,10098215,22394571,12.797535,5859,"Chronic kidney disease, unspecified",...,WHITE,2118-05-03 15:58:00,2118-05-04 02:25:00,0,10098215,F,56,2117,2008 - 2010,2118-09-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,16662264,26390489,26,25083,9,16662264,26390489,8.355405,25083,"Diabetes with other specified manifestations, ...",...,WHITE,2138-07-03 18:18:00,2138-07-03 20:52:00,0,16662264,F,47,2133,2008 - 2010,2139-07-14
996,16686303,22817956,26,25000,9,16686303,22817956,6.385868,25000,Diabetes mellitus without mention of complicat...,...,WHITE,2176-12-05 16:13:00,2176-12-05 23:25:00,0,16686303,F,76,2175,2011 - 2013,2177-05-13
997,16686303,22817956,26,25000,9,16686303,22817956,3.131898,25000,Diabetes mellitus without mention of complicat...,...,WHITE,2176-12-05 16:13:00,2176-12-05 23:25:00,0,16686303,F,76,2175,2011 - 2013,2177-05-13
998,16690709,22616266,28,28521,9,16690709,22616266,1.270613,28521,Anemia in chronic kidney disease,...,WHITE,2177-02-08 20:23:00,2177-02-09 01:00:00,0,16690709,M,79,2175,2008 - 2010,NaT


In [135]:
# ICU length of stay (`los`) for admissions with an HAI diagnosis.
# The join is per ICU stay, so an admission with several ICU stays yields one
# row per stay for each matching diagnosis.
# Note: this cell rebinds `icu_stay_query`, `patients_with_los_cte` and
# `patients_with_los` from the immune-compromised cell above.
icu_stay_query = "SELECT subject_id, hadm_id, los FROM `physionet-data.mimiciv_icu.icustays`"

patients_with_los_cte = {
    "ICUStay": icu_stay_query,
    "ICDCodes": hai_icd_diagnoses,
}

patients_with_los = generate_sql_with_ctes(
    patients_with_los_cte,
    get_patient_query([Type.ICUStay, Type.ICDCodes]),
)
run_query(patients_with_los)

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,subject_id_1,hadm_id_1,los,icd_code_1,long_title,...,race,edregtime,edouttime,hospital_expire_flag,subject_id_3,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10723086,20456851,28,99932,9,10723086,20456851,54.843808,99932,Bloodstream infection due to central venous ca...,...,BLACK/AFRICAN,2191-09-14 23:43:00,2191-09-16 01:20:00,0,10723086,F,41,2185,2008 - 2010,2193-02-12
1,10723086,20456851,28,99932,9,10723086,20456851,5.748646,99932,Bloodstream infection due to central venous ca...,...,BLACK/AFRICAN,2191-09-14 23:43:00,2191-09-16 01:20:00,0,10723086,F,41,2185,2008 - 2010,2193-02-12
2,10947245,24307798,27,99931,9,10947245,24307798,25.729398,99931,Other and unspecified infection due to central...,...,WHITE,NaT,NaT,0,10947245,F,43,2135,2008 - 2010,NaT
3,10947245,24307798,27,99931,9,10947245,24307798,5.924722,99931,Other and unspecified infection due to central...,...,WHITE,NaT,NaT,0,10947245,F,43,2135,2008 - 2010,NaT
4,10947245,24307798,27,99931,9,10947245,24307798,3.956701,99931,Other and unspecified infection due to central...,...,WHITE,NaT,NaT,0,10947245,F,43,2135,2008 - 2010,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,11226194,24922551,2,48241,9,11226194,24922551,2.614479,48241,Methicillin susceptible pneumonia due to Staph...,...,WHITE,NaT,NaT,0,11226194,M,83,2175,2008 - 2010,NaT
996,11226194,24922551,2,48241,9,11226194,24922551,11.174988,48241,Methicillin susceptible pneumonia due to Staph...,...,WHITE,NaT,NaT,0,11226194,M,83,2175,2008 - 2010,NaT
997,11257221,28123938,2,5990,9,11257221,28123938,2.474630,5990,"Urinary tract infection, site not specified",...,WHITE,NaT,NaT,0,11257221,F,85,2144,2008 - 2010,NaT
998,11257221,28123938,2,5990,9,11257221,28123938,1.419456,5990,"Urinary tract infection, site not specified",...,WHITE,NaT,NaT,0,11257221,F,85,2144,2008 - 2010,NaT


# Other general HAI Features