In [14]:
# Import libraries
from datetime import timedelta
import os

import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from IPython.display import display, HTML, Image
%matplotlib inline

plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20})

# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery

# authenticate
auth.authenticate_user()

# Set up environment variables
project_id = 'eighth-arbor-396212'

os.environ["GOOGLE_CLOUD_PROJECT"] = project_id

# Read data from BigQuery into pandas dataframes.
def run_query(query, project_id=project_id):
  return pd.io.gbq.read_gbq(
      query,
      project_id=project_id,
      dialect='standard')

# set the dataset
dataset = 'mimiciv'


In [17]:
def generate_surgery_query(surgery_terms, surgery_cte_name, limit=True):
    term_conditions = " OR ".join([f"LOWER(u.long_title) LIKE '%{term}%'" for term in surgery_terms])

    cte = f"""WITH {surgery_cte_name}Procedures AS (
        SELECT icd_code, long_title
        FROM `physionet-data.mimiciv_hosp.d_icd_procedures` u
        WHERE {term_conditions}
    )"""

    main_query = f"""SELECT DISTINCT p.subject_id, p.hadm_id, long_title
    FROM `physionet-data.mimiciv_hosp.procedures_icd` p
    INNER JOIN {surgery_cte_name}Procedures u
    ON p.icd_code = u.icd_code
    {'LIMIT 1000' if limit else ''};
    """

    return cte + main_query


In [18]:
urological_terms = ["bladder", "kidney", "prostate", "penile", "penis",
                    "ureter", "urethra", "cystoscopy", "nephrectomy",
                    "lithotripsy", "renal", "orchidectomy", "orchiectomy", "vasectomy"]

urological_query = generate_surgery_query(urological_terms, "Urological")
run_query(urological_query)

Unnamed: 0,subject_id,hadm_id,long_title
0,10133075,27954510,Bilateral adrenalectomy
1,12930339,23232319,Bilateral adrenalectomy
2,13281479,22218287,Bilateral adrenalectomy
3,19715573,28578075,Bilateral adrenalectomy
4,10071129,25200387,Partial nephrectomy
...,...,...,...
995,13533354,25369547,Radical prostatectomy
996,13556533,29481103,Radical prostatectomy
997,13637689,26605817,Radical prostatectomy
998,13638768,28683724,Radical prostatectomy


In [19]:
# Terms associated with surgeries that increase the risk of VAP
pulmonaryterms = ["thoracic", "lung", "pulmonary", "tracheostomy", "lobectomy",
                  "bronchoscopy", "pneumonectomy", "thoracotomy"]

# Generate the CTE and main query for these surgeries
pulmonary_query = generate_surgery_query(pulmonaryterms, "Pulmonary")

run_query(pulmonary_query)

Unnamed: 0,subject_id,hadm_id,long_title
0,10048710,20817906,Unilateral thyroid lobectomy
1,10062807,21729211,Unilateral thyroid lobectomy
2,10069524,26951351,Unilateral thyroid lobectomy
3,10100966,25966859,Unilateral thyroid lobectomy
4,10107267,21092626,Unilateral thyroid lobectomy
...,...,...,...
995,11312346,24837066,Thoracoscopic excision of lesion or tissue of ...
996,11382484,23692452,Thoracoscopic excision of lesion or tissue of ...
997,11382484,26091681,Thoracoscopic excision of lesion or tissue of ...
998,11392794,24462363,Thoracoscopic excision of lesion or tissue of ...


In [20]:
vascular_terms = ["catheter", "central venous", "vascular access", "line placement",
                "port insertion", "jugular", "subclavian", "femoral", "venous catheter"]

vascular_query = generate_surgery_query(vascular_terms, "CLABSI")
run_query(vascular_query)

Unnamed: 0,subject_id,hadm_id,long_title
0,10017764,23125577,Ureteral catheterization
1,10028159,20401884,Ureteral catheterization
2,10036086,27288283,Ureteral catheterization
3,10080443,24427299,Ureteral catheterization
4,10083097,23498090,Ureteral catheterization
...,...,...,...
995,17700343,21806966,Insertion of intercostal catheter for drainage
996,17702558,20136761,Insertion of intercostal catheter for drainage
997,17704774,26088028,Insertion of intercostal catheter for drainage
998,17735421,23438971,Insertion of intercostal catheter for drainage
