# CLABSI

# In [None]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
from datetime import timedelta
import matplotlib.pyplot as plt
import seaborn as sns



# CLABSI lab-trend analysis.
#
# Pulls selected lab results for stays that have a CLABSI diagnosis, computes
# how many days before the diagnosis each measurement was charted, and plots
# each lab test against that offset.
#
# NOTE: `project_id` and `dataset` are not defined in this section; they must
# already be set before it runs.

# Define the CLABSI-specific lab tests you want to analyze
clabsi_lab_tests = ['WBC', 'Platelet Count', 'Lactate']

# Quoted, comma-separated list for the SQL IN (...) clause. The names come
# from the hard-coded list above, not from user input.
clabsi_lab_names_sql = ','.join(["'" + test + "'" for test in clabsi_lab_tests])

# Query to fetch CLABSI-related Lab Results
query = f"""
SELECT
    le.stay_id,
    le.charttime,
    le.lab_name,
    le.lab_value,
    le.lab_units
FROM
    `{project_id}.{dataset}.lab_events` AS le
INNER JOIN
    `{project_id}.{dataset}.clabsi_diagnoses` AS clabsi
ON
    le.stay_id = clabsi.stay_id
WHERE
    le.lab_name IN ({clabsi_lab_names_sql})
"""

# Load data from BigQuery
clabsi_lab_results = pd.read_gbq(query, dialect='standard')

# Data cleaning and preprocessing
clabsi_lab_results['charttime'] = pd.to_datetime(clabsi_lab_results['charttime'])
# The pivot below averages values, so anything non-numeric is coerced to NaN
# and dropped rather than breaking the aggregation.
clabsi_lab_results['lab_value'] = pd.to_numeric(
    clabsi_lab_results['lab_value'], errors='coerce'
)
# Only require the fields the analysis uses; a missing `lab_units` should not
# discard an otherwise valid measurement.
# drop_duplicates() undoes row fan-out from the SQL join when a stay has more
# than one diagnosis row.
clabsi_lab_results = (
    clabsi_lab_results
    .dropna(subset=['stay_id', 'charttime', 'lab_name', 'lab_value'])
    .drop_duplicates()
)

# Load the diagnosis timestamps
clabsi_diagnoses = pd.read_gbq(
    f"SELECT stay_id, clabsi_datetime FROM `{project_id}.{dataset}.clabsi_diagnoses`",
    dialect='standard',
)
clabsi_diagnoses['clabsi_datetime'] = pd.to_datetime(clabsi_diagnoses['clabsi_datetime'])

# One reference time per stay: the earliest recorded diagnosis.
# NOTE(review): assumes the first diagnosis is the relevant anchor when a stay
# has several rows - confirm.
first_clabsi_per_stay = (
    clabsi_diagnoses
    .dropna(subset=['clabsi_datetime'])
    .groupby('stay_id', as_index=False)['clabsi_datetime']
    .min()
)

# Calculate days before CLABSI diagnosis.
# The offset is computed per lab row *before* pivoting: the pivot is keyed by
# chart time, so a stay_id lookup against its index can never match.
clabsi_labs_with_dx = clabsi_lab_results.merge(
    first_clabsi_per_stay, on='stay_id', how='inner'
)
clabsi_labs_with_dx['days_before_clabsi'] = (
    clabsi_labs_with_dx['clabsi_datetime'] - clabsi_labs_with_dx['charttime']
).dt.days

# Create a pivot table for lab results: one row per stay / chart time, one
# column per lab test. reset_index() exposes the keys as ordinary columns so
# they can be used as plot axes.
lab_results_pivot = clabsi_labs_with_dx.pivot_table(
    index=['stay_id', 'charttime', 'days_before_clabsi'],
    columns='lab_name',
    values='lab_value',
).reset_index()

# Analyze and visualize Lab Results
sns.set_style("whitegrid")

for lab_test in clabsi_lab_tests:
    # A test with no returned rows has no column in the pivot.
    if lab_test not in lab_results_pivot.columns:
        print(f'No {lab_test} results found for CLABSI stays; skipping plot.')
        continue
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=lab_results_pivot, x='days_before_clabsi', y=lab_test)
    plt.title(f'Trends in {lab_test} before CLABSI Diagnosis')
    plt.xlabel('Days Before CLABSI Diagnosis')
    plt.ylabel(lab_test)
    plt.show()


# VAP

# In [None]:

# VAP lab-trend analysis.
#
# Pulls selected lab results for stays that have a VAP diagnosis, computes how
# many days before the diagnosis each measurement was charted, and plots each
# lab test against that offset.
#
# NOTE: `project_id` and `dataset` are not defined in this section; they must
# already be set before it runs.

# Define the VAP-specific lab tests you want to analyze
vap_lab_tests = ['White Blood Cells', 'CRP', 'Procalcitonin']

# Quoted, comma-separated list for the SQL IN (...) clause. The names come
# from the hard-coded list above, not from user input.
vap_lab_names_sql = ','.join(["'" + test + "'" for test in vap_lab_tests])

# Query to fetch VAP-related Lab Results
query = f"""
SELECT
    le.stay_id,
    le.charttime,
    le.lab_name,
    le.lab_value,
    le.lab_units
FROM
    `{project_id}.{dataset}.lab_events` AS le
INNER JOIN
    `{project_id}.{dataset}.vap_diagnoses` AS vap
ON
    le.stay_id = vap.stay_id
WHERE
    le.lab_name IN ({vap_lab_names_sql})
"""

# Load data from BigQuery
vap_lab_results = pd.read_gbq(query, dialect='standard')

# Data cleaning and preprocessing
vap_lab_results['charttime'] = pd.to_datetime(vap_lab_results['charttime'])
# The pivot below averages values, so anything non-numeric is coerced to NaN
# and dropped rather than breaking the aggregation.
vap_lab_results['lab_value'] = pd.to_numeric(
    vap_lab_results['lab_value'], errors='coerce'
)
# Only require the fields the analysis uses; a missing `lab_units` should not
# discard an otherwise valid measurement.
# drop_duplicates() undoes row fan-out from the SQL join when a stay has more
# than one diagnosis row.
vap_lab_results = (
    vap_lab_results
    .dropna(subset=['stay_id', 'charttime', 'lab_name', 'lab_value'])
    .drop_duplicates()
)

# Load the diagnosis timestamps
vap_diagnoses = pd.read_gbq(
    f"SELECT stay_id, vap_datetime FROM `{project_id}.{dataset}.vap_diagnoses`",
    dialect='standard',
)
vap_diagnoses['vap_datetime'] = pd.to_datetime(vap_diagnoses['vap_datetime'])

# One reference time per stay: the earliest recorded diagnosis.
# NOTE(review): assumes the first diagnosis is the relevant anchor when a stay
# has several rows - confirm.
first_vap_per_stay = (
    vap_diagnoses
    .dropna(subset=['vap_datetime'])
    .groupby('stay_id', as_index=False)['vap_datetime']
    .min()
)

# Calculate days before VAP diagnosis.
# The offset is computed per lab row *before* pivoting: the pivot is keyed by
# chart time, so a stay_id lookup against its index can never match.
vap_labs_with_dx = vap_lab_results.merge(
    first_vap_per_stay, on='stay_id', how='inner'
)
vap_labs_with_dx['days_before_vap'] = (
    vap_labs_with_dx['vap_datetime'] - vap_labs_with_dx['charttime']
).dt.days

# Create a pivot table for lab results: one row per stay / chart time, one
# column per lab test. reset_index() exposes the keys as ordinary columns so
# they can be used as plot axes.
lab_results_pivot = vap_labs_with_dx.pivot_table(
    index=['stay_id', 'charttime', 'days_before_vap'],
    columns='lab_name',
    values='lab_value',
).reset_index()

# Analyze and visualize Lab Results
sns.set_style("whitegrid")

for lab_test in vap_lab_tests:
    # A test with no returned rows has no column in the pivot.
    if lab_test not in lab_results_pivot.columns:
        print(f'No {lab_test} results found for VAP stays; skipping plot.')
        continue
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=lab_results_pivot, x='days_before_vap', y=lab_test)
    plt.title(f'Trends in {lab_test} before VAP Diagnosis')
    plt.xlabel('Days Before VAP Diagnosis')
    plt.ylabel(lab_test)
    plt.show()



# CAUTI

# In [None]:


# CAUTI lab-result analysis.
#
# Pulls selected lab results for subjects that have a CAUTI diagnosis, plots
# each lab test over time, and computes summary statistics plus an example
# t-test between two of the tests.
#
# NOTE: `project_id` and `dataset_name` are not defined in this section; they
# must already be set before it runs.

# Define CAUTI-specific lab tests of interest
# NOTE(review): these look like placeholder labels - replace with real ones.
cauti_lab_tests = [
    'LabTest1',
    'LabTest2',
    'LabTest3'
]

# Quoted, comma-separated list for the SQL IN (...) clause. The names come
# from the hard-coded list above, not from user input.
cauti_lab_names_sql = ','.join(["'" + test + "'" for test in cauti_lab_tests])

# Construct a BigQuery SQL query to retrieve CAUTI-related lab results
query = f"""
SELECT
    le.subject_id,
    le.charttime,
    le.itemid,
    le.label,
    le.value,
    le.valueuom
FROM
    `{project_id}.{dataset_name}.labevents` AS le
JOIN
    `{project_id}.{dataset_name}.cauti_diagnoses` AS cauti
ON
    le.subject_id = cauti.subject_id
WHERE
    le.label IN ({cauti_lab_names_sql})
"""

# Retrieve data from BigQuery. Uses pd.read_gbq like the other sections, so
# this cell does not depend on a `bigquery` name that the file never imports.
cauti_lab_results = pd.read_gbq(query, dialect='standard')

# Data cleaning and preprocessing
cauti_lab_results['charttime'] = pd.to_datetime(cauti_lab_results['charttime'])
# Plotting, describe() and the t-test all need numeric values; anything that
# cannot be parsed becomes NaN and is dropped below.
cauti_lab_results['value'] = pd.to_numeric(cauti_lab_results['value'], errors='coerce')
# Only require the fields the analysis uses; a missing `valueuom` should not
# discard an otherwise valid measurement.
# drop_duplicates() undoes row fan-out from the SQL join when a subject has
# more than one diagnosis row.
cauti_lab_results = (
    cauti_lab_results
    .dropna(subset=['charttime', 'label', 'value'])
    .drop_duplicates()
)

# Data analysis and visualization
sns.set_style("whitegrid")

for lab_test in cauti_lab_tests:
    # Plot only the rows for this test; otherwise every figure would show the
    # same full data set under a different title.
    lab_test_results = cauti_lab_results[cauti_lab_results['label'] == lab_test]
    if lab_test_results.empty:
        print(f'No {lab_test} results found for CAUTI patients; skipping plot.')
        continue
    plt.figure(figsize=(12, 6))
    sns.lineplot(data=lab_test_results, x='charttime', y='value')
    plt.title(f'{lab_test} Trends in CAUTI Patients')
    plt.xlabel('Date')
    plt.ylabel(lab_test)
    plt.show()


# Customized data analysis for CAUTI LAB RESULTS

# Example: Calculate summary statistics
lab_summary_stats = cauti_lab_results.groupby('label')['value'].describe()

# Example: Perform a statistical test
from scipy.stats import ttest_ind
test_group_1 = cauti_lab_results[cauti_lab_results['label'] == 'LabTest1']['value']
test_group_2 = cauti_lab_results[cauti_lab_results['label'] == 'LabTest2']['value']
t_stat, p_value = ttest_ind(test_group_1, test_group_2)

print(f'T-Test Results for LabTest1 and LabTest2: T-Stat = {t_stat}, P-Value = {p_value}')
