In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import pyCLIF
from datetime import timedelta
import pyarrow
import waterfall
import warnings
# NOTE(review): this silences every warning category for the whole notebook,
# including pandas diagnostics such as SettingWithCopyWarning and deprecation
# notices. Consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

✅ Loaded configuration from config.json


In [2]:
# Inputs for this notebook:
#   - the CLIF -> C2D2 crosswalk spreadsheet
#   - the cohort table written to ../output/final
clif_c2d2_mapping = pd.read_excel(io='../mapping/ccm-53-e1045-s002.xlsx')
cohort = pd.read_parquet(path='../output/final/c2d2_cohort.parquet')

## 🗺️ CLIF Respiratory Support to C2D2 Mapping

In [3]:
# Keep only the crosswalk rows for the Interventions / Respiratory Support
# sub-domain, then preview them.
is_intervention = clif_c2d2_mapping['Domain'].eq('Interventions')
is_resp_support = clif_c2d2_mapping['Sub-domain'].eq('Respiratory Support')
mapper = clif_c2d2_mapping.loc[is_intervention & is_resp_support]
mapper.head()

Unnamed: 0,Domain,Sub-domain,Concept,Common Data Element,CLIF Table,CLIF mCIDE Crosswalk column,CLIF version,mCIDE,Definition,Coding,...,SOFA,PSOFA,nSOFA,PRISM III,PIM3,Charlson CI,MRC-ICU,SOI count,No SOI score,Potential EHR Datasource
157,Interventions,Respiratory Support,Invasive mechanical ventilation,icu_any_imv,respiratory support,device_category,CLIF-2.1.0 (Live),IMV,Whether a patient underwent an intervention in...,Yes; No; Unknown,...,1.0,1.0,1.0,,1.0,,,4,,"Problem list, ICD10 code, flowsheet, orders"
158,Interventions,Respiratory Support,IMV Duration,icu_imv_days,"Respiratory Support,ADT",device_category,CLIF-2.1.0 (Live),calculated,Total number of days that a patient underwent ...,[Integer]\nUnits: days,...,,,,,,,,0,1.0,"Problem list, ICD10 code, flowsheet, orders"
159,Interventions,Respiratory Support,Other oxygen support,icu_any_oxygen_support,"Respiratory Support,ADT",device_category,CLIF-2.1.0 (Live),"NIPPV, CPAP, High Flow NC, Face Mask, Trach Co...",Whether a patient underwent an intervention in...,"[Check all that apply] \nOxygen therapy, C-PAP...",...,,,,,,,1.0,1,,"Problem list, ICD10 code, flowsheet"


In [4]:
# Columns requested from the respiratory-support table.
clif_respiratory_support = [
    'hospitalization_id',
    'recorded_dttm',
    'device_category',
]

# Load only the rows whose hospitalization_id appears in the cohort.
rst_cohort = pyCLIF.load_data(
    'clif_respiratory_support',
    columns=clif_respiratory_support,
    filters={'hospitalization_id': cohort['hospitalization_id'].unique().tolist()},
)

# Pass the frame through pyCLIF's datetime conversion using the timezone held
# in pyCLIF.helper (presumably the site's local timezone — confirm in pyCLIF).
rst_cohort = pyCLIF.convert_datetime_columns_to_site_tz(
    rst_cohort,
    pyCLIF.helper['timezone'],
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Data loaded successfully from C:/Users/vchaudha/Downloads/rush_parquet_2\clif_respiratory_support.parquet




In [5]:
# Reduce the cohort to the join key plus the two 24hr ICU window bounds,
# removing exact duplicate rows.
window_cols = ['hospitalization_id', '24hr_icu_in_dttm', '24hr_icu_out_dttm']
cohort_trimmed = cohort.loc[:, window_cols].drop_duplicates()

# Inner join: every respiratory-support record is paired with the window(s)
# of its hospitalization; ids missing from either side are dropped.
rst_joined_df = cohort_trimmed.merge(
    rst_cohort,
    how='inner',
    on='hospitalization_id',
)

In [6]:
# c2d2 : 'icu_any_imv'
# Flag hospitalizations that have at least one IMV record whose recorded_dttm
# falls inside the ICU 24hr window. Series.between is inclusive on both ends
# by default, matching the original >= / <= comparison.
in_icu_window = rst_joined_df['recorded_dttm'].between(
    rst_joined_df['24hr_icu_in_dttm'],
    rst_joined_df['24hr_icu_out_dttm'],
)
# Case-insensitive match on the device category; missing values never match.
is_imv = rst_joined_df['device_category'].str.lower() == 'imv'

# Build the flag with .assign on the de-duplicated ids rather than writing a
# column into the result of chained indexing, which is the pattern that raises
# SettingWithCopyWarning.
# NOTE(review): only 'yes' rows are produced; hospitalizations without IMV in
# the window are absent from this frame rather than coded as 'no' — confirm
# that downstream consumers fill the missing ids.
rst_filtered_df = (
    rst_joined_df.loc[in_icu_window & is_imv, ['hospitalization_id']]
    .drop_duplicates()
    .assign(icu_any_imv='yes')
)

In [7]:
# Persist the IMV flag table; the DataFrame index is not written.
rst_filtered_df.to_parquet(
    path='../output/final/c2d2_interventions_rst.parquet',
    index=False,
)

## 🗺️ CLIF CRRT to C2D2 Mapping

In [8]:
# Load the CRRT therapy table (all columns), restricted to hospitalizations
# present in the cohort.
crrt_cohort = pyCLIF.load_data(
    'clif_crrt_therapy',
    filters={'hospitalization_id': cohort['hospitalization_id'].unique().tolist()},
)

# Pass the frame through pyCLIF's datetime conversion using the timezone held
# in pyCLIF.helper (presumably the site's local timezone — confirm in pyCLIF).
crrt_cohort = pyCLIF.convert_datetime_columns_to_site_tz(
    crrt_cohort,
    pyCLIF.helper['timezone'],
)

Data loaded successfully from C:/Users/vchaudha/Downloads/rush_parquet_2\clif_crrt_therapy.parquet


In [9]:
# c2d2 : 'icu_any_crrt'
# Collapse the CRRT records to one row per hospitalization and mark each as
# having received CRRT. The flag is added with .assign so no column is written
# into a frame derived by chained indexing (the SettingWithCopyWarning pattern).
# NOTE(review): any CRRT record for the hospitalization counts here; unlike
# the IMV flag, no 24hr ICU window filter is applied — confirm this is intended.
crrt_cohort = (
    crrt_cohort[['hospitalization_id']]
    .drop_duplicates()
    .assign(icu_any_crrt='yes')
)

In [10]:
# Persist the CRRT flag table; the DataFrame index is not written.
crrt_cohort.to_parquet(
    path='../output/final/c2d2_interventions_crrt.parquet',
    index=False,
)