In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import pyCLIF
from datetime import timedelta
import pyarrow
import waterfall
import warnings
warnings.filterwarnings('ignore')

✅ Loaded configuration from config.json


In [2]:
# Read the CLIF -> C2D2 crosswalk spreadsheet and the previously saved cohort table.
mapping_path = '../mapping/ccm-53-e1045-s002.xlsx'
cohort_path = '../output/final/c2d2_cohort.parquet'

clif_c2d2_mapping = pd.read_excel(mapping_path)
cohort = pd.read_parquet(cohort_path)

## 🗺️ CLIF Meds to C2D2 Mapping


In [3]:
# Restrict the crosswalk to the vasopressor/inotrope rows of the Medications domain.
is_medication = clif_c2d2_mapping['Domain'] == 'Medications'
is_vasoactive = clif_c2d2_mapping['Sub-domain'] == 'Vasopressor/Inotropes'
mapper = clif_c2d2_mapping[is_medication & is_vasoactive]
mapper

Unnamed: 0,Domain,Sub-domain,Concept,Common Data Element,CLIF Table,CLIF mCIDE Crosswalk column,CLIF version,mCIDE,Definition,Coding,...,SOFA,PSOFA,nSOFA,PRISM III,PIM3,Charlson CI,MRC-ICU,SOI count,No SOI score,Potential EHR Datasource
194,Medications,Vasopressor/Inotropes,Dopamine - Max rate,icu_24hr_infusion_dopamine_max,medication admin continuous,med_category,CLIF-2.1.0 (Live),dopamine,Dopamine maximum infusion rate during first 24...,Units: mcg/kg/min,...,1.0,1.0,1.0,,,,,3,,"medication administration record, flowsheet"
195,Medications,Vasopressor/Inotropes,Dobutamine - Max rate,icu_24hr_infusion_dobutamine_max,medication admin continuous,med_category,CLIF-2.1.0 (Live),dobutamine,Dobutamine maximum infusion rate during first ...,Units: mcg/kg/min,...,1.0,1.0,1.0,,,,,3,,"medication administration record, flowsheet"
196,Medications,Vasopressor/Inotropes,Norepinephrine - Max rate,icu_24hr_infusion_norepinephrine_max,medication admin continuous,med_category,CLIF-2.1.0 (Live),norepinephrine,Norepinephrine maximum infusion rate during fi...,Units: mcg/kg/min,...,1.0,1.0,1.0,,,,,3,,"medication administration record, flowsheet"
197,Medications,Vasopressor/Inotropes,Epinephrine - Max rate,icu_24hr_infusion_epinephrine_max,medication admin continuous,med_category,CLIF-2.1.0 (Live),epinephrine,Epinephrine maximum infusion rate during first...,Units: mcg/kg/min,...,1.0,1.0,1.0,,,,,3,,"medication administration record, flowsheet"


In [4]:
# Columns requested from the continuous medication administration table.
meds_required_columns = [
    'hospitalization_id', 'admin_dttm',
    'med_name', 'med_category',
    'med_dose', 'med_dose_unit',
]

# NOTE(review): the crosswalk rows displayed earlier in this notebook also list
# dopamine, dobutamine and epinephrine; only norepinephrine is loaded here --
# confirm this restriction is intended.
meds_of_interest = ['norepinephrine']

# Limit the load to cohort hospitalizations and the medication categories above.
meds_filters = dict(
    hospitalization_id=cohort['hospitalization_id'].unique().tolist(),
    med_category=meds_of_interest,
)
meds = pyCLIF.load_data(
    'clif_medication_admin_continuous',
    columns=meds_required_columns,
    filters=meds_filters,
)

# Lower-case the unit labels so later string comparisons are case-insensitive.
meds['med_dose_unit'] = meds['med_dose_unit'].str.lower()
# Datetime handling is delegated to the pyCLIF helper, using the configured timezone.
meds = pyCLIF.convert_datetime_columns_to_site_tz(meds, pyCLIF.helper['timezone'])
# Doses that cannot be parsed as numbers become NaN instead of raising.
meds['med_dose'] = pd.to_numeric(meds['med_dose'], errors='coerce')

Data loaded successfully from C:/Users/vchaudha/Downloads/rush_parquet_2\clif_medication_admin_continuous.parquet




In [5]:
# Columns requested from the vitals table; only weight measurements are loaded.
vitals_required_columns = ['hospitalization_id', 'recorded_dttm', 'vital_category', 'vital_value']
vitals_of_interest = ['weight_kg']

vitals_filters = {
    'hospitalization_id': cohort['hospitalization_id'].unique().tolist(),
    'vital_category': vitals_of_interest,
}
vitals_cohort = pyCLIF.load_data(
    'clif_vitals',
    columns=vitals_required_columns,
    filters=vitals_filters,
)
vitals_cohort = pyCLIF.convert_datetime_columns_to_site_tz(vitals_cohort, pyCLIF.helper['timezone'])

# Chronological order within each hospitalization, so "first" below means earliest.
vitals_cohort = vitals_cohort.sort_values(by=['hospitalization_id', 'recorded_dttm'])

# One row per hospitalization: the earliest record that has a non-null vital_value.
weights = (
    vitals_cohort
    .dropna(subset=['vital_value'])
    .groupby('hospitalization_id', as_index=False)
    .first()
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Data loaded successfully from C:/Users/vchaudha/Downloads/rush_parquet_2\clif_vitals.parquet


In [6]:
# Unique (hospitalization, ICU 24h window) rows from the cohort.
window_columns = ['hospitalization_id', '24hr_icu_in_dttm', '24hr_icu_out_dttm']
cohort_trimmed = cohort.loc[:, window_columns].drop_duplicates()

# Attach the window bounds to every medication row; hospitalizations without
# medication rows (and vice versa) are dropped by the inner join.
meds_joined_df = cohort_trimmed.merge(meds, on='hospitalization_id', how='inner')

In [7]:
# Keep only administrations whose admin_dttm falls inside the ICU 24hr window
# (both bounds inclusive).
admin_time = meds_joined_df['admin_dttm']
in_icu_window = (
    (admin_time >= meds_joined_df['24hr_icu_in_dttm']) &
    (admin_time <= meds_joined_df['24hr_icu_out_dttm'])
)
meds_filtered_df = meds_joined_df[in_icu_window]

# Join the per-hospitalization weight row (its value lives in 'vital_value') so
# that non-weight-based doses can be normalised. The left join keeps medication
# rows for hospitalizations that have no recorded weight.
meds_weight_filtered_df = meds_filtered_df.merge(
    weights,
    on='hospitalization_id',
    how='left',
    suffixes=('', '_weight'),  # avoids column name clashes
)

# Fallback weight (kg) used when no usable weight is available.
DEFAULT_WEIGHT_KG = 60

# Assign the filled column back explicitly. The chained
# `df[col].fillna(..., inplace=True)` form operates on a temporary object under
# pandas Copy-on-Write and would leave the NaNs in place without any visible
# warning. Non-numeric and non-positive weights are treated as missing so the
# division below can never produce inf or raise.
weight_kg = pd.to_numeric(meds_weight_filtered_df['vital_value'], errors='coerce')
weight_kg = weight_kg.where(weight_kg > 0).fillna(DEFAULT_WEIGHT_KG)
meds_weight_filtered_df['vital_value'] = weight_kg

# Convert 'mcg/min' doses to mcg/kg/min by dividing by weight; every other unit
# is passed through unchanged. Vectorised, so it also works on an empty frame
# (row-wise apply(axis=1) returns a DataFrame there) and avoids per-row overhead.
# NOTE(review): units other than 'mcg/min' (e.g. per-hour rates) are NOT
# converted here -- confirm they are already mcg/kg/min at this site.
dose = meds_weight_filtered_df['med_dose']
is_mcg_per_min = (
    meds_weight_filtered_df['med_dose_unit'].eq('mcg/min').fillna(False).astype(bool)
)
meds_weight_filtered_df['dose_mcg_per_kg_min'] = dose.mask(is_mcg_per_min, dose / weight_kg)

In [8]:
# Maximum normalised dose per hospitalization, pivoted so each medication
# category becomes its own column.
meds_agg_df = (
    meds_weight_filtered_df
    .groupby(['hospitalization_id', 'med_category'])['dose_mcg_per_kg_min']
    .agg(['max'])
    .unstack(level='med_category')  # columns are now (stat, med_category) pairs
)

# Flatten the two-level column labels to icu_24hr_infusion_<med_category>_<stat>.
flat_column_names = [
    f'icu_24hr_infusion_{med_category}_{stat_name}'
    for stat_name, med_category in meds_agg_df.columns
]
meds_agg_df.columns = flat_column_names

In [9]:
# Move hospitalization_id from the index back to a regular column, then write
# the table without a separate index column.
meds_output_df = meds_agg_df.reset_index()
meds_output_df.to_parquet('../output/final/c2d2_medications.parquet', index=False)