<a href="https://colab.research.google.com/github/victormurcia/VCHAMPS/blob/main/VCHAMPS_Gauging_Patient_Sickness_During_Hospitalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# How Sick Is Patient During Hospitalization?

In [1]:
#General utilities
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm  # Import tqdm for the progress bar
import glob,shutil,os,warnings,math,time,sys,re
from typing import List
import dask.dataframe as dd
from dask.diagnostics import ProgressBar

#For performing UTC normalization on datetime columns based on the STATE column
import pytz

#For Slider viz
import ipywidgets as widgets
from IPython.display import display, clear_output,HTML

#Enable data to be extracted and downloaded from my Google Drive
from google.colab import drive, files
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Specify the path to the desired directory
directory_path = r'/content/drive/MyDrive/VCHAMPS - Train Cleaned'

# Change the current working directory to the desired directory
os.chdir(directory_path)

# Verify the current working directory
cwd = os.getcwd()

print(f"Current working directory: {cwd}")

Current working directory: /content/drive/MyDrive/VCHAMPS - Train Cleaned


#

In [4]:
lab_measurements_df = dd.read_parquet('/content/drive/MyDrive/VCHAMPS - Train Cleaned/lab_results.parquet/*.parquet')
lab_measurements_df = lab_measurements_df.compute()
lab_measurements_df

Unnamed: 0,Internalpatientid,Age at lab test,Lab test date,Result numeric,Specimen source,desc,concept,unit,range_min,range_max
0,23511,66,2013-05-13 19:58:45,143.0,serum,ZSODIUM,na,mmol/L,136.0,145.0
1,23511,66,2013-06-21 17:21:08,144.0,serum,ZSODIUM,na,mmol/L,136.0,145.0
2,23256,51,2001-06-24 23:17:28,137.0,serum,ZSODIUM,na,mmol/L,136.0,148.0
3,23256,56,2006-05-20 02:08:17,149.0,serum,ZSODIUM,na,mmol/L,136.0,148.0
4,23256,60,2010-04-08 10:26:51,140.0,serum,ZSODIUM,na,mmol/L,136.0,148.0
...,...,...,...,...,...,...,...,...,...,...
43314,31585,66,2002-05-07 11:00:57,23.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0
43315,29243,72,2002-07-07 11:11:41,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0
43316,32935,71,2000-10-25 04:55:38,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0
43317,47851,44,1998-09-28 20:59:36,29.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0


In [5]:
lab_measurements_df['concept'].value_counts()

k                5772567
na               5736600
bicarb           5458451
cr               5035076
hct              3489755
hgb              3403578
wbc              3170735
alt              3049424
ast              2948191
tbili            2906994
a1c              1178708
ldh               354544
ferritin          312719
tropi             305075
inr               268860
lactate           122198
esr               112077
bnp               111472
crp                70861
pco2               38963
gfr                35403
tropt              28484
hscrp              27539
ntprobnp           27439
ddimer             26599
ph                 12746
trophs              7879
methadone_lvl        242
cocaine_lvl            6
Name: concept, dtype: int64

# Heart BNP Up

In [6]:
bnp_df = lab_measurements_df[lab_measurements_df['concept'].isin(['bnp','ntprobnp'])]
bnp_df

Unnamed: 0,Internalpatientid,Age at lab test,Lab test date,Result numeric,Specimen source,desc,concept,unit,range_min,range_max
9095,5293,85,2012-04-05 16:35:10,224.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0
9096,100490,77,2003-09-16 01:58:17,139.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0
9097,100490,79,2005-08-30 14:07:41,242.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0
9098,5293,85,2012-04-24 23:29:58,104.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0
9099,102655,65,2006-04-02 01:46:43,130.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0
...,...,...,...,...,...,...,...,...,...,...
158162,50764,76,2015-03-21 02:32:57,261.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,125.0
158163,27252,88,2013-01-20 06:01:58,2058.532412,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,900.0
158164,166455,84,2006-10-12 02:03:24,1672.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,900.0
158165,25401,69,2016-01-28 11:46:02,3392.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,125.0


In [7]:
bnp_df['heart_bnp_up'] = np.where(
    (bnp_df['Result numeric'] > 400) |
    ((bnp_df['Result numeric'] > 450) & (bnp_df['Age at lab test'] <= 50)) |
    ((bnp_df['Result numeric'] > 900) & (bnp_df['Age at lab test'] > 50) & (bnp_df['Age at lab test'] <= 75)) |
    ((bnp_df['Result numeric'] > 1800) & (bnp_df['Age at lab test'] > 75)),
    1,  # Value if the condition is true
    0   # Value if the condition is false
)
bnp_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bnp_df['heart_bnp_up'] = np.where(


Unnamed: 0,Internalpatientid,Age at lab test,Lab test date,Result numeric,Specimen source,desc,concept,unit,range_min,range_max,heart_bnp_up
9095,5293,85,2012-04-05 16:35:10,224.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0,0
9096,100490,77,2003-09-16 01:58:17,139.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0,0
9097,100490,79,2005-08-30 14:07:41,242.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0,0
9098,5293,85,2012-04-24 23:29:58,104.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0,0
9099,102655,65,2006-04-02 01:46:43,130.000000,blood*,"zBNP, TOTAL (DC 1-12)",bnp,pg/mL,0.0,100.0,0
...,...,...,...,...,...,...,...,...,...,...,...
158162,50764,76,2015-03-21 02:32:57,261.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,125.0,0
158163,27252,88,2013-01-20 06:01:58,2058.532412,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,900.0,1
158164,166455,84,2006-10-12 02:03:24,1672.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,900.0,1
158165,25401,69,2016-01-28 11:46:02,3392.000000,serum,BNP-NT-pro,ntprobnp,pg/mL,0.0,125.0,1


Merge with labs

In [34]:
columns_to_merge = ['Internalpatientid', 'Lab test date','concept','heart_bnp_up']
merged_df = lab_measurements_df.merge(bnp_df[columns_to_merge], on=['Internalpatientid', 'Lab test date', 'concept'], how='outer')
merged_df

Unnamed: 0,Internalpatientid,Age at lab test,Lab test date,Result numeric,Specimen source,desc,concept,unit,range_min,range_max,heart_bnp_up
0,23511,66,2013-05-13 19:58:45,143.0,serum,ZSODIUM,na,mmol/L,136.0,145.0,
1,23511,66,2013-06-21 17:21:08,144.0,serum,ZSODIUM,na,mmol/L,136.0,145.0,
2,23256,51,2001-06-24 23:17:28,137.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,
3,23256,56,2006-05-20 02:08:17,149.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,
4,23256,60,2010-04-08 10:26:51,140.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,
...,...,...,...,...,...,...,...,...,...,...,...
44013180,31585,66,2002-05-07 11:00:57,23.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,
44013181,29243,72,2002-07-07 11:11:41,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,
44013182,32935,71,2000-10-25 04:55:38,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,
44013183,47851,44,1998-09-28 20:59:36,29.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,


# Heart Trop High

In [10]:
trop_df = lab_measurements_df[lab_measurements_df['concept'].isin(['tropi', 'tropt', 'trophs'])]
trop_df

Unnamed: 0,Internalpatientid,Age at lab test,Lab test date,Result numeric,Specimen source,desc,concept,unit,range_min,range_max
24565,9382,85,2022-07-20 14:07:30,0.000000,blood,POC TROPONIN,tropi,ng/mL,0.0,0.08
24566,9382,85,2022-08-17 00:17:07,0.000000,blood,POC TROPONIN,tropi,ng/mL,0.0,0.08
24567,24261,71,2020-05-22 21:50:12,0.020688,blood,POC TROPONIN,tropi,ng/mL,0.0,0.08
24568,22578,80,2019-09-21 08:06:14,0.009667,blood,POC TROPONIN,tropi,ng/mL,0.0,0.08
24569,161952,68,2011-10-18 22:39:36,0.426903,blood,POC TROPONIN,tropi,ng/mL,0.0,1.10
...,...,...,...,...,...,...,...,...,...,...
37032,44677,57,2006-07-06 00:08:22,3.633805,plasma,ZZTROPONIN-I TURBO (DC 10/08),tropi,NG/ML,0.0,0.50
37033,40332,79,2001-10-04 17:12:30,0.909189,plasma,ZZTROPONIN-I TURBO (DC 10/08),tropi,NG/ML,0.0,0.50
37034,41906,88,2004-04-27 23:29:32,0.944372,plasma,ZZTROPONIN-I TURBO (DC 10/08),tropi,NG/ML,0.0,0.50
37035,36737,66,2003-12-24 07:32:24,0.530800,plasma,ZZTROPONIN-I TURBO (DC 10/08),tropi,NG/ML,0.0,0.50


In [24]:
tropi_df  = trop_df[trop_df['concept'] == 'tropi']
tropt_df  = trop_df[trop_df['concept'] == 'tropt']
trophs_df = trop_df[trop_df['concept'] == 'trophs']

tropi_df['Lab test date'] = tropi_df['Lab test date'].dt.strftime('%Y-%m-%d')
tropt_df['Lab test date'] = tropt_df['Lab test date'].dt.strftime('%Y-%m-%d')
trophs_df['Lab test date'] = trophs_df['Lab test date'].dt.strftime('%Y-%m-%d')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tropi_df['Lab test date'] = tropi_df['Lab test date'].dt.strftime('%Y-%m-%d')


In [27]:
tropi_grouped_df  =  tropi_df.groupby(['Internalpatientid', 'Lab test date']).agg({'Result numeric': 'mean'})
tropt_grouped_df  =  tropt_df.groupby(['Internalpatientid', 'Lab test date']).agg({'Result numeric': 'mean'})
trophs_grouped_df =  trophs_df.groupby(['Internalpatientid', 'Lab test date']).agg({'Result numeric': 'mean'})

In [28]:
merged_df['Lab test date'] = merged_df['Lab test date'].dt.strftime('%Y-%m-%d')
merged_df

Unnamed: 0,Internalpatientid,Age at lab test_x,Lab test date,Result numeric_x,Specimen source_x,desc_x,concept,unit_x,range_min_x,range_max_x,Age at lab test_y,Result numeric_y,Specimen source_y,desc_y,unit_y,range_min_y,range_max_y,heart_bnp_up
0,23511,66,2013-05-13,143.0,serum,ZSODIUM,na,mmol/L,136.0,145.0,,,,,,,,
1,23511,66,2013-06-21,144.0,serum,ZSODIUM,na,mmol/L,136.0,145.0,,,,,,,,
2,23256,51,2001-06-24,137.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,,,,,,,,
3,23256,56,2006-05-20,149.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,,,,,,,,
4,23256,60,2010-04-08,140.0,serum,ZSODIUM,na,mmol/L,136.0,148.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44013180,31585,66,2002-05-07,23.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,,,,,,,,
44013181,29243,72,2002-07-07,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,,,,,,,,
44013182,32935,71,2000-10-25,27.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,,,,,,,,
44013183,47851,44,1998-09-28,29.0,serum,ZZCARBON DIOXIDE,bicarb,mmol/L,22.0,30.0,,,,,,,,
