# Extracting Blood vital features
## Blood vitals are identified by the following `itemid` values in CHARTEVENTS
> HeartRate: 211,220045 <br>
> ManualBP : 442,224167 <br>
> respiration: 228234  <br>
> Oxygen Saturation: 228232  <br>
> Blood temperature: 226329

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Read Data

In [17]:
# Load the filtered chart events and the per-patient details table.
chartDF = pd.read_csv('../filteredChartData.csv', parse_dates=['charttime'])
patientDetailsDF = pd.read_csv('../PatientDetailsWithMaxDate.csv', parse_dates=['maxDate'])

# CHARTEVENTS itemid -> vital-sign label (two itemids can share one label).
itemId2Label = {
    211: "heartRate",
    220045: "heartRate",
    442: "manualBP",
    224167: "manualBP",
    228234: "respiration",
    228232: "oxygenSaturation",
    226329: "bloodTemperature",
}

# itemids that are not in the mapping end up with a NaN label.
chartDF['label'] = chartDF.itemid.map(itemId2Label)

# Truncate every timestamp to midnight for the day-level comparisons made later on.
# dt.normalize() stays in datetime64 instead of round-tripping through Python
# `date` objects, which is needlessly slow on a frame of this size.
chartDF['chartDate'] = chartDF.charttime.dt.normalize()
chartDF.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,itemid,charttime,storetime,cgid,value,valuenum,valueuom,warning,error,resultstatus,stopped,label
0,803,36,165660,241249.0,220045,2134-05-12 13:00:00,2134-05-12 13:55:00,17525.0,86,86.0,bpm,0.0,0.0,,,heartRate
1,816,36,165660,241249.0,220045,2134-05-12 14:00:00,2134-05-12 14:39:00,17525.0,85,85.0,bpm,0.0,0.0,,,heartRate
2,829,36,165660,241249.0,220045,2134-05-12 15:00:00,2134-05-12 16:50:00,17525.0,87,87.0,bpm,0.0,0.0,,,heartRate
3,848,36,165660,241249.0,220045,2134-05-12 16:00:00,2134-05-12 16:50:00,17525.0,91,91.0,bpm,0.0,0.0,,,heartRate
4,350,34,144319,290505.0,220045,2191-02-23 07:34:00,2191-02-23 10:53:00,17741.0,44,44.0,bpm,0.0,0.0,,,heartRate


In [24]:
### merge all the data
# Attach the patient-level columns to every chart row. The join key is spelled
# out so the merge cannot silently widen to any other column the two frames
# might come to share.
df = chartDF.merge(patientDetailsDF, on='subject_id', how='inner')
df.shape

(8009783, 24)

## Generate features on lastButOneDay

In [56]:
# Reference day per row: maxDate, moved back by one day where angus == 1.
isAngus = df.angus == 1
df['lastDay'] = df.maxDate
df.loc[isAngus, 'lastDay'] -= pd.Timedelta(days=1)

# Reducers are given by name so pandas' own implementations (sample std, ddof=1)
# are selected explicitly rather than via numpy-callable dispatch. `len` stays a
# callable so the resulting column keeps the name 'valuenum_len'.
# The maximum is not computed: it was never part of the pivoted feature set.
vitalStats = ['median', 'mean', 'std', len]
statColumns = ['valuenum_median', 'valuenum_mean', 'valuenum_std', 'valuenum_len']

# Per (subject, vital) statistics over the readings charted on the reference day.
onLastDay = df.chartDate == df.lastDay
lastDayFeatures = (
    df.loc[onLastDay]
      .groupby(['subject_id', 'label'], as_index=False)
      .agg({'valuenum': vitalStats})
)
# Flatten the two-level columns: ('subject_id', '') -> 'subject_id',
# ('valuenum', 'mean') -> 'valuenum_mean'.
lastDayFeatures.columns = [top if sub == '' else top + '_' + sub
                           for top, sub in lastDayFeatures.columns]

# One row per subject, one column per (statistic, vital) pair.
lastDayFeatures = lastDayFeatures.pivot_table(
    index='subject_id', columns='label', values=statColumns
).reset_index()
# Pairs without a value (no readings, or std of a single reading) become 0.
lastDayFeatures = lastDayFeatures.fillna(0)
lastDayFeatures.columns = [stat if vital == '' else 'lastDay_' + stat + '_' + vital
                           for stat, vital in lastDayFeatures.columns]

print(lastDayFeatures.shape)
print(lastDayFeatures.head())

(8019, 19)
   subject_id  lastDay_valuenum_len_bloodTemperature  \
0           5                                    0.0   
1           9                                    0.0   
2          10                                    0.0   
3          16                                    0.0   
4          22                                    0.0   

   lastDay_valuenum_len_heartRate  lastDay_valuenum_len_manualBP  \
0                             1.0                            0.0   
1                            16.0                            0.0   
2                            12.0                            0.0   
3                             1.0                            0.0   
4                             5.0                            0.0   

   lastDay_valuenum_len_oxygenSaturation  lastDay_valuenum_len_respiration  \
0                                    0.0                               0.0   
1                                    0.0                               0.0   
2        

## Generate weekly Features

In [57]:
# Lower bound of the weekly window.
# NOTE(review): the window below covers the 8 days before lastDay
# (lastDay-8 .. lastDay-1); confirm that 8 rather than 7 is intended.
df['lastWeekDay'] = df['lastDay'] - pd.Timedelta(days=8)

# Reducers are given by name so pandas' own implementations (sample std, ddof=1)
# are selected explicitly rather than via numpy-callable dispatch. `len` stays a
# callable so the resulting column keeps the name 'valuenum_len'.
# The maximum is not computed: it was never part of the pivoted feature set.
vitalStats = ['median', 'mean', 'std', len]
statColumns = ['valuenum_median', 'valuenum_mean', 'valuenum_std', 'valuenum_len']

# Readings charted on or after lastWeekDay and strictly before lastDay.
inLastWeek = (df.chartDate >= df.lastWeekDay) & (df.chartDate < df.lastDay)
lastWeekDF = (
    df.loc[inLastWeek]
      .groupby(['subject_id', 'label'], as_index=False)
      .agg({'valuenum': vitalStats})
)
# Flatten the two-level columns: ('subject_id', '') -> 'subject_id',
# ('valuenum', 'mean') -> 'valuenum_mean'.
lastWeekDF.columns = [top if sub == '' else top + '_' + sub
                      for top, sub in lastWeekDF.columns]

# One row per subject, one column per (statistic, vital) pair.
lastWeekDF = lastWeekDF.pivot_table(
    index='subject_id', columns='label', values=statColumns
).reset_index()
# Pairs without a value (no readings, or std of a single reading) become 0.
lastWeekDF = lastWeekDF.fillna(0)
lastWeekDF.columns = [stat if vital == '' else 'week_' + stat + '_' + vital
                      for stat, vital in lastWeekDF.columns]
print(lastWeekDF.shape)
print(lastWeekDF.head())

(19130, 21)
   subject_id  week_valuenum_len_bloodTemperature  \
0           2                                 0.0   
1           7                                 0.0   
2           8                                 0.0   
3           9                                 0.0   
4          10                                 0.0   

   week_valuenum_len_heartRate  week_valuenum_len_manualBP  \
0                          4.0                         0.0   
1                          3.0                         0.0   
2                         28.0                         0.0   
3                        177.0                         0.0   
4                        187.0                         0.0   

   week_valuenum_len_oxygenSaturation  week_valuenum_len_respiration  \
0                                 0.0                            0.0   
1                                 0.0                            0.0   
2                                 0.0                            0.0   
3       

## Generate LastMonth Features

In [58]:
# Lower bound of the monthly window: 31 days before the reference day.
df['lastMonth'] = df['lastDay'] - pd.Timedelta(days=31)

# Reducers are given by name so pandas' own implementations (sample std, ddof=1)
# are selected explicitly rather than via numpy-callable dispatch. `len` stays a
# callable so the resulting column keeps the name 'valuenum_len'.
# The maximum is not computed: it was never part of the pivoted feature set.
vitalStats = ['median', 'mean', 'std', len]
statColumns = ['valuenum_median', 'valuenum_mean', 'valuenum_std', 'valuenum_len']

# Readings charted on or after the 31-day bound and strictly before lastWeekDay,
# i.e. the part of the month that the weekly window does not already cover.
# Bracket access is used for the column because `lastMonth` is also the name of
# the result frame assigned below.
inLastMonth = (df.chartDate >= df['lastMonth']) & (df.chartDate < df['lastWeekDay'])
lastMonth = (
    df.loc[inLastMonth]
      .groupby(['subject_id', 'label'], as_index=False)
      .agg({'valuenum': vitalStats})
)
# Flatten the two-level columns: ('subject_id', '') -> 'subject_id',
# ('valuenum', 'mean') -> 'valuenum_mean'.
lastMonth.columns = [top if sub == '' else top + '_' + sub
                     for top, sub in lastMonth.columns]

# One row per subject, one column per (statistic, vital) pair.
lastMonth = lastMonth.pivot_table(
    index='subject_id', columns='label', values=statColumns
).reset_index()
# Pairs without a value (no readings, or std of a single reading) become 0.
lastMonth = lastMonth.fillna(0)
lastMonth.columns = [stat if vital == '' else 'month_' + stat + '_' + vital
                     for stat, vital in lastMonth.columns]
print(lastMonth.shape)
print(lastMonth.head())

(6341, 21)
   subject_id  month_valuenum_len_bloodTemperature  \
0          12                                  0.0   
1          20                                  0.0   
2          32                                  0.0   
3          35                                  0.0   
4          39                                  0.0   

   month_valuenum_len_heartRate  month_valuenum_len_manualBP  \
0                         130.0                          0.0   
1                          41.0                          0.0   
2                         136.0                          0.0   
3                          61.0                          0.0   
4                          25.0                          0.0   

   month_valuenum_len_oxygenSaturation  month_valuenum_len_respiration  \
0                                  0.0                             0.0   
1                                  0.0                             0.0   
2                                  0.0                   

## Features over the full history

In [59]:
# Reducers are given by name so pandas' own implementations (sample std, ddof=1)
# are selected explicitly rather than via numpy-callable dispatch. `len` stays a
# callable so the resulting column keeps the name 'valuenum_len'.
# The maximum is not computed: it was never part of the pivoted feature set.
vitalStats = ['median', 'mean', 'std', len]
statColumns = ['valuenum_median', 'valuenum_mean', 'valuenum_std', 'valuenum_len']

# Per (subject, vital) statistics over every chart row, with no date filter.
historyFeatures = (
    df.groupby(['subject_id', 'label'], as_index=False)
      .agg({'valuenum': vitalStats})
)
# Flatten the two-level columns: ('subject_id', '') -> 'subject_id',
# ('valuenum', 'mean') -> 'valuenum_mean'.
historyFeatures.columns = [top if sub == '' else top + '_' + sub
                           for top, sub in historyFeatures.columns]

# One row per subject, one column per (statistic, vital) pair.
historyFeatures = historyFeatures.pivot_table(
    index='subject_id', columns='label', values=statColumns
).reset_index()
# Pairs without a value (no readings, or std of a single reading) become 0.
historyFeatures = historyFeatures.fillna(0)
historyFeatures.columns = [stat if vital == '' else 'history_' + stat + '_' + vital
                           for stat, vital in historyFeatures.columns]
print(historyFeatures.shape)
print(historyFeatures.head())


(45966, 21)
   subject_id  history_valuenum_len_bloodTemperature  \
0           2                                    0.0   
1           3                                    0.0   
2           4                                    0.0   
3           5                                    0.0   
4           6                                    0.0   

   history_valuenum_len_heartRate  history_valuenum_len_manualBP  \
0                             4.0                            0.0   
1                           170.0                            0.0   
2                            31.0                            0.0   
3                             1.0                            0.0   
4                            91.0                            0.0   

   history_valuenum_len_oxygenSaturation  history_valuenum_len_respiration  \
0                                    0.0                               0.0   
1                                    0.0                               0.0   
2       

## Merge Features

In [63]:
# Left-join each feature table onto the patient list so that every patient keeps
# a row even when a window has no readings for them. The key is explicit so the
# joins cannot widen to any other shared column. The shape is printed after each
# join to confirm the row count stays constant.
output = patientDetailsDF
for featureTable in (lastDayFeatures, lastWeekDF, lastMonth, historyFeatures):
    output = output.merge(featureTable, on='subject_id', how='left')
    print(output.shape)

(46494, 26)
(46494, 46)
(46494, 66)
(46494, 86)


In [67]:
# Replace every remaining NaN in the merged table with 0
# (patients that were absent from a feature table after the left joins).
output = output.fillna(0)
output.head()

Unnamed: 0,subject_id,dob,gender,first_admittime,first_admit_age,age_group,angus,maxDate,lastDay_valuenum_len_bloodTemperature,lastDay_valuenum_len_heartRate,...,history_valuenum_median_bloodTemperature,history_valuenum_median_heartRate,history_valuenum_median_manualBP,history_valuenum_median_oxygenSaturation,history_valuenum_median_respiration,history_valuenum_std_bloodTemperature,history_valuenum_std_heartRate,history_valuenum_std_manualBP,history_valuenum_std_oxygenSaturation,history_valuenum_std_respiration
0,3,4/11/2025 0:00,M,10/20/2101 19:08,76.52,adult,1,2101-10-20,0.0,0.0,...,0.0,87.5,0.0,0.0,0.0,0.0,19.908356,0.0,0.0,0.0
1,21,4/4/2047 0:00,M,9/11/2134 12:17,87.44,adult,1,2134-09-11,0.0,0.0,...,0.0,75.0,0.0,0.0,0.0,0.0,13.281448,0.0,0.0,0.0
2,31,5/17/2036 0:00,M,8/22/2108 23:27,72.26,adult,1,2108-08-22,0.0,0.0,...,0.0,52.0,0.0,0.0,0.0,0.0,8.096154,0.0,0.0,0.0
3,36,8/17/2061 0:00,M,4/30/2131 7:15,69.7,adult,1,2134-05-12,0.0,0.0,...,0.0,93.0,0.0,0.0,0.0,0.0,13.252031,0.0,0.0,0.0
4,38,8/31/2090 0:00,M,8/10/2166 0:28,75.94,adult,1,2166-08-10,0.0,0.0,...,0.0,88.0,0.0,0.0,0.0,0.0,11.248368,0.0,0.0,0.0


## Save features to a file

In [68]:
# Write the feature table to the HDF5 store under key 'df'. The context manager
# closes the file handle even if the write raises.
with pd.HDFStore('../cse6250_features.h5') as store:
    store['df'] = output

In [69]:
# Also export the feature table as CSV (without the index column).
csvPath = 'cse6250_features.csv'
output.to_csv(csvPath, index=False)