# TableOne statistics for the whole data

Update 2020.12.17

The tableone package for Python was updated in May or June 2020; the updated version supports statistics for more than two groups.

Thanks to Tom J Pollard, Alistair E W Johnson, Jesse D Raffa, and Roger G Mark. :)

In [1]:
import numpy as np
import pandas as pd
import warnings
from tableone import TableOne
warnings.filterwarnings("ignore")

In [2]:
# Read the five pre-processed cohort tables, one CSV per data source.
# Note that the eICU frame is read from the file named "eCRD-...".
data_mimic, data_aumc, data_eicu, data_plagh, data_HJ23 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../datasets/processed data/MIMIC-data-update202012.csv',
        '../datasets/processed data/AUMC-data-update202012.csv',
        '../datasets/processed data/eCRD-data-update202012.csv',
        '../datasets/processed data/PLAGH-data-update202012.csv',
        '../datasets/processed data/HJ23-data-update202012.csv',
    )
)

## Reliable hospital & unit list of eICU

In [3]:
# Load the eICU hospital / ICU / year filter table. Every row of this file is
# flagged as "reliable" in the next cell.
reliable_l=pd.read_csv('../datasets/ReliableHospitalICUYearFilter.csv')

In [4]:
# Mark every unit listed in the filter file as reliable (1) and discard the
# hospital id and admit-year columns, which are not used afterwards.
reliable_l = (
    reliable_l
    .assign(reliable=1)
    .drop(columns=['HospitalID', 'UnitAdmitYear'])
)
reliable_l

Unnamed: 0,ICUID,reliable
0,1087,1
1,822,1
2,369,1
3,451,1
4,608,1
...,...,...
250,362,1
251,1041,1
252,376,1
253,601,1


In [5]:
# With the hospital/year columns removed, rows can repeat; keep a single row
# per distinct combination. The original index labels are retained.
reliable_l = reliable_l.drop_duplicates()
reliable_l

Unnamed: 0,ICUID,reliable
0,1087,1
1,822,1
2,369,1
3,451,1
4,608,1
...,...,...
203,487,1
221,338,1
223,364,1
251,1041,1


In [6]:
# Inspect column names, dtypes and non-null counts of the eICU table.
data_eicu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132994 entries, 0 to 132993
Data columns (total 34 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   uniquepid                  132994 non-null  object 
 1   patienthealthsystemstayid  132994 non-null  int64  
 2   patientunitstayid          132994 non-null  int64  
 3   gender                     132925 non-null  float64
 4   age                        132994 non-null  float64
 5   unittype                   132994 non-null  object 
 6   wardid                     132994 non-null  int64  
 7   unitadmitsource            132431 non-null  object 
 8   hospitaladmitsource        104866 non-null  object 
 9   unitvisitnumber            132994 non-null  int64  
 10  icu_los_hours              132994 non-null  int64  
 11  hospital_los_hours         132994 non-null  int64  
 12  icu_expire_flag            132980 non-null  float64
 13  hospital_expire_flag       13

In [7]:
# List the eICU column names (the merge below joins on 'wardid').
data_eicu.columns.values

array(['uniquepid', 'patienthealthsystemstayid', 'patientunitstayid',
       'gender', 'age', 'unittype', 'wardid', 'unitadmitsource',
       'hospitaladmitsource', 'unitvisitnumber', 'icu_los_hours',
       'hospital_los_hours', 'icu_expire_flag', 'hospital_expire_flag',
       'rrt_flag', 'cardiac_surgery_flag', 'vent_duration_hours',
       'admission_type', 'bicarbonate_min', 'creatinine_max',
       'glucose_max', 'hemoglobin_min', 'lactate_max', 'sodium_max',
       'sodium_min', 'crrt_duration_hours', 'vent_flag', 'age_group',
       'outcome_group', 'crystalloid_v', 'colloid', 'blood', 'emergency',
       'crystalloid'], dtype=object)

In [8]:
# Attach the reliable-unit flag to each eICU stay. The left join keeps every
# stay; a stay whose 'wardid' has no matching 'ICUID' gets NaN in the added
# 'ICUID' and 'reliable' columns.
data_eicu = data_eicu.merge(
    reliable_l,
    how='left',
    left_on='wardid',
    right_on='ICUID',
)
data_eicu

Unnamed: 0,uniquepid,patienthealthsystemstayid,patientunitstayid,gender,age,unittype,wardid,unitadmitsource,hospitaladmitsource,unitvisitnumber,...,vent_flag,age_group,outcome_group,crystalloid_v,colloid,blood,emergency,crystalloid,ICUID,reliable
0,002-10009,193705,224606,0.0,76.0,Med-Surg ICU,87,Operating Room,Operating Room,1,...,1.0,"(69, 79]",3.0,4949.0,0.0,1.0,0.0,1.0,,
1,002-10018,178200,204602,0.0,29.0,Med-Surg ICU,90,Floor,Floor,1,...,0.0,"(17, 39]",1.0,,0.0,0.0,0.0,0.0,,
2,002-10034,141169,157016,0.0,23.0,Med-Surg ICU,95,Floor,Floor,1,...,0.0,"(17, 39]",3.0,,0.0,0.0,0.0,0.0,,
3,002-10050,183274,211144,0.0,67.0,Med-Surg ICU,87,Operating Room,Operating Room,1,...,1.0,"(59, 69]",4.0,284.0,1.0,1.0,0.0,1.0,,
4,002-10052,137239,151900,0.0,66.0,MICU,97,Emergency Department,Emergency Department,1,...,1.0,"(59, 69]",3.0,,0.0,0.0,1.0,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132989,035-9957,2741786,3351785,1.0,74.0,CCU,1109,Emergency Department,Emergency Department,1,...,0.0,"(69, 79]",2.0,,0.0,0.0,1.0,0.0,,
132990,035-9959,2731423,3340321,1.0,44.0,CCU,1109,Direct Admit,Direct Admit,1,...,0.0,"(39, 49]",3.0,,0.0,0.0,0.0,0.0,,
132991,035-996,2736458,3345874,1.0,55.0,CCU,1109,Emergency Department,Emergency Department,1,...,0.0,"(49, 59]",3.0,,0.0,0.0,1.0,0.0,,
132992,035-9966,2742533,3352628,1.0,60.0,MICU,1106,Operating Room,Recovery Room,1,...,1.0,"(59, 69]",2.0,,0.0,0.0,0.0,0.0,,


In [9]:
# List the MIMIC column names, for comparison with the eICU columns above.
data_mimic.columns.values

array(['subject_id', 'hadm_id', 'icustay_id', 'gender', 'age',
       'ethnicity', 'icu_los_hours', 'icu_expire_flag', 'admission_type',
       'unittype', 'albumin_min', 'albumin_max', 'bilirubin_min',
       'bilirubin_max', 'bicarbonate_min', 'bicarbonate_max',
       'creatinine_min', 'creatinine_max', 'glucose_min', 'glucose_max',
       'hemoglobin_min', 'hemoglobin_max', 'lactate_min', 'lactate_max',
       'platelet_min', 'platelet_max', 'potassium_min', 'potassium_max',
       'sodium_min', 'sodium_max', 'magnesium_min', 'magnesium_max',
       'phosphate_min', 'phosphate_max', 'bun_min', 'bun_max', 'wbc_min',
       'wbc_max', 'calcium_min', 'calcium_max', 'ionized_calcium_min',
       'ionized_calcium_max', 'rrt_flag', 'crrt_duration_hours',
       'vent_flag', 'vent_duration_hours', 'age_group', 'outcome_group',
       'cardiac_surgery_flag', 'crystalloid_v', 'colloid', 'blood',
       'emergency', 'crystalloid'], dtype=object)

In [10]:
#column names we want to list in the TableOne
col_l=['gender', 'age','icu_los_hours', 'icu_expire_flag','unittype', 'vent_duration_hours', 'crrt_duration_hours',
      'admission_type', 'vent_flag', 'rrt_flag','age_group', 'outcome_group', 'cardiac_surgery_flag',
      'crystalloid_v', 'colloid', 'emergency', 'crystalloid','blood',
      'sodium_max','creatinine_max', 'lactate_max','bicarbonate_min', 'hemoglobin_min']

In [11]:
# Keep only the TableOne variables for MIMIC and tag each row with its cohort.
df_tableone_mimic = data_mimic.loc[:, col_l].assign(source='MIMIC')

In [12]:
# Keep only the TableOne variables for eICU and tag each row with its cohort.
df_tableone_eicu = data_eicu.loc[:, col_l].assign(source='eICU')

In [13]:
# Keep only the TableOne variables for PLAGH and tag each row with its cohort.
df_tableone_plagh = data_plagh.loc[:, col_l].assign(source='PLAGH')

In [14]:
# AUMC uses a reduced variable list: compared with col_l it leaves out 'age',
# 'vent_duration_hours' and 'crrt_duration_hours'.
# NOTE(review): presumably those columns are not available in the AUMC
# extract -- confirm.
aumc_cols = [
    'gender', 'icu_los_hours', 'icu_expire_flag', 'unittype',
    'admission_type', 'vent_flag', 'rrt_flag', 'age_group',
    'outcome_group', 'cardiac_surgery_flag',
    'crystalloid_v', 'colloid', 'emergency', 'crystalloid', 'blood',
    'sodium_max', 'creatinine_max', 'lactate_max',
    'bicarbonate_min', 'hemoglobin_min',
]
df_tableone_aumc = data_aumc.loc[:, aumc_cols].assign(source='AUMC')

In [15]:
# Keep only the TableOne variables for HJ23 and tag each row with its cohort.
df_tableone_HJ23 = data_HJ23.loc[:, col_l].assign(source='HJ23')

In [16]:
# Preview the eICU subset (selected variables plus the 'source' tag).
df_tableone_eicu

Unnamed: 0,gender,age,icu_los_hours,icu_expire_flag,unittype,vent_duration_hours,crrt_duration_hours,admission_type,vent_flag,rrt_flag,...,colloid,emergency,crystalloid,blood,sodium_max,creatinine_max,lactate_max,bicarbonate_min,hemoglobin_min,source
0,0.0,76.0,69,0.0,Med-Surg ICU,23.47,,EMERGENCY,1.0,0,...,0.0,0.0,1.0,1.0,143.0,1.10,1.9,18.0,8.6,eICU
1,0.0,29.0,19,0.0,Med-Surg ICU,,,EMERGENCY,0.0,0,...,0.0,0.0,0.0,0.0,144.0,5.25,2.9,17.0,9.3,eICU
2,0.0,23.0,69,0.0,Med-Surg ICU,,,EMERGENCY,0.0,0,...,0.0,0.0,0.0,0.0,141.0,0.65,,24.0,7.8,eICU
3,0.0,67.0,92,0.0,Med-Surg ICU,49.45,,ELECTIVE,1.0,0,...,1.0,0.0,1.0,1.0,146.0,0.80,,30.0,13.0,eICU
4,0.0,66.0,57,0.0,MICU,15.67,,EMERGENCY,1.0,0,...,0.0,1.0,0.0,0.0,140.0,1.24,0.9,20.0,7.7,eICU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132989,1.0,74.0,36,0.0,CCU,,,EMERGENCY,0.0,0,...,0.0,1.0,0.0,0.0,142.0,0.80,,27.0,14.1,eICU
132990,1.0,44.0,64,0.0,CCU,,,EMERGENCY,0.0,0,...,0.0,0.0,0.0,0.0,137.0,0.99,,24.0,13.6,eICU
132991,1.0,55.0,46,0.0,CCU,,,EMERGENCY,0.0,0,...,0.0,1.0,0.0,0.0,139.0,2.30,2.7,24.0,8.7,eICU
132992,1.0,60.0,31,0.0,MICU,25.93,,EMERGENCY,1.0,0,...,0.0,0.0,0.0,0.0,133.0,1.00,,27.0,,eICU


In [17]:
# Stack the five cohorts into one long frame with a fresh 0..n-1 index.
# Columns a cohort does not provide (e.g. those left out of the AUMC
# selection) are filled with NaN by concat.
df_tableone = pd.concat(
    [
        df_tableone_mimic,
        df_tableone_eicu,
        df_tableone_plagh,
        df_tableone_aumc,
        df_tableone_HJ23,
    ],
    ignore_index=True,
)

# Recode the 0/1 gender flag to the string labels '0' / '1'. 'gender' is the
# categorical variable here; 'age' is continuous and must stay numeric so its
# summary statistics can be computed.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on df_tableone['gender'], because an in-place
# call on a column selection is not guaranteed to update the parent frame.
df_tableone['gender'] = df_tableone['gender'].replace({1: '1', 0: '0'})
df_tableone

Unnamed: 0,gender,age,icu_los_hours,icu_expire_flag,unittype,vent_duration_hours,crrt_duration_hours,admission_type,vent_flag,rrt_flag,...,colloid,emergency,crystalloid,blood,sodium_max,creatinine_max,lactate_max,bicarbonate_min,hemoglobin_min,source
0,1,76.5,145.6,0.0,MICU,83.733333,,EMERGENCY,1.0,0.0,...,0.0,1.0,1.0,1.0,153.0,2.50,8.80,11.0,7.8,MIMIC
1,0,47.8,40.3,0.0,MICU,,,EMERGENCY,0.0,0.0,...,0.0,1.0,1.0,0.0,141.0,0.50,,21.0,10.6,MIMIC
2,0,65.9,88.1,0.0,SICU,,,ELECTIVE,0.0,0.0,...,0.0,0.0,1.0,1.0,134.0,10.00,,18.0,9.2,MIMIC
3,1,41.8,127.8,1.0,MICU,118.333333,,EMERGENCY,1.0,0.0,...,0.0,1.0,0.0,0.0,138.0,1.40,2.70,26.0,12.9,MIMIC
4,0,50.1,38.0,0.0,SICU,,,EMERGENCY,0.0,0.0,...,0.0,1.0,0.0,0.0,142.0,0.70,,25.0,12.5,MIMIC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259347,1,27.0,17.0,0.0,MICU,,,EMERGENCY,0.0,0.0,...,0.0,1.0,0.0,0.0,138.3,0.68,2.72,26.5,15.5,HJ23
259348,0,84.0,153.8,0.0,MICU,,,EMERGENCY,0.0,1.0,...,0.0,0.0,1.0,0.0,143.0,9.70,1.50,22.8,9.1,HJ23
259349,1,43.0,98.3,0.0,SICU,,,EMERGENCY,0.0,0.0,...,0.0,0.0,1.0,1.0,139.0,0.78,,,7.0,HJ23
259350,1,53.0,21.5,0.0,SICU,,,EMERGENCY,0.0,0.0,...,0.0,0.0,0.0,0.0,142.0,0.45,1.49,28.9,8.4,HJ23


In [18]:
# Variables reported in the TableOne, in display order.
columns = ['icu_expire_flag',
           'gender', 'age','age_group','icu_los_hours', 'admission_type','emergency',
           'unittype','vent_flag', 'rrt_flag',  'cardiac_surgery_flag',
           'vent_duration_hours', 'crrt_duration_hours',
           'crystalloid_v', 'colloid',  'crystalloid','blood',
           'outcome_group',
           'sodium_max','creatinine_max', 'lactate_max','bicarbonate_min', 'hemoglobin_min']

# Subset of `columns` summarised as counts and percentages.
categorical = ['icu_expire_flag',
              'gender','age_group','admission_type','emergency',
              'unittype','vent_flag', 'rrt_flag',  'cardiac_surgery_flag',
              'colloid',  'crystalloid','blood',
              'outcome_group']

# One table column per data source.
groupby='source'

# Continuous variables to be treated as non-normally distributed: every entry
# of `columns` that is not categorical. 'hospital_los_hours' was removed from
# this list because it is not in `columns`, so it never appears in the table.
nonnormal = ['age','icu_los_hours',
            'vent_duration_hours', 'crrt_duration_hours',
            'crystalloid_v',
            'sodium_max','creatinine_max', 'lactate_max','bicarbonate_min', 'hemoglobin_min']

In [19]:
# Summary table over all cohorts, grouped by the `groupby` column, with
# statistics rounded to two decimals.
mytable_whole = TableOne(
    df_tableone,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    decimals=2,
)

In [20]:
# Show the plain-text rendering of the overall table.
print(mytable_whole)

                                                 Grouped by source                                                                                                                                                             
                                                           Missing                   Overall                       AUMC                      HJ23                     MIMIC                     PLAGH                      eICU
n                                                                                     259352                      20127                      4238                     38478                     63515                    132994
icu_expire_flag, n (%)              0.0                         14            242482 (93.50)              18198 (90.42)              3619 (85.39)             35531 (92.34)             59772 (94.11)            125362 (94.27)
                                    1.0                                         16856 (6.50)            

In [21]:
# Export the overall table to the report folder.
mytable_whole.to_csv('../report/table1.csv')

In [22]:
# Same table restricted to rows with outcome_group == 1 (exported as "best").
best_rows = df_tableone['outcome_group'] == 1
mytable_best = TableOne(
    df_tableone.loc[best_rows, :],
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    decimals=2,
)
mytable_best.to_csv('../report/table1_best.csv')

In [23]:
# Same table restricted to rows with outcome_group == 5 (exported as "worst").
worst_rows = df_tableone['outcome_group'] == 5
mytable_worst = TableOne(
    df_tableone.loc[worst_rows, :],
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    decimals=2,
)
mytable_worst.to_csv('../report/table1_worst.csv')

In [24]:
# Table for every row whose outcome_group is not 5, exported as "survivor".
# It is stored under its own name so it does not overwrite `mytable_worst`
# (the outcome_group == 5 table).
# Rows with a missing outcome_group also satisfy `!= 5` and are included.
# NOTE(review): confirm that including rows with unknown outcome is intended.
survivor_rows = df_tableone['outcome_group'] != 5
mytable_survivor = TableOne(
    df_tableone.loc[survivor_rows, :],
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    nonnormal=nonnormal,
    decimals=2,
)
mytable_survivor.to_csv('../report/table1_survivor.csv')

## reliable hospital

In [25]:
# Stays whose unit was not matched in the reliable-unit list have NaN in
# 'reliable' after the left merge; code them as 0 so the flag is a complete
# 0/1 grouping variable.
# The result is assigned back to the column: fillna(..., inplace=True) on a
# column selection is not guaranteed to modify `data_eicu` itself.
data_eicu['reliable'] = data_eicu['reliable'].fillna(0)

In [26]:
# eICU-only table comparing stays by the 'reliable' flag, exported to the
# report folder.
mytable_eicu = TableOne(
    data_eicu,
    columns=columns,
    categorical=categorical,
    groupby='reliable',
    nonnormal=nonnormal,
    decimals=2,
)
mytable_eicu.to_csv('../report/table1_eicu.csv')