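 The variables are `age`, `hours_in_hosp_before_intubation`, `lactate_min`, `resp_rate_avg`, `plateau_pressure_avg`, and `parapleagia` (i.e. paraplegia). The cells below extract `age`, `lactate_min`, `resp_rate_avg` and `plateau_pressure_avg` from eICU.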

In [None]:
# age SELECT age FROM `physionet-data.eicu_crd_derived.basic_demographics` LIMIT 1000
# lactate SELECT lactate FROM `physionet-data.eicu_crd_derived.pivoted_lab` LIMIT 1000

In [1]:
import os 
import pandas as pd

# Google Cloud project used for every BigQuery call in this notebook; set it to your own project.
project_id = "upbeat-legacy-282508"
# Export it as well so client libraries that read the environment see the same project.
os.environ.update(GOOGLE_CLOUD_PROJECT=project_id)

In [2]:
# Colab-only step: interactively authenticate the current Google user.
# Presumably required before the BigQuery queries below can run -- confirm when running outside Colab.
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [3]:
def run_query(query):
    """Run a standard-SQL BigQuery query and return the result as a DataFrame.

    Parameters
    ----------
    query : str
        The SQL text to execute (standard SQL; legacy SQL is switched off).

    Returns
    -------
    pandas.DataFrame
        The full query result. As a side effect the row count and the first
        rows are printed so the cell output doubles as a quick sanity check.

    Notes
    -----
    The job runs under the notebook-level ``project_id``.
    """
    # The deprecated `verbose` keyword is intentionally not passed: newer pandas
    # versions no longer accept it, and it had no effect on the returned data.
    df = pd.io.gbq.read_gbq(
        query,
        project_id=project_id,
        configuration={'query': {'useLegacySql': False}},
    )
    print(len(df), 'rows')
    print(df.head())
    return df

# An example: smoke-test run_query with a 10-row query.
# NOTE(review): this reads a MIMIC table (`mimic_icu.icustays`) while the feature queries in this
# notebook read eICU tables; the `df` assigned here is overwritten later by the feature merge.
df = run_query('''
SELECT
  subject_id,
  hadm_id,
  stay_id
FROM
  `physionet-data.mimic_icu.icustays`
LIMIT 10
''')

10 rows
   subject_id   hadm_id   stay_id
0    12776735  20817525  34547665
1    16256226  20013290  39289362
2    12974563  29618057  32563675
3    14609218  20606189  34947848
4    12687112  26132667  37445058


In [4]:
# Dataset (inside `project_id`) that holds the ventilation-cohort view.
dataset_id = 'default' # change accordingly
# Fully qualified name of the cohort view that every feature query joins against.
view_id = f"{project_id}.{dataset_id}.vent_cohort_eicu_view" # this needs to be created in advance
view_id

'upbeat-legacy-282508.default.vent_cohort_eicu_view'

In [5]:
# Length (in hours) of the observation window that starts at vent_start;
# the SQL below multiplies it by 60 before adding it to the offsets.
HOUR_GAP = 24

# Columns of the cohort view; they double as merge keys for the per-feature frames.
cols = [
    'patientunitstayid',
    'vent_start',
    'vent_end',
    'vent_duration',
    'over72h',
]

In [23]:
# lactate, resp
#
# Max/min/avg of lactate (pivoted_lab) and respiratory rate (pivoted_vital) per cohort row,
# restricted to chartoffset in [vent_start, vent_start + HOUR_GAP*60].
# NOTE(review): lab_ft and vs_ft each keep one row per measurement and are both joined back to the
# view on patientunitstayid only, so the final join pairs every lab row with every vital-sign row
# of a stay before aggregating. Pre-aggregating inside each CTE would avoid that row blow-up.

sql_features = f'''
with lab_ft as (
    SELECT 
    v.patientunitstayid, 
    lab.lactate
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_lab` lab
    on v.patientunitstayid=lab.patientunitstayid
    where lab.chartoffset <= v.vent_start + {HOUR_GAP}*60 and lab.chartoffset >= v.vent_start
)

, vs_ft as (
    SELECT 
    v.patientunitstayid, 
    # vt.heartrate, vt.temperature, vt.spo2,
    vt.respiratoryrate, 
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_vital` vt
    on v.patientunitstayid = vt.patientunitstayid
    where vt.chartoffset <= v.vent_start + {HOUR_GAP}*60 and vt.chartoffset >= v.vent_start
)
select 
v.*
# v.patientunitstayid

,max(b.lactate) lactate_max
,min(b.lactate) lactate_min
,avg(b.lactate) lactate_avg

,max(s.respiratoryrate) resp_rate_max
,min(s.respiratoryrate) resp_rate_min
,avg(s.respiratoryrate) resp_rate_avg

# ,min(s.heartrate) hr_min
# ,max(s.heartrate) hr_max
# ,min(s.temperature) temp_min 
# ,max(s.temperature) temp_max

from `{view_id}` v
left join lab_ft b on b.patientunitstayid=v.patientunitstayid
left join vs_ft s on s.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''
# Execute the query; `ft` holds one row per cohort row with the six aggregate columns.
ft = run_query(sql_features)

21185 rows
   patientunitstayid  vent_start  ...  resp_rate_min  resp_rate_avg
0             147985         539  ...            0.0      13.808824
1             153409          11  ...           19.0      24.964912
2             157895         384  ...            8.0      18.807692
3             158050         304  ...           17.0      20.250000
4             171281         460  ...           10.0      14.888889

[5 rows x 11 columns]


In [6]:
# Max/avg/min plateau pressure per cohort row from respiratory charting, restricted to
# chartoffset in [vent_start, vent_start + HOUR_GAP*60].
# NOTE(review): the window is applied to `respchartentryoffset`, whereas the PEEP and FiO2 queries
# further down use `respchartoffset` -- confirm which offset is intended.
# NOTE(review): values are not range-checked; the sample output of this cell shows a minimum
# of -20.0.
sql_eicu_plateau=f'''
with tmp as (
    SELECT 
    patientunitstayid, 
    respchartentryoffset chartoffset,
    respchartvaluelabel, safe_cast(respchartvalue as float64) plateau
    -- distinct respchartvaluelabel
    FROM `physionet-data.eicu_crd.respiratorycharting` where lower(respchartvaluelabel) like '%plateau%'
    )
, plt as (
    select v.patientunitstayid , plateau
    FROM `{view_id}` v
    left join tmp t on t.patientunitstayid = v.patientunitstayid 
    where t.chartoffset <= v.vent_start+{HOUR_GAP}*60 and t.chartoffset >= v.vent_start
    )
select 
    v.*
    , max(plateau) plateau_pressure_max
    , avg(plateau) plateau_pressure_avg
    , min(plateau) plateau_pressure_min

from `{view_id}` v
left join plt p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''

# Execute the query; `plateau` holds the cohort columns plus the three plateau aggregates.
plateau = run_query(sql_eicu_plateau)


21185 rows
   patientunitstayid  vent_start  ...  plateau_pressure_avg  plateau_pressure_min
0            2260573         426  ...             36.500000                  35.0
1            1642672         137  ...             17.285714                 -20.0
2            3179588         118  ...             23.220000                  20.7
3            3161044         102  ...             13.000000                  10.8
4            3052672         147  ...             24.428571                  23.0

[5 rows x 8 columns]


In [10]:
# Age for every unit stay in the derived demographics table (not restricted to the cohort;
# the later left merge keeps only cohort rows).
# NOTE(review): eICU ages may be stored as strings (e.g. "> 89") -- confirm the dtype before modelling.
sql_age = """
SELECT patientunitstayid, age FROM `physionet-data.eicu_crd_derived.basic_demographics`
"""
age = run_query(sql_age)

200859 rows
   patientunitstayid age
0             141208  25
1             141263  19
2             141264  19
3             141970  18
4             142557  25


In [24]:
# Peek at the first two rows of the lactate / respiratory-rate features.
ft.head(2)

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,lactate_max,lactate_min,lactate_avg,resp_rate_max,resp_rate_min,resp_rate_avg
0,147985,539,2506,32.783333,0,12.6,4.0,7.933333,29.0,0.0,13.808824
1,153409,11,3132,52.016667,0,14.1,10.9,12.233333,31.0,19.0,24.964912


In [12]:
# Peek at the first two rows of the plateau-pressure features.
plateau.head(2)

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,plateau_pressure_max,plateau_pressure_avg,plateau_pressure_min
0,2260573,426,14252,230.433333,1,38.0,36.5,35.0
1,1642672,137,4081,65.733333,0,50.0,17.285714,-20.0


In [25]:
# Key columns shared by both feature frames (the cohort-view columns).
cols = [
    'patientunitstayid',
    'vent_start',
    'vent_end',
    'vent_duration',
    'over72h',
]

# Combine lactate/respiratory-rate features with plateau-pressure features, one row per cohort row.
df = pd.merge(ft, plateau, how='inner', on=cols)
print(len(df))
df.head(2)

21185


Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,lactate_max,lactate_min,lactate_avg,resp_rate_max,resp_rate_min,resp_rate_avg,plateau_pressure_max,plateau_pressure_avg,plateau_pressure_min
0,147985,539,2506,32.783333,0,12.6,4.0,7.933333,29.0,0.0,13.808824,26.0,18.6,11.0
1,153409,11,3132,52.016667,0,14.1,10.9,12.233333,31.0,19.0,24.964912,31.0,28.307692,23.0


In [26]:
# Attach age; with no explicit key, pandas joins on the columns the two frames have in common.
df = pd.merge(df, age, how='left')
print(len(df))
df.head(2)

21185


Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,lactate_max,lactate_min,lactate_avg,resp_rate_max,resp_rate_min,resp_rate_avg,plateau_pressure_max,plateau_pressure_avg,plateau_pressure_min,age
0,147985,539,2506,32.783333,0,12.6,4.0,7.933333,29.0,0.0,13.808824,26.0,18.6,11.0,78
1,153409,11,3132,52.016667,0,14.1,10.9,12.233333,31.0,19.0,24.964912,31.0,28.307692,23.0,58


In [27]:
# Persist the merged feature table (cohort columns + lactate, resp rate, plateau pressure, age).
df.to_csv('eicu_some_ft.csv', index=False)

In [21]:
#### ignore everything below

 # ignore everything below

apsiii, peep_min, resp_rate_min

(Neuroblocker, vasopressin), ph_max, height
co2_total_min, co2_total_avg, temp_max, fio2_min
plateau_pressure_max, peep_min

gcs_score, acidbase_score

resp min, ph max, 

In [None]:
# Blood-gas pH and vital-sign extremes per cohort row, restricted to chartoffset in
# [vent_start, vent_start + HOUR_GAP*60].
# NOTE(review): this rebinds `sql_features`, which the lactate/resp cell above also defines, so
# the value of the name depends on which cell ran last.
sql_features = f'''
with bg_ft as (
    SELECT 
    v.patientunitstayid, 
    bg.pH #, peep
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_bg` bg
    on v.patientunitstayid=bg.patientunitstayid
    where bg.chartoffset <= v.vent_start + {HOUR_GAP}*60 and bg.chartoffset >= v.vent_start
)

, vs_ft as (
    SELECT 
    v.patientunitstayid, 
    vt.heartrate, vt.temperature, vt.respiratoryrate, vt.spo2
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_vital` vt
    on v.patientunitstayid = vt.patientunitstayid
    where vt.chartoffset <= v.vent_start + {HOUR_GAP}*60 and vt.chartoffset >= v.vent_start
)
select 
v.*
# v.patientunitstayid
,max(b.pH) ph_max
, min(s.spo2) spo2_min

,min(s.heartrate) hr_min
,max(s.heartrate) hr_max
,min(s.respiratoryrate) resp_min
,max(s.respiratoryrate) resp_max
,min(s.temperature) temp_min 
,max(s.temperature) temp_max

from `{view_id}` v
left join bg_ft b on b.patientunitstayid=v.patientunitstayid
left join vs_ft s on s.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''

In [None]:
# Glucose, total CO2 and mean blood pressure aggregates per cohort row, restricted to chartoffset
# in [vent_start, vent_start + HOUR_GAP*60].
# `bp_mean` is the average of the non-invasive and invasive means when both exist, otherwise
# whichever one is present.
# NOTE(review): the result is grouped by all five cohort columns but only `patientunitstayid` is
# selected; if a stay can appear in the view more than once, the later merge on
# `patientunitstayid` alone would multiply rows -- confirm the view has one row per stay.
sql_ft_new = f'''
with lab as (
  SELECT 
  v.patientunitstayid 
  ,glucose ,TotalCO2 
  
  FROM `{view_id}` v
  left join `physionet-data.eicu_crd_derived.pivoted_lab` l
  on v.patientunitstayid =l.patientunitstayid 
  where l.chartoffset <= v.vent_start + {HOUR_GAP}*60 and l.chartoffset >=v.vent_start 
), bp as (
  SELECT
  v.patientunitstayid 
  ,nibp_mean 
  
  , case 
    when nibp_mean is null and ibp_mean is null then null
    when nibp_mean is null or ibp_mean is null then coalesce(nibp_mean, ibp_mean)
    else (nibp_mean+ibp_mean)/2

    end as bp_mean
  FROM `{view_id}` v
  left join `physionet-data.eicu_crd_derived.pivoted_vital` b
  on v.patientunitstayid =b.patientunitstayid 
  where b.chartoffset <= v.vent_start + {HOUR_GAP}*60 and b.chartoffset >=v.vent_start 
)
select 
v.patientunitstayid
, max(glucose) glucose_max
, min(glucose) glucose_min
, max(TotalCO2) co2_total_max
, avg(TotalCO2) co2_total_avg
, min(TotalCO2) co2_total_min

, max(bp_mean) mbp_arterial_max
, min(nibp_mean) mbp_ni_min

FROM `{view_id}` v
left join lab l on v.patientunitstayid =l.patientunitstayid 
left join bp b on v.patientunitstayid =b.patientunitstayid 
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''

In [None]:
# Acute physiology score per cohort row, averaged over all matching rows of apachepatientresult.
# NOTE(review): no time window or validity filter is applied -- confirm whether the table can
# hold several rows per stay or sentinel values that should be excluded before averaging.
sql_apsiii = f'''
select
v.patientunitstayid
,avg(acutephysiologyscore) apsiii

FROM `{view_id}` v
left join `physionet-data.eicu_crd.apachepatientresult` a
on a.patientunitstayid =v.patientunitstayid 
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h
'''

In [None]:
# Execute the three feature queries in order. `sql_features` must be the blood-gas / vital-sign
# version defined just above (not the lactate one) for df_ft1 to carry ph_max, spo2_min, etc.
df_ft1, df_ft_new, df_aps = (
    run_query(statement)
    for statement in (sql_features, sql_ft_new, sql_apsiii)
)

21185 rows
   patientunitstayid  glucose_max  ...  mbp_arterial_max  mbp_ni_min
0             181480        142.0  ...             104.0       104.0
1             225680        173.0  ...             130.0       105.0
2             240457        211.0  ...             122.0        88.0
3             257541        239.0  ...             101.0        94.0
4             268004         93.0  ...              86.5        90.0

[5 rows x 8 columns]


In [None]:
# Left-join the lab/blood-pressure aggregates and the APS score onto the bg/vitals frame by stay id.
df1 = (
    df_ft1
    .merge(df_ft_new, how='left', on=['patientunitstayid'])
    .merge(df_aps, how='left', on=['patientunitstayid'])
)

In [None]:
# Display the merged frame (pandas elides the middle rows of a long frame).
df1

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,ph_max,spo2_min,hr_min,hr_max,resp_min,resp_max,temp_min,temp_max,glucose_max,glucose_min,co2_total_max,co2_total_avg,co2_total_min,mbp_arterial_max,mbp_ni_min,apsiii
0,177241,259,1836,26.283333,0,7.340,86.0,104.0,140.0,35.0,36.0,35.90000,36.90000,173.0,162.0,,,,116.0,,
1,188948,2638,6630,66.533333,0,7.240,90.0,96.0,144.0,36.0,36.0,36.60000,37.20000,151.0,151.0,,,,175.5,56.0,65.0
2,224432,3573,7952,72.983333,1,7.170,97.0,114.0,134.0,28.0,28.0,38.20000,40.70000,256.0,185.0,,,,81.0,,75.0
3,257535,134,6545,106.850000,1,7.300,86.0,89.0,102.0,28.0,38.0,36.33624,37.94748,115.0,81.0,,,,117.0,60.0,67.0
4,349218,1332,6699,89.450000,1,7.461,94.0,128.0,158.0,26.0,26.0,35.80000,37.20000,152.0,117.0,,,,100.0,22.0,116.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,3131850,78,3198,52.000000,0,7.540,95.0,76.0,100.0,24.0,35.0,36.60000,37.20000,390.0,136.0,,,,93.0,54.0,117.0
21181,3202275,868,9839,149.516667,1,,97.0,71.0,100.0,24.0,35.0,37.90000,39.70000,162.0,146.0,,,,117.0,80.0,113.0
21182,3223531,260,12862,210.033333,1,7.420,80.0,48.0,82.0,24.0,42.0,37.10000,38.50000,151.0,111.0,,,,95.0,78.0,60.0
21183,3239782,490,8006,125.266667,1,7.440,90.0,73.0,100.0,24.0,40.0,37.10000,39.20000,112.0,93.0,,,,117.0,34.0,60.0


peep_min, fio2_min, plateau_pressure_max

In [None]:
# Max/avg/min PEEP per cohort row, pooling respiratory-charting values and blood-gas records
# within chartoffset in [vent_start, vent_start + HOUR_GAP*60].
# The label filter `like 'peep'` has no wildcard, so it matches only labels equal to "peep"
# after lower-casing.
# NOTE(review): the `< 100` plausibility filter is applied to the charting branch only, not to the
# pivoted_bg branch -- confirm that this asymmetry is intended.
sql_eicu_peep = f'''

with 
resp as (
SELECT patientunitstayid,
respchartoffset chartoffset
,respchartvalue peep
FROM `physionet-data.eicu_crd.respiratorycharting` 
where lower(respchartvaluelabel) like 'peep'
)
, peep_ft1 as (
  select 
  v.patientunitstayid, safe_cast(peep as float64) peep
  from `{view_id}` v
  left join resp p on v.patientunitstayid=p.patientunitstayid
  where p.chartoffset <= v.vent_start+{HOUR_GAP}*60 and p.chartoffset >= v.vent_start and safe_cast(peep as float64) < 100
)
, peep_ft2 as (
    SELECT 
    v.patientunitstayid, peep
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_bg` bg
    on v.patientunitstayid=bg.patientunitstayid
    where bg.chartoffset <= v.vent_start+{HOUR_GAP}*60 and bg.chartoffset >= v.vent_start
)
, peep_union as (
  select * from peep_ft1 
  
  union all

  select * from peep_ft2
)

select 
v.*
, max(peep) peep_max
, avg(peep) peep_avg
, min(peep) peep_min

from `{view_id}` v
left join peep_union p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''
# Execute the query; `peep` holds the cohort columns plus the three PEEP aggregates.
peep = run_query(sql_eicu_peep)

21185 rows
   patientunitstayid  vent_start  vent_end  ...  peep_max   peep_avg  peep_min
0             141233         225      1672  ...      55.0   9.545455       5.0
1             161176         266     10138  ...      11.0  11.000000      11.0
2             168134        1782      9577  ...       3.0   3.000000       3.0
3             201456         953      5878  ...      22.0  16.333333      12.0
4             221911          60      3734  ...      16.0   7.000000       5.0

[5 rows x 8 columns]


In [None]:
# Max/mean/min FiO2 (expressed in percent) per cohort row, pooling respiratory-charting values and
# blood-gas records within chartoffset in [vent_start, vent_start + HOUR_GAP*60].
#
# Charted values arrive either as a fraction (0.2-1, scaled by 100 -> value1) or as a percent
# (20-100, kept as is -> value2); everything else is discarded.
# Fix: fio2_2 previously selected `value1` on rows where `value2 is not null`. The two ranges are
# disjoint, so value1 is always NULL on those rows and every percent-valued FiO2 was silently
# dropped. It now selects `value2`.
sql_eicu_fio2=f'''

with tmp as (
SELECT 
patientunitstayid ,
respchartoffset chartoffset, 
respchartvaluelabel
, safe_cast(respchartvalue as float64) v
FROM `physionet-data.eicu_crd.respiratorycharting` where lower(respchartvaluelabel) like 'fio2%' # Fio2, Fio2 (%)  
), 

tmp2 as ( select
patientunitstayid, chartoffset
,case when v>=0.2 and v<=1 then v*100 else null end value1
,case when v>=20 and v<=100 then v else null end value2
from tmp
-- where safe_cast(v as float64) >=0 and safe_cast(v as float64) <2
) 

, fio2_1 as (
-- values charted as a fraction, already scaled to percent
select
patientunitstayid , chartoffset, value1 fio2
from tmp2 where value1 is not null)
, fio2_2 as ( 
-- values charted as a percent
select
patientunitstayid , chartoffset, value2 fio2
from tmp2 where value2 is not null)

, fio2_tmp_union as (
  select * from fio2_1 
  union all
  select * from fio2_2
)

,fio2_resp as (
select v.patientunitstayid, chartoffset, fio2
from `{view_id}` v
left join fio2_tmp_union f on v.patientunitstayid = f.patientunitstayid 
where f.chartoffset <= v.vent_start+{HOUR_GAP}*60 and f.chartoffset >= v.vent_start) 

,fio2_lab as (
select v.patientunitstayid, chartoffset, fio2 * 100 fio2 
from `{view_id}` v
left join `physionet-data.eicu_crd_derived.pivoted_bg` f on v.patientunitstayid = f.patientunitstayid  
where f.chartoffset <= v.vent_start+{HOUR_GAP}*60 and f.chartoffset >= v.vent_start )

, fio2_union as (
select * from fio2_resp 
union all
select * from fio2_lab
)
select 
v.*
, max(fio2) fio2_max
, avg(fio2) fio2_mean
, min(fio2) fio2_min

from `{view_id}` v
left join fio2_union f on f.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h
'''

# Execute the query; `fio2` holds the cohort columns plus the three FiO2 aggregates.
fio2 = run_query(sql_eicu_fio2)

21185 rows
   patientunitstayid  vent_start  vent_end  ...  fio2_max  fio2_mean  fio2_min
0             145464        1707      7622  ...      50.0  42.500000      40.0
1             151860         451      3895  ...     100.0  91.666667      70.0
2             153349         316      2726  ...     100.0  92.500000      80.0
3             153440         197      4050  ...      90.0  66.250000      50.0
4             154516         848     13748  ...     100.0  67.000000      55.0

[5 rows x 8 columns]


In [None]:
# Plateau-pressure aggregates per cohort row; same query as the earlier `sql_eicu_plateau` cell
# except for the output aliases (plateau_pr_max / plateau_pr_mean / plateau_pr_min).
# NOTE(review): this rebinds both `sql_eicu_plateau` and `plateau`, so the column names of
# `plateau` depend on which of the two cells ran last.
sql_eicu_plateau=f'''
with tmp as (
SELECT 
patientunitstayid, 
respchartentryoffset chartoffset,
respchartvaluelabel, safe_cast(respchartvalue as float64) plateau
-- distinct respchartvaluelabel
FROM `physionet-data.eicu_crd.respiratorycharting` where lower(respchartvaluelabel) like '%plateau%'
)
, plt as (
select v.patientunitstayid , plateau
FROM `{view_id}` v
left join tmp t on t.patientunitstayid = v.patientunitstayid 
where t.chartoffset <= v.vent_start+{HOUR_GAP}*60 and t.chartoffset >= v.vent_start
)
select 
v.*
, max(plateau) plateau_pr_max
, avg(plateau) plateau_pr_mean
, min(plateau) plateau_pr_min

from `{view_id}` v
left join plt p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h


'''

plateau = run_query(sql_eicu_plateau)


21185 rows
   patientunitstayid  vent_start  ...  plateau_pr_mean  plateau_pr_min
0            2310888         494  ...        39.958333            33.0
1            3243843         343  ...        11.333333            10.0
2            2358786         390  ...        19.200000            17.8
3            3102418        1851  ...        20.750000             4.5
4            3191829           4  ...        14.600000            14.0

[5 rows x 8 columns]


In [None]:
# Join the three ventilator-setting frames (PEEP, FiO2, plateau pressure) on the cohort columns.
peep_and_fio2 = pd.merge(peep, fio2, on=cols)
df2 = pd.merge(peep_and_fio2, plateau, on=cols)
del peep_and_fio2  # keep the notebook namespace unchanged
len(df2)

21185

In [None]:
# List the columns available in the bg/vitals/lab/APS frame.
df1.columns

Index(['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration',
       'over72h', 'ph_max', 'spo2_min', 'hr_min', 'hr_max', 'resp_min',
       'resp_max', 'temp_min', 'temp_max', 'glucose_max', 'glucose_min',
       'co2_total_max', 'co2_total_avg', 'co2_total_min', 'mbp_arterial_max',
       'mbp_ni_min', 'apsiii'],
      dtype='object')

In [None]:
# List the columns available in the PEEP/FiO2/plateau frame.
df2.columns

Index(['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration',
       'over72h', 'peep_max', 'peep_avg', 'peep_min', 'fio2_max', 'fio2_mean',
       'fio2_min', 'plateau_pr_max', 'plateau_pr_mean', 'plateau_pr_min'],
      dtype='object')

In [None]:
# Feature columns to keep from df1.
ft1 = [
    'apsiii',
    'resp_min',
    'ph_max',
    'temp_max',
    'co2_total_avg',
    'co2_total_min',
]
# Feature columns to keep from df2.
ft2 = [
    'fio2_min',
    'plateau_pr_max',
    'peep_min',
]

In [None]:
# Keep the cohort columns plus the selected features, and give two columns their final names.
ndf1 = df1[[*cols, *ft1]].rename(columns={'resp_min': 'resp_rate_min'})
ndf2 = df2[[*cols, *ft2]].rename(columns={'plateau_pr_max': 'plateau_pressure_max'})

In [None]:
# Demographics for every unit stay in the eICU patient table: patient id, gender, age and
# admission height. Not restricted to the cohort; the later left merge keeps only cohort rows.
sql_demo = '''
select 
patientunitstayid, 
uniquepid patient_id,
# patienthealthsystemstayid hospital_stay_id, 
# hospitalid hospital_id, 
# unitdischargeoffset,
# hospitaldischargeoffset,
# unitdischargestatus, 
# hospitaldischargestatus, 
gender, age,
admissionheight height
 from `physionet-data.eicu_crd.patient` 
'''
# Disabled variant that restricted the query to an explicit list of stay ids:
# where patientunitstayid in ({})
# '''.format(','.join(icustay_ids))

demo = run_query(sql_demo)

200859 rows
   patientunitstayid patient_id  gender age  height
0             486998  005-69938    Male  46  176.53
1            2188050  021-37174  Female  49  163.80
2            3034287  030-55344  Female  82  168.90
3            3036294  030-38039  Female  78  163.50
4            3125153  030-60229  Female  82  147.30


In [None]:
# One row per unit stay that appears in the derived vasopressor-treatment table, carrying the
# maximum of its `vasopressor` flag. No time window is applied, so the flag covers the whole
# stay rather than the first HOUR_GAP hours of ventilation.
sql_vaso = '''
with tmp as (
SELECT
  patientunitstayid,
  chartoffset,
  vasopressor # has only 1 value
FROM
  `physionet-data.eicu_crd_derived.pivoted_treatment_vasopressor`
) 
select patientunitstayid, max(vasopressor) as vasopressor
from tmp group by patientunitstayid
'''
vaso = run_query(sql_vaso)

24381 rows
   patientunitstayid  vasopressor
0             242082            1
1             242290            1
2             242319            1
3             243334            1
4             243445            1


In [None]:
# Combine the two selected-feature frames on the cohort columns.
mdf = pd.merge(ndf1, ndf2, on=cols)

In [None]:
# Attach admission height from the demographics frame, keeping every cohort row.
height_by_stay = demo[['patientunitstayid', 'height']]
mdf1 = pd.merge(mdf, height_by_stay, how='left', on=['patientunitstayid'])
del height_by_stay  # keep the notebook namespace unchanged

In [None]:
# Attach the vasopressor flag; stays absent from `vaso` get NaN from the left join.
mdf2 = mdf1.merge(vaso, how='left', on=['patientunitstayid'])
# Treat "no vasopressor record" as 0. The filled column is assigned back explicitly rather than
# calling fillna(inplace=True) on `mdf2.vasopressor`: an in-place call on an attribute-accessed
# column is not guaranteed to write through to the frame (it does not under pandas Copy-on-Write).
mdf2['vasopressor'] = mdf2['vasopressor'].fillna(0)

In [None]:
# Feature column names: everything after the first five columns of mdf2
# (the five cohort columns listed in `cols` come first).
l = mdf2.columns[5:]

In [None]:
# Print the feature names as one comma-separated line.
print(', '.join(l))

apsiii, resp_rate_min, ph_max, temp_max, co2_total_avg, co2_total_min, fio2_min, plateau_pressure_max, peep_min, height, vasopressor


In [None]:
# Number of feature columns.
len(l)

11

In [None]:
# Write the final feature table to the working directory.
mdf2.to_csv('eicu_features.csv', index=False)
# NOTE(review): the listing below is not the last expression of the cell, so its result is
# discarded and never displayed.
os.listdir('.')

# Colab-only: trigger a browser download of the exported file.
from google.colab import files
files.download('eicu_features.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>