# Setup

In [1]:
import os 
import pandas as pd

project_id="upbeat-legacy-282508" # need to specify accordingly
os.environ["GOOGLE_CLOUD_PROJECT"]=project_id

In [2]:
from google.colab import auth
auth.authenticate_user()
print('Authenticated')

Authenticated


In [3]:
def run_query(query):
    df = pd.io.gbq.read_gbq(query, project_id=project_id, verbose=False, configuration={'query':{'useLegacySql': False}})
    print(len(df), 'rows')
    print(df.head())
    return df

# an example
df = run_query('''
SELECT
  subject_id,
  hadm_id,
  stay_id
FROM
  `physionet-data.mimic_icu.icustays`
LIMIT 10
''')

10 rows
   subject_id   hadm_id   stay_id
0    10122297  25825366  36349608
1    17168310  21560534  33376903
2    15703353  29272306  35111434
3    10308232  21297383  30153687
4    13307171  21128752  38043905


# Cohort extraction

The cohort extraction in eICU is more complicated than mimiv-iv as eICU does not provide explicitly an event on invasive mechanical ventilation. Furthermore, all events in eICU have only a chart time instead of duration, making it challenging to decide when a continuous event actually ends. So there are two main tasks to tackle in order to construct the prolonged MV cohort with eICU. First, identify charted events/items that indicate invasive MV; and second, decide start and end time of the MV. 

Below is the SQL modified based on https://github.com/nus-mornin-lab/oxygenation_kc/blob/master/data-extraction/eICU/eicu_oxygen_therapy.sql to address the two tasks and extract the eICU cohort. Again, only first ventilation in an ICU with duration longer than 24h is kept, with an over-72h flag.

In [None]:
sql_vent_cohort = '''



WITH respchart AS (
    SELECT *
    FROM `physionet-data.eicu_crd.respiratorycharting`
)

, nursechart AS (
    SELECT *
    FROM `physionet-data.eicu_crd.nursecharting`
)

, pat AS (
    SELECT *
    FROM `physionet-data.eicu_crd.patient`
)

-- paterns modified based on 
-- https://github.com/nus-mornin-lab/oxygenation_kc/blob/master/data-extraction/eICU/eicu_oxygen_therapy.sql

-- The categories are invasive ventilation,
-- noninvasive ventilation, and either/or.
, ventsettings0 AS (
    SELECT patientunitstayid AS icustay_id
        , charttime
        , CASE

            -- Invasive ventilation
            WHEN
                string IN (
                    'plateau pressure',
                    'postion at lip',
                    'position at lip',
                    'pressure control'
                )
                OR string LIKE '%set vt%'
                OR string LIKE '%sputum%'
                OR string LIKE '%rsbi%'
                OR string LIKE '%tube%'
                OR string LIKE '%ett%'
                OR string LIKE '%endotracheal%'
                OR string LIKE '%tracheal suctioning%'
                OR string LIKE '%tracheostomy%'
                OR string LIKE '%reintubation%' # re?
                OR string LIKE '%intubation%' # remove re?
                OR string LIKE '%assist controlled%'
                OR string LIKE '%volume controlled%'
                OR string LIKE '%pressure controlled%'
                OR string LIKE '%trach collar%'
            THEN 0

            -- Noninvasive ventilation
            WHEN
                string IN (
                    'bi-pap',
                    'ambubag'
                )
                OR string LIKE '%ipap%'
                OR string LIKE '%niv%'
                OR string LIKE '%epap%'
                OR string LIKE '%mask leak%'
                OR string LIKE '%volume assured%'
                OR string LIKE '%non-invasive ventilation%'
                OR string LIKE '%cpap%'
            THEN 1

            -- Either invasive or noninvasive ventilation:
            WHEN
                string IN (
                    'flowtrigger',
                    'peep',
                    'tv/kg ibw',
                    'mean airway pressure',
                    'peak insp. pressure',
                    'exhaled mv',
                    'exhaled tv (machine)',
                    'exhaled tv (patient)',
                    'flow sensitivity',
                    'peak flow',
                    'f total',
                    'pressure to trigger ps',
                    'adult con setting set rr',
                    'adult con setting set vt',
                    'vti',
                    'exhaled vt',
                    'adult con alarms hi press alarm',
                    'mve',
                    'respiratory phase',
                    'inspiratory pressure, set',
                    'a1: high exhaled vt',
                    'set fraction of inspired oxygen (fio2)',
                    'insp flow (l/min)',
                    'adult con setting spont exp vt',
                    'spont tv',
                    'pulse ox results vt',
                    'vt spontaneous (ml)',
                    'peak pressure',
                    'ltv1200',
                    'tc'
                )
                OR (
                    string LIKE '%vent%'
                    AND NOT string LIKE '%hyperventilat%'
                )
                OR string LIKE '%tidal%'
                OR string LIKE '%flow rate%'
                OR string LIKE '%minute volume%'
                OR string LIKE '%leak%'
                OR string LIKE '%pressure support%'
                OR string LIKE '%peep%'
                OR string LIKE '%tidal volume%'
            THEN 2

            ELSE NULL

        END AS vent_type
        , activeUponDischarge
    FROM (

        SELECT patientunitstayid
            , nursingChartOffset AS charttime
            , LOWER(nursingchartvalue) AS string
            , NULL AS activeUponDischarge
        FROM nursechart

        UNION ALL

        SELECT patientunitstayid
            , respchartoffset AS charttime
            , LOWER(respchartvaluelabel) AS string
            , NULL AS activeUponDischarge
        FROM respchart

        UNION ALL

        -- Oxygen device from respchart
        SELECT patientunitstayid
            , respchartoffset AS charttime
            , LOWER(respchartvalue) AS string
            , NULL AS activeUponDischarge
        FROM respchart
        WHERE LOWER(respchartvaluelabel) IN (
            'o2 device',
            'respiratory device',
            'ventilator type',
            'oxygen delivery method'
        )

        UNION ALL

        -- The treatment table also contains info on oxygen therapy.
        SELECT patientunitstayid
            , treatmentoffset AS charttime
            , LOWER(treatmentstring) AS string
            , activeUponDischarge
        FROM `physionet-data.eicu_crd.treatment`
    )
    WHERE charttime >= -60

)
-- 
-- Ensure charttime is unique
, ventsettings AS (
    SELECT icustay_id
        , charttime
        -- , MIN(vent_type) AS vent_type
        , MIN(vent_type) AS vent_type
        , MAX(activeUponDischarge) AS activeUponDischarge
    FROM ventsettings0
    WHERE vent_type IS NOT NULL
    GROUP BY icustay_id, charttime #, vent_type
)
-- , vent_ordered as (
-- select * from ventsettings
-- order by icustay_id, charttime, vent_type
-- )
, vd0 as
(
  select
    *
    -- this carries over the previous charttime which had an event
    , LAG(CHARTTIME, 1) OVER (partition by icustay_id order by charttime)
    as charttime_lag
    -- similarly, this is one for vent_type, to later identify first vent_type=0
    , LAG(vent_type, 1) OVER (partition by icustay_id order by charttime)
    as vent_type_lag
  from ventsettings
)


, vd1 as
(
  select
      icustay_id
      , charttime
      , vent_type
      , vent_type_lag
      , activeUponDischarge
    -- If the time since the last oxygen therapy event is more than 24 hours,
    -- we consider that ventilation had ended in between.
    -- That is, the next ventilation record corresponds to a new ventilation session.
      , CASE
        WHEN charttime - charttime_lag > 24*60 THEN 1
        WHEN charttime_lag IS NULL THEN 1 -- No lag can be computed for the very first record
        ELSE 0
      END AS newvent
      
  -- use the staging table with only oxygen therapy records from chart events
  FROM vd0
)
-- select * from vd1 # 3677741 rows
, vd2 as
(
  select vd1.*
  -- create a cumulative sum of the instances of new ventilation
  -- this results in a monotonic integer assigned to each instance of ventilation
  , SUM( newvent )
      OVER ( partition by icustay_id order by charttime )
    as ventnum
  from vd1
)


-- -- now we convert the charttime of the ventilator setting into durations
-- -- here we focus on invasive ventilation, which has vent_type=0
-- -- meanwhile, chartevents with vent_type=2 can be either invasive or non-invasive ventilation
-- -- so here we pick the stays with an invasive chart event as long as vent_type=0 is in the stay
-- -- and pick the earliest charttime (equals either 0 or 2) as the beginning time 
-- -- and find the end time as following

,vd3 AS (
  select icustay_id, charttime, vent_type, ventnum, activeUponDischarge
      , CASE
        WHEN vent_type - vent_type_lag = -2 THEN 0 #'non2in'
        WHEN vent_type - vent_type_lag = 2 THEN 1 #'in2non'
        WHEN vent_type - vent_type_lag = 0 THEN 2 #'non2non'
        WHEN vent_type_lag IS NULL THEN 2 #'non2non' -- No lag can be computed for the very first record
        ELSE 3
      END AS vent_type_transit
  from vd2
  where (vent_type=0) or (vent_type=2)
)


, vd_endtime AS 
(
    SELECT icustay_id
        , ventnum
        , CASE 
            WHEN (
                MAX(activeUponDischarge)
                -- vent_end cannot be later than the unit discharge time.
                -- However, unitdischargeoffset often seems too low.
                -- So, we only use it if it yields and extension of the
                -- ventilation time from ventsettings.
                AND MAX(charttime)+60 < MAX(pat.unitdischargeoffset)

            ) THEN MAX(pat.unitdischargeoffset)

            -- End time is currently a charting time
            -- Since these are usually recorded hourly, ventilation is actually longer.
            -- We therefore add 60 minutes to the last time.
            ELSE MAX(charttime)+60
        END AS vent_end
    FROM vd3
        LEFT JOIN pat 
        ON vd3.icustay_id = pat.patientunitstayid
    GROUP BY icustay_id, ventnum
)



, vd_starttime AS
(
    SELECT icustay_id
        , ventnum
        , MIN(charttime) AS vent_start
    FROM vd3
    WHERE vent_type_transit = 0 # 116765 -> final 22372
    # otherwise final count is 40749 (ventnum=1 and dur>24 and start>=0)
    GROUP BY icustay_id, ventnum
)

-- select *
-- from vd_starttime
-- where icustay_id =1609092

, vd_final AS 
(
    select e.icustay_id, e.ventnum, s.vent_start, e.vent_end
    from vd_endtime e
    left join vd_starttime s on e.icustay_id = s.icustay_id and e.ventnum = s.ventnum
)
, vd_dur AS
(
    select *
        , (vent_end - vent_start) / 60 AS vent_duration
    from vd_final
--     order by icustay_id 
)
select icustay_id patientunitstayid, vent_start, vent_end, vent_duration
, case 
    when vent_duration > 72 then 1
    else 0
  end over72h
from vd_dur 
where ventnum=1 and vent_duration>=24 and vent_start>=0
-- order by patientunitstayid 


'''

In [4]:
# create a view
dataset_id = 'default' # change accordingly
view_id = f"{project_id}.{dataset_id}.vent_cohort_eicu_view"

In [None]:
# need to run the following to create the view for the first time


from google.cloud import bigquery

client = bigquery.Client()

view = bigquery.Table(view_id)
view.view_query = sql_vent_cohort

view = client.create_table(view)
print('Created view')

Created view


# Ft SQL - set 1

8 features: hr min/max, resp min/max, temp min/max,  ph max, spo2 min (peep max excluded)



In [5]:
HOUR_GAP = 24

In [6]:
sql_features = f'''
with bg_ft as (
    SELECT 
    v.patientunitstayid, 
    bg.pH #, peep
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_bg` bg
    on v.patientunitstayid=bg.patientunitstayid
    where bg.chartoffset <= v.vent_start + {HOUR_GAP}*60 and bg.chartoffset >= v.vent_start
)

, vs_ft as (
    SELECT 
    v.patientunitstayid, 
    vt.heartrate, vt.temperature, vt.respiratoryrate, vt.spo2
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_vital` vt
    on v.patientunitstayid = vt.patientunitstayid
    where vt.chartoffset <= v.vent_start + {HOUR_GAP}*60 and vt.chartoffset >= v.vent_start
)
select 
v.*
# v.patientunitstayid
,max(b.pH) ph_max
, min(s.spo2) spo2_min

,min(s.heartrate) hr_min
,max(s.heartrate) hr_max
,min(s.respiratoryrate) resp_min
,max(s.respiratoryrate) resp_max
,min(s.temperature) temp_min 
,max(s.temperature) temp_max

from `{view_id}` v
left join bg_ft b on b.patientunitstayid=v.patientunitstayid
left join vs_ft s on s.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''

# Ft SQL - set 2
glucose max/min, apsiii, co2_total max/min, mbp_arterial_max, mbp_ni_min

In [7]:
sql_ft_new = f'''
with lab as (
  SELECT 
  v.patientunitstayid 
  ,glucose ,TotalCO2 
  
  FROM `{view_id}` v
  left join `physionet-data.eicu_crd_derived.pivoted_lab` l
  on v.patientunitstayid =l.patientunitstayid 
  where l.chartoffset <= v.vent_start + {HOUR_GAP}*60 and l.chartoffset >=v.vent_start 
), bp as (
  SELECT
  v.patientunitstayid 
  ,nibp_mean 
  
  , case 
    when nibp_mean is null and ibp_mean is null then null
    when nibp_mean is null or ibp_mean is null then coalesce(nibp_mean, ibp_mean)
    else (nibp_mean+ibp_mean)/2

    end as bp_mean
  FROM `{view_id}` v
  left join `physionet-data.eicu_crd_derived.pivoted_vital` b
  on v.patientunitstayid =b.patientunitstayid 
  where b.chartoffset <= v.vent_start + {HOUR_GAP}*60 and b.chartoffset >=v.vent_start 
)
select 
v.patientunitstayid
, max(glucose) glucose_max
, min(glucose) glucose_min
, max(TotalCO2) co2_total_max
, min(TotalCO2) co2_total_min

, max(bp_mean) mbp_arterial_max
, min(nibp_mean) mbp_ni_min

FROM `{view_id}` v
left join lab l on v.patientunitstayid =l.patientunitstayid 
left join bp b on v.patientunitstayid =b.patientunitstayid 
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''

In [8]:
sql_apsiii = f'''
select
v.patientunitstayid
,avg(acutephysiologyscore) apsiii

FROM `{view_id}` v
left join `physionet-data.eicu_crd.apachepatientresult` a
on a.patientunitstayid =v.patientunitstayid 
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h
'''

# Extraction & download

In [11]:
sql_cohort = f'''
select *
from `{view_id}` v
'''

cohort_df = run_query(sql_cohort)

21185 rows
   patientunitstayid  vent_start  vent_end  vent_duration  over72h
0            2127890        1853      4506      44.216667        0
1            2519150          95      4175      68.000000        0
2             919705        3012      5367      39.250000        0
3            1554681          44      1724      28.000000        0
4             260998          89      1937      30.800000        0


In [12]:
cols = ['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration','over72h']

In [13]:
df_ft1 = run_query(sql_features)

21185 rows
   patientunitstayid  vent_start  vent_end  ...  resp_max  temp_min  temp_max
0             153440         197      4050  ...      46.0      38.1      38.9
1             156171        2269      9099  ...      59.0      36.8      38.4
2             173208        2342      8523  ...      58.0      35.2      39.5
3             173317        1696      7144  ...      46.0      36.6      37.9
4             179277         447      9822  ...      50.0      37.7      38.6

[5 rows x 13 columns]


In [14]:
df_ft_new = run_query(sql_ft_new)
df_aps = run_query(sql_apsiii)

21185 rows
   patientunitstayid  glucose_max  ...  mbp_arterial_max  mbp_ni_min
0             167450         99.0  ...             141.0        89.0
1             211729        168.0  ...             119.0        86.0
2             220689        155.0  ...             128.0        90.0
3             244718        120.0  ...             150.0        87.0
4             247363        138.0  ...             105.0        95.0

[5 rows x 7 columns]
21185 rows
   patientunitstayid  apsiii
0            3184359    53.0
1             625632    51.0
2             526988   106.0
3            3344168    54.0
4            3345101    62.0


In [15]:
df_peep = peep.drop(columns=['peep_avg'])
df_peep

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,peep_max,peep_min
0,141233,225,1672,24.116667,0,55.0,5.0
1,161176,266,10138,164.533333,1,11.0,11.0
2,168134,1782,9577,129.916667,1,3.0,3.0
3,201456,953,5878,82.083333,1,22.0,12.0
4,221911,60,3734,61.233333,0,16.0,5.0
...,...,...,...,...,...,...,...
21180,3150615,360,3145,46.416667,0,15.0,2.5
21181,3185749,8,20805,346.616667,1,15.0,0.0
21182,3212344,641,13422,213.016667,1,15.0,5.0
21183,3240303,332,6000,94.466667,1,15.0,5.0


In [17]:
len(cohort_df), len(df_ft1), len(df_ft_new), len(df_peep)

(21185, 21185, 21185, 21185)

In [18]:
outdf = cohort_df.merge(df_ft1, 
                        on=cols, how='left').merge(df_ft_new, 
                                   on=['patientunitstayid'], how='left').merge(df_aps, 
                                                                         on=['patientunitstayid'], how='left').merge(df_peep, on=cols, how='left')
outdf                                                                        

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,ph_max,spo2_min,hr_min,hr_max,resp_min,resp_max,temp_min,temp_max,glucose_max,glucose_min,co2_total_max,co2_total_min,mbp_arterial_max,mbp_ni_min,apsiii,peep_max,peep_min
0,2127890,1853,4506,44.216667,0,,89.0,107.0,196.0,16.0,49.0,35.9,37.4,187.0,80.0,,,93.0,52.0,96.0,8.0,5.0
1,2519150,95,4175,68.000000,0,,88.0,96.0,116.0,13.0,28.0,37.4,38.4,194.0,106.0,,,90.0,60.0,39.0,10.0,5.0
2,919705,3012,5367,39.250000,0,7.510,85.0,58.0,73.0,15.0,20.0,35.8,36.4,288.0,219.0,,,129.0,61.0,35.0,15.0,5.0
3,1554681,44,1724,28.000000,0,7.400,91.0,87.0,113.0,0.0,23.0,36.8,37.4,,,,,88.0,56.0,71.0,,
4,260998,89,1937,30.800000,0,7.250,75.0,109.0,121.0,28.0,35.0,36.3,37.3,278.0,92.0,,,82.5,43.0,140.0,8.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,388844,202,6205,100.050000,1,7.511,91.0,57.0,131.0,10.0,30.0,35.7,37.2,147.0,99.0,,,100.0,47.0,119.0,5.0,5.0
21181,2349749,332,9413,151.350000,1,,90.0,95.0,139.0,7.0,26.0,37.0,37.8,136.0,102.0,,,110.5,67.0,97.0,10.0,8.0
21182,3097240,446,6396,99.166667,1,7.274,89.0,82.0,134.0,19.0,34.0,36.3,37.2,237.0,93.0,,,85.0,61.0,120.0,8.0,5.0
21183,3128486,6617,17560,182.383333,1,,95.0,60.0,91.0,12.0,30.0,36.1,37.1,130.0,110.0,,,88.0,64.0,59.0,5.0,5.0


In [19]:
outdf.columns

Index(['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration',
       'over72h', 'ph_max', 'spo2_min', 'hr_min', 'hr_max', 'resp_min',
       'resp_max', 'temp_min', 'temp_max', 'glucose_max', 'glucose_min',
       'co2_total_max', 'co2_total_min', 'mbp_arterial_max', 'mbp_ni_min',
       'apsiii', 'peep_max', 'peep_min'],
      dtype='object')

In [20]:
outdf.to_csv('ft17_eicu_new.csv', index=False)
os.listdir('.')

from google.colab import files
files.download('ft17_eicu_new.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
len(cohort_df), len(df_ft1), len(df_ft_new)

(21185, 21185, 21185)

In [None]:
final_df=cohort_df.merge(df_ft1, on=cols, how='left').merge(df_ft_new, on=['patientunitstayid'], how='left').merge(df_aps, on=['patientunitstayid'], how='left')
final_df

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,ph_max,spo2_min,hr_min,hr_max,resp_min,resp_max,temp_min,temp_max,glucose_max,glucose_min,co2_total_max,co2_total_min,mbp_arterial_max,mbp_ni_min,apsiii
0,1842765,808,4376,59.466667,0,7.460,95.0,142.0,144.0,31.0,31.0,37.90,37.90,224.0,189.0,17.8,16.3,88.0,75.0,92.0
1,2312192,163,1967,30.066667,0,,93.0,78.0,108.0,10.0,48.0,36.70,38.30,100.0,99.0,,,96.0,63.0,96.0
2,3090755,350,2685,38.916667,0,,97.0,69.0,87.0,8.0,18.0,36.40,36.90,212.0,119.0,,,113.5,73.0,63.0
3,3056810,6502,10527,67.083333,0,7.580,90.0,85.0,106.0,12.0,18.0,36.40,37.40,149.0,95.0,43.0,36.0,94.0,68.0,
4,1174015,1478,4170,44.866667,0,,98.0,83.0,102.0,10.0,18.0,36.80,37.80,85.0,85.0,,,100.5,48.0,77.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,1691,6867,86.266667,1,,96.0,57.0,117.0,12.0,24.0,33.90,36.60,118.0,82.0,,,117.5,78.0,74.0
21181,726231,3175,8514,88.983333,1,7.300,93.0,77.0,160.0,13.0,33.0,37.00,38.00,276.0,199.0,,,114.5,55.0,80.0
21182,1639398,333,4800,74.450000,1,,96.0,57.0,90.0,12.0,27.0,36.27,36.83,211.0,127.0,,,78.0,51.0,39.0
21183,3053294,128,14025,231.616667,1,,93.0,74.0,124.0,16.0,22.0,37.40,40.20,292.0,106.0,,,95.0,40.0,76.0


In [None]:
final_df.to_csv('ft15_invasive_cohort_eicu.csv', index=False)
os.listdir('.')

from google.colab import files
files.download('ft15_invasive_cohort_eicu.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Pt Characteristics

stay ids, hospital ids, unit/hosp disch, disch status

gender, age, weight, height, bmi, elective surgery

apsiii, sofa

vaso, rrt

peep, fio2, plateau

In [None]:
# demographic and discharge status
sql_demo = '''
select 
patientunitstayid, 
uniquepid patient_id,
patienthealthsystemstayid hospital_stay_id, 
hospitalid hospital_id, 
unitdischargeoffset,
hospitaldischargeoffset,
unitdischargestatus, 
hospitaldischargestatus, 
gender, age,
admissionheight height
 from `physionet-data.eicu_crd.patient` 
'''
# where patientunitstayid in ({})
# '''.format(','.join(icustay_ids))

demo = run_query(sql_demo)

200859 rows
   patientunitstayid patient_id  hospital_stay_id  ...  gender  age  height
0            1137569  011-43764            844848  ...  Female   75   172.7
1            3036927  030-57208           2462651  ...    Male   51   177.8
2            3058863  030-13634           2481889  ...  Female   31   162.6
3            3072720  030-53536           2494105  ...    Male   58   173.0
4            3075429  030-59276           2496542  ...  Female   68   165.1

[5 rows x 11 columns]


In [None]:
df = cohort_df[['patientunitstayid']].merge(demo, on=['patientunitstayid'])
df

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4
...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0


In [None]:
sql_weight = '''
SELECT * FROM `physionet-data.eicu_crd_derived.pivoted_weight`
'''
weight = run_query(sql_weight)

501506 rows
   patientunitstayid  chartoffset source_table weight_type  weight
0             141168            0      patient       admit    84.3
1             141178          -14      patient       admit    54.4
2             141194         -780      patient       admit    73.9
3             141197          -25      patient       admit   102.1
4             141203        -1336      patient       admit    70.2


In [None]:
adm_w = weight[weight.weight_type=='admit'][['patientunitstayid', 'weight']]

tmp = df.merge(adm_w, on='patientunitstayid', how='left')

In [None]:
# impute missing adm weight with daily weight
for pid in tmp[tmp.weight.isna()]['patientunitstayid']:
    try:
        record = weight[weight.patientunitstayid==pid][['patientunitstayid', 'weight']].iloc[0]
#         ls.append(record)
        row = tmp[tmp['patientunitstayid']==pid].index[0]
        tmp.loc[row, 'weight'] = record['weight']
    except:
        pass
tmp

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1
...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1


In [None]:
df = tmp.copy()

# clean unit cm
df.height = df.height.apply(lambda x: x*100 if x>0 and x<10 else x)

In [None]:
def bmi_func(w, h):
    if h==0 or w==0:
        return None
    if h>500:
        return None
    
    bmi = w/ (h/100)**2
    if bmi <100 and bmi>1:
        return bmi
    else:
        return None

df['bmi'] = df.apply(lambda r: bmi_func(r['weight'], r['height']) if r[['weight', 'height']].notnull().all() else None , axis=1)


df

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight,bmi
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1,41.301975
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3,27.518925
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6,34.637532
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6,34.637532
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1,27.666935
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9,26.539849
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0,36.707204
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0,18.937003
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1,30.507812


In [None]:
sql_elect = '''
SELECT patientunitstayid, electivesurgery FROM `physionet-data.eicu_crd.apachepredvar`
'''

sql_diag = '''
SELECT patientunitstayid
, case when apachedxgroup = 'PNA' then 1 else 0 end pneumonia
, case when apachedxgroup = 'Sepsis' then 1 else 0 end sepsis
FROM `physionet-data.eicu_crd_derived.apache_groups` 
'''

elect = run_query(sql_elect)
diag = run_query(sql_diag)

171177 rows
   patientunitstayid  electivesurgery
0             336201              NaN
1             379342              NaN
2             515595              NaN
3             560886              NaN
4             743611              NaN
200859 rows
   patientunitstayid  pneumonia  sepsis
0             377000          0       0
1            2382975          0       0
2            1276155          0       0
3             447820          0       0
4            1298098          0       0


In [None]:
elect = elect.fillna(0)

In [None]:
df = df.merge(elect, how='left').merge(diag, how='left')

df.electivesurgery.fillna(0, inplace=True)

In [None]:
df

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight,bmi,electivesurgery,pneumonia,sepsis
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1,41.301975,0.0,0,1
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3,27.518925,0.0,0,0
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6,34.637532,1.0,0,0
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6,34.637532,0.0,0,0
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1,27.666935,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9,26.539849,0.0,0,0
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0,36.707204,0.0,0,0
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0,18.937003,0.0,0,0
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1,30.507812,0.0,0,0


In [None]:
sql_vaso = '''
with tmp as (
SELECT
  patientunitstayid,
  chartoffset,
  vasopressor # has only 1 value
FROM
  `physionet-data.eicu_crd_derived.pivoted_treatment_vasopressor`
) 
select patientunitstayid, max(vasopressor) as vasopressor
from tmp group by patientunitstayid
'''

sql_rrt = '''
with tmp as (
SELECT
   patientunitstayid, treatmentoffset, 
   case when LOWER(treatmentstring) LIKE 'renal|dialysis%'
  OR LOWER(treatmentstring) LIKE 'renal|pro%dialysis%' then 1 else 0 end dialysis
FROM
  `physionet-data.eicu_crd.treatment`
)
select 
  patientunitstayid, max(dialysis) as dialysis
from tmp group by patientunitstayid
'''

In [None]:
vaso = run_query(sql_vaso)
rrt = run_query(sql_rrt)

24381 rows
   patientunitstayid  vasopressor
0             242082            1
1             242290            1
2             242319            1
3             243334            1
4             243445            1
152429 rows
   patientunitstayid  dialysis
0             242040         0
1             242082         0
2             242083         0
3             242154         0
4             242203         0


In [None]:
eicu = df.merge(vaso[['patientunitstayid', 'vasopressor']], how='left').merge(rrt, how='left')

eicu.vasopressor.fillna(0, inplace=True)
eicu.dialysis.fillna(0, inplace=True)

eicu

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight,bmi,electivesurgery,pneumonia,sepsis,vasopressor,dialysis
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1,41.301975,0.0,0,1,0.0,0.0
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3,27.518925,0.0,0,0,0.0,0.0
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6,34.637532,1.0,0,0,0.0,0.0
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6,34.637532,0.0,0,0,0.0,0.0
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1,27.666935,0.0,0,0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9,26.539849,0.0,0,0,0.0,0.0
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0,36.707204,0.0,0,0,1.0,0.0
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0,18.937003,0.0,0,0,0.0,0.0
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1,30.507812,0.0,0,0,1.0,1.0


### SOFA

In [None]:
sql_sofa = '''
--https://github.com/nus-mornin-lab/oxygenation_kc/blob/master/data-extraction/eICU/eicu_sofa_results.sql

--IMPORTANT: Please see around l277 for some possible issues.
--
--Based on code by Matthieu Komorowski, with changes to allow it to be used 
-- on BQ

WITH 

pat AS (
SELECT * FROM `physionet-data.eicu_crd.patient`),

lab AS (
SELECT * FROM `physionet-data.eicu_crd.lab`),

vitalperiodic AS (
SELECT * FROM `physionet-data.eicu_crd.vitalperiodic`),

vitalaperiodic AS (
SELECT * FROM `physionet-data.eicu_crd.vitalaperiodic`),

infusiondrug AS (
SELECT * FROM `physionet-data.eicu_crd.infusiondrug`),

respiratorycare AS (
SELECT * FROM `physionet-data.eicu_crd.respiratorycare`),

treatment AS (
SELECT * FROM `physionet-data.eicu_crd.treatment`),

careplangeneral AS (
SELECT * FROM `physionet-data.eicu_crd.careplangeneral`),

physicalexam AS (
SELECT * FROM `physionet-data.eicu_crd.physicalexam`),

diag AS (
SELECT * FROM `physionet-data.eicu_crd.diagnosis`),

chart AS (
SELECT * FROM `physionet-data.eicu_crd.nursecharting`),

apsiii_raw AS (
SELECT * FROM `physionet-data.eicu_crd.apachepatientresult`),

intakeoutput AS (
SELECT * FROM `physionet-data.eicu_crd.intakeoutput`),

respchart AS (
SELECT * FROM `physionet-data.eicu_crd.respiratorycharting`),


cohort1 AS (
SELECT * FROM `physionet-data.eicu_crd.patient`),


t1 as -- MAP
(
WITH tt1 as
(
select patientunitstayid,
min( case when noninvasivemean is not null then noninvasivemean else null end) as map
from vitalaperiodic
where observationoffset between -1440 and 1440
group by patientunitstayid
), 

tt2 as
(
select patientunitstayid,
min( case when systemicmean is not null then systemicmean else null end) as map
from vitalperiodic
where observationoffset between -1440 and 1440
group by patientunitstayid
)


select pt.patientunitstayid, case when tt1.map is not null then tt1.map
when tt2.map is not null then tt2.map
else null end as map
from pat pt
left outer join tt1
on tt1.patientunitstayid=pt.patientunitstayid
left outer join tt2
on tt2.patientunitstayid=pt.patientunitstayid
order by pt.patientunitstayid
),

t2 as --DOPAMINE
(
select distinct  patientunitstayid, max(
case when lower(drugname) like '%(ml/hr)%' then round(cast(drugrate as numeric)/3,3) -- rate in ml/h * 1600 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
when lower(drugname) like '%(mcg/kg/min)%' then cast(drugrate as numeric)
else null end ) as dopa
from infusiondrug id
where lower(drugname) like '%dopamine%' and infusionoffset between -120 and 1440 and REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$') and drugrate<>'' and drugrate<>'.'
group by patientunitstayid
order by patientunitstayid


), 

t3 as  --NOREPI
(
select distinct patientunitstayid, max(case when lower(drugname) like '%(ml/hr)%' and drugrate<>''  and drugrate<>'.' then round(cast(drugrate as numeric)/300,3) -- rate in ml/h * 16 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
when lower(drugname) like '%(mcg/min)%' and drugrate<>'' and drugrate<>'.'  then round(cast(drugrate as numeric)/80 ,3)-- divide by 80 kg
when lower(drugname) like '%(mcg/kg/min)%' and drugrate<>'' and drugrate<>'.' then cast(drugrate as numeric)
else null end ) as norepi


from infusiondrug id
where lower(drugname) like '%norepinephrine%'  and infusionoffset between -120 and 1440  and REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$') and drugrate<>'' and drugrate<>'.'
group by patientunitstayid
order by patientunitstayid


), 

t4 as  --DOBUTAMINE
(
select distinct patientunitstayid, 1 as dobu
from infusiondrug id
where lower(drugname) like '%dobutamin%' and drugrate <>'' and drugrate<>'.' and drugrate <>'0' and REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$') and infusionoffset between -120 and 1440
order by patientunitstayid
),

sofacv as
(
select pt.patientunitstayid, t1.map, t2.dopa, t3.norepi, t4.dobu,
(case when dopa>=15 or norepi>0.1 then 4
when dopa>5 or (norepi>0 and norepi <=0.1) then 3
when dopa<=5 or dobu > 0 then 2
when map <70 then 1
else 0 end) as SOFA_cv --COMPUTE SOFA CV
from cohort1 pt
left outer join t1
on t1.patientunitstayid=pt.patientunitstayid
left outer join t2
on t2.patientunitstayid=pt.patientunitstayid
left outer join t3
on t3.patientunitstayid=pt.patientunitstayid
left outer join t4
on t4.patientunitstayid=pt.patientunitstayid
order by pt.patientunitstayid
),


-- SOFA-RESPI


sofarespi as
(
with tempo2 as 
(
with tempo1 as
(
with t1 as --FIO2 from respchart
(
select *
from
(
select distinct patientunitstayid, max(cast(respchartvalue as numeric)) as rcfio2
-- , max(case when respchartvaluelabel = 'FiO2' then respchartvalue else null end) as fiO2
from respchart
where respchartoffset between -120 and 1440 and respchartvalue <> '' and REGEXP_CONTAINS(respchartvalue, '^[0-9]{0,2}$')
group by patientunitstayid
) as tempo
where rcfio2 >20 -- many values are liters per minute!
order by patientunitstayid


), 

t2 as --FIO2 from nursecharting
(
select distinct patientunitstayid, max(cast(nursingchartvalue as numeric)) as ncfio2
from chart nc
where lower(nursingchartcelltypevallabel) like '%fio2%' and REGEXP_CONTAINS(nursingchartvalue, '^[0-9]{0,2}$') and nursingchartentryoffset between -120 and 1440
group by patientunitstayid


), 

t3 as --sao2 from vitalperiodic
(
select patientunitstayid,
min( case when sao2 is not null then sao2 else null end) as sao2
from vitalperiodic
where observationoffset between -1440 and 1440
group by patientunitstayid


), 

t4 as --pao2 from lab
(
select patientunitstayid,
min(case when lower(labname) like 'pao2%' then labresult else null end) as pao2
from lab
where labresultoffset between -1440 and 1440
group by patientunitstayid


), 

t5 as --airway type combining 3 sources (1=invasive)
(


with t1 as --airway type from respcare (1=invasive) (by resp therapist!!)
(
select distinct patientunitstayid,
max(case when airwaytype in ('Oral ETT','Nasal ETT','Tracheostomy') then 1 else NULL end) as airway  -- either invasive airway or NULL
from respiratorycare
where respcarestatusoffset between -1440 and 1440


group by patientunitstayid-- , respcarestatusoffset
-- order by patientunitstayid-- , respcarestatusoffset
),


t2 as --airway type from respcharting (1=invasive)
(
select distinct patientunitstayid, 1 as ventilator
from respchart rc
where respchartvalue like '%ventilator%'
or respchartvalue like '%vent%'
or respchartvalue like '%bipap%'
or respchartvalue like '%840%'
or respchartvalue like '%cpap%'
or respchartvalue like '%drager%'
or respchartvalue like 'mv%'
or respchartvalue like '%servo%'
or respchartvalue like '%peep%'
and respchartoffset between -1440 and 1440
group by patientunitstayid
-- order by patientunitstayid
),


t3 as --airway type from treatment (1=invasive)


(
select distinct patientunitstayid, max(case when treatmentstring in
('pulmonary|ventilation and oxygenation|mechanical ventilation',
'pulmonary|ventilation and oxygenation|tracheal suctioning',
'pulmonary|ventilation and oxygenation|ventilator weaning',
'pulmonary|ventilation and oxygenation|mechanical ventilation|assist controlled',
'pulmonary|radiologic procedures / bronchoscopy|endotracheal tube',
'pulmonary|ventilation and oxygenation|oxygen therapy (> 60%)',
'pulmonary|ventilation and oxygenation|mechanical ventilation|tidal volume 6-10 ml/kg',
'pulmonary|ventilation and oxygenation|mechanical ventilation|volume controlled',
'surgery|pulmonary therapies|mechanical ventilation',
'pulmonary|surgery / incision and drainage of thorax|tracheostomy',
'pulmonary|ventilation and oxygenation|mechanical ventilation|synchronized intermittent',
'pulmonary|surgery / incision and drainage of thorax|tracheostomy|performed during current admission for ventilatory support',
'pulmonary|ventilation and oxygenation|ventilator weaning|active',
'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure controlled',
'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure support',
'pulmonary|ventilation and oxygenation|ventilator weaning|slow',
'surgery|pulmonary therapies|ventilator weaning',
'surgery|pulmonary therapies|tracheal suctioning',
'pulmonary|radiologic procedures / bronchoscopy|reintubation',
'pulmonary|ventilation and oxygenation|lung recruitment maneuver',
'pulmonary|surgery / incision and drainage of thorax|tracheostomy|planned',
'surgery|pulmonary therapies|ventilator weaning|rapid',
'pulmonary|ventilation and oxygenation|prone position',
'pulmonary|surgery / incision and drainage of thorax|tracheostomy|conventional',
'pulmonary|ventilation and oxygenation|mechanical ventilation|permissive hypercapnea',
'surgery|pulmonary therapies|mechanical ventilation|synchronized intermittent',
'pulmonary|medications|neuromuscular blocking agent',
'surgery|pulmonary therapies|mechanical ventilation|assist controlled',
'pulmonary|ventilation and oxygenation|mechanical ventilation|volume assured',
'surgery|pulmonary therapies|mechanical ventilation|tidal volume 6-10 ml/kg',
'surgery|pulmonary therapies|mechanical ventilation|pressure support',
'pulmonary|ventilation and oxygenation|non-invasive ventilation',
'pulmonary|ventilation and oxygenation|non-invasive ventilation|face mask',
'pulmonary|ventilation and oxygenation|non-invasive ventilation|nasal mask',
'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation',
'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|face mask',
'surgery|pulmonary therapies|non-invasive ventilation',
'surgery|pulmonary therapies|non-invasive ventilation|face mask',
'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|nasal mask',
'surgery|pulmonary therapies|non-invasive ventilation|nasal mask',
'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation',
'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation|face mask'
) then 1  else NULL end) as interface   -- either ETT/NiV or NULL
from treatment
where treatmentoffset between -1440 and 1440
group by patientunitstayid-- , treatmentoffset, interface
order by patientunitstayid-- , treatmentoffset
),

t4 as
(
select distinct patientunitstayid,
max(case when cplitemvalue like '%Intubated%' then 1 else NULL end) as airway  -- either invasive airway or NULL
from careplangeneral
where cplitemoffset between -1440 and 1440
group by patientunitstayid -- , respcarestatusoffset


)

--Note from Michael
--
--Previously the below line was "case when t1.airway is not null or t2.ventilator is not null or t3.interface is not null or t4.interface is not null then 1 else null end as mechvent
--
--t4 doesn't have interface, removing

select pt.patientunitstayid,
case when t1.airway is not null or t2.ventilator is not null or t3.interface is not null then 1 else null end as mechvent --summarize
from cohort1 pt
left outer join t1
on t1.patientunitstayid=pt.patientunitstayid
left outer join t2
on t2.patientunitstayid=pt.patientunitstayid
left outer join t3
on t3.patientunitstayid=pt.patientunitstayid
left outer join t4
on t4.patientunitstayid=pt.patientunitstayid

--Note from Michael
--
--Previously the last line was "on t4.patientunitstayid=pt.patientunitstayidorder by pt.patientunitstayid"
--
--No idea what this is. "patientunitstayidorder site:eicu-crd.mit.edu" has no hits.


)


select pt.patientunitstayid, t3.sao2, t4.pao2, 
(case when t1.rcfio2>20 then t1.rcfio2 when t2.ncfio2 >20 then t2.ncfio2  when t1.rcfio2=1 or t2.ncfio2=1 then 100 else null end) as fio2, t5.mechvent
from cohort1 pt
left outer join t1
on t1.patientunitstayid=pt.patientunitstayid
left outer join t2
on t2.patientunitstayid=pt.patientunitstayid
left outer join t3
on t3.patientunitstayid=pt.patientunitstayid
left outer join t4
on t4.patientunitstayid=pt.patientunitstayid
left outer join t5
on t5.patientunitstayid=pt.patientunitstayid
-- order by pt.patientunitstayid
)


select *, -- coalesce(fio2,nullif(fio2,0),21) as fn, nullif(fio2,0) as nullifzero, coalesce(coalesce(nullif(fio2,0),21),fio2,21) as ifzero21 ,
coalesce(pao2,100)/coalesce(coalesce(nullif(fio2,0),21),fio2,21) as pf, coalesce(sao2,100)/coalesce(coalesce(nullif(fio2,0),21),fio2,21) as sf
from tempo1
)


select patientunitstayid, 
(case when pf <1 or sf <0.67 then 4  --COMPUTE SOFA RESPI
when pf between 1 and 2 or sf between 0.67 and 1.41 then 3
when pf between 2 and 3 or sf between 1.42 and 2.2 then 2
when pf between 3 and 4 or sf between 2.21 and 3.01 then 1
when pf > 4 or sf> 3.01 then 0 else 0 end ) as SOFA_respi
from tempo2
order by patientunitstayid
),


-- SOFA-RENAL


sofarenal as
(
with t1 as --CREATININE
(
select pt.patientunitstayid,
max(case when lower(labname) like 'creatin%' then labresult else null end) as creat
from pat pt
left outer join lab
on pt.patientunitstayid=lab.patientunitstayid
where labresultoffset between -1440 and 1440
group by pt.patientunitstayid


),

t2 as --UO
(


with uotemp as
(
select patientunitstayid,
case when dayz=1 then sum(outputtotal) else null end as uod1
from
(


select distinct patientunitstayid, intakeoutputoffset, outputtotal,
(CASE
WHEN  (intakeoutputoffset) between -120 and 1440 THEN 1
else null
end) as dayz
from intakeoutput
where intakeoutputoffset between 0 and 5760
order by patientunitstayid, intakeoutputoffset


) as temp
group by patientunitstayid, temp.dayz
)


select pt.patientunitstayid,
max(case when uod1 is not null then uod1 else null end) as UO
from pat pt
left outer join uotemp
on uotemp.patientunitstayid=pt.patientunitstayid
group by pt.patientunitstayid


)


select pt.patientunitstayid, -- t1.creat, t2.uo,
(case --COMPUTE SOFA RENAL
when uo <200 or creat>5 then 4
when uo <500 or creat >3.5 then 3
when creat between 2 and 3.5 then 2
when creat between 1.2 and 2 then 1
else 0
end) as sofarenal
from cohort1 pt
left outer join t1
on t1.patientunitstayid=pt.patientunitstayid
left outer join t2
on t2.patientunitstayid=pt.patientunitstayid
order by pt.patientunitstayid
-- group by pt.patientunitstayid, t1.creat, t2.uo


),


-- SOFA- GCS, liver, platelets


sofa3others as
(
with t1 as --GCS
(
select patientunitstayid, sum(cast(physicalexamvalue as numeric)) as gcs
from physicalexam pe
where (lower(physicalexampath) like '%gcs/eyes%'
or lower(physicalexampath) like '%gcs/verbal%'
or lower(physicalexampath) like '%gcs/motor%')
and physicalexamoffset between -1440 and 1440
group by patientunitstayid--, physicalexamoffset
), t2 as
(
select pt.patientunitstayid,
max(case when lower(labname) like 'total bili%' then labresult else null end) as bili, --BILI
min(case when lower(labname) like 'platelet%' then labresult else null end) as plt --PLATELETS
from pat pt
left outer join lab
on pt.patientunitstayid=lab.patientunitstayid
where labresultoffset between -1440 and 1440
group by pt.patientunitstayid
)


select distinct pt.patientunitstayid, min(t1.gcs) as gcs, max(t2.bili) as bili, min(t2.plt) as plt,
max(case when plt<20 then 4
when plt<50 then 3
when plt<100 then 2
when plt<150 then 1
else 0 end) as sofacoag,
max(case when bili>12 then 4
when bili>6 then 3
when bili>2 then 2
when bili>1.2 then 1
else 0 end) as sofaliver,
max(case when gcs=15 then 0
when gcs>=13 then 1
when gcs>=10 then 2
when gcs>=6 then 3
when gcs>=3 then 4
else 0 end) as sofacns
from cohort1 pt
left outer join t1
on t1.patientunitstayid=pt.patientunitstayid
left outer join t2
on t2.patientunitstayid=pt.patientunitstayid
group by pt.patientunitstayid, t1.gcs, t2.bili, t2.plt
order by pt.patientunitstayid
)


-- SOFA: COMBINE ALL SUBSCORES 


Select pt.patientunitstayid, --  sofacv.sofa_cv, sofarespi.sofa_respi,sofarenal.sofarenal,sofa3others.sofacoag,sofa3others.sofaliver,sofa3others.sofacns, 
sofacv.sofa_cv+sofarespi.sofa_respi+ sofarenal.sofarenal+sofa3others.sofacoag+ sofa3others.sofaliver+sofa3others.sofacns as sofatotal
From cohort1 pt
Left outer join sofacv
On pt.patientunitstayid=sofacv.Patientunitstayid
Left outer join sofarespi
On pt.patientunitstayid= sofarespi.Patientunitstayid
Left outer join sofarenal
On pt.patientunitstayid= sofarenal.Patientunitstayid
Left outer join sofa3others
On pt.patientunitstayid= sofa3others.Patientunitstayid


'''

In [None]:
sofa = run_query(sql_sofa)

200859 rows
   patientunitstayid  sofatotal
0            3242714         14
1             696240         15
2            2095470         19
3             658770         15
4             757345         21


In [None]:
aps = run_query(sql_apsiii)

21185 rows
   patientunitstayid  apsiii
0             528669     NaN
1             321707     NaN
2            2104979   102.0
3             567844    60.0
4            3345756    68.0


In [None]:
almost_done_df = eicu.merge(aps).merge(sofa)
almost_done_df

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight,bmi,electivesurgery,pneumonia,sepsis,vasopressor,dialysis,apsiii,sofatotal
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1,41.301975,0.0,0,1,0.0,0.0,92.0,10
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3,27.518925,0.0,0,0,0.0,0.0,96.0,9
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6,34.637532,1.0,0,0,0.0,0.0,63.0,9
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6,34.637532,0.0,0,0,0.0,0.0,,7
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1,27.666935,0.0,0,0,1.0,0.0,77.0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9,26.539849,0.0,0,0,0.0,0.0,74.0,6
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0,36.707204,0.0,0,0,1.0,0.0,80.0,6
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0,18.937003,0.0,0,0,0.0,0.0,39.0,5
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1,30.507812,0.0,0,0,1.0,1.0,76.0,6


## peep, fio2, plateau

In [10]:
sql_eicu_peep = f'''

with 
resp as (
SELECT patientunitstayid,
respchartoffset chartoffset
,respchartvalue peep
FROM `physionet-data.eicu_crd.respiratorycharting` 
where lower(respchartvaluelabel) like 'peep'
)
, peep_ft1 as (
  select 
  v.patientunitstayid, safe_cast(peep as float64) peep
  from `{view_id}` v
  left join resp p on v.patientunitstayid=p.patientunitstayid
  where p.chartoffset <= v.vent_start+{HOUR_GAP}*60 and p.chartoffset >= v.vent_start and safe_cast(peep as float64) < 100
)
, peep_ft2 as (
    SELECT 
    v.patientunitstayid, peep
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_bg` bg
    on v.patientunitstayid=bg.patientunitstayid
    where bg.chartoffset <= v.vent_start+{HOUR_GAP}*60 and bg.chartoffset >= v.vent_start
)
, peep_union as (
  select * from peep_ft1 
  
  union all

  select * from peep_ft2
)

select 
v.*
, max(peep) peep_max
, avg(peep) peep_avg
, min(peep) peep_min

from `{view_id}` v
left join peep_union p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''
peep = run_query(sql_eicu_peep)

21185 rows
   patientunitstayid  vent_start  vent_end  ...  peep_max   peep_avg  peep_min
0             141233         225      1672  ...      55.0   9.545455       5.0
1             161176         266     10138  ...      11.0  11.000000      11.0
2             168134        1782      9577  ...       3.0   3.000000       3.0
3             201456         953      5878  ...      22.0  16.333333      12.0
4             221911          60      3734  ...      16.0   7.000000       5.0

[5 rows x 8 columns]


In [None]:
sql_eicu_fio2=f'''

with tmp as (
SELECT 
patientunitstayid ,
respchartoffset chartoffset, 
respchartvaluelabel
, safe_cast(respchartvalue as float64) v
FROM `physionet-data.eicu_crd.respiratorycharting` where lower(respchartvaluelabel) like 'fio2%' # Fio2, Fio2 (%)  
), 

tmp2 as ( select
patientunitstayid, chartoffset
,case when v>=0.2 and v<=1 then v*100 else null end value1
,case when v>=20 and v<=100 then v else null end value2
from tmp
-- where safe_cast(v as float64) >=0 and safe_cast(v as float64) <2
) 

, fio2_1 as (
select
patientunitstayid , chartoffset, value1 fio2
from tmp2 where value1 is not null)
, fio2_2 as ( 
select
patientunitstayid , chartoffset, value1 fio2
from tmp2 where value2 is not null)

, fio2_tmp_union as (
  select * from fio2_1 
  union all
  select * from fio2_2
)

,fio2_resp as (
select v.patientunitstayid, chartoffset, fio2
from `{view_id}` v
left join fio2_tmp_union f on v.patientunitstayid = f.patientunitstayid 
where f.chartoffset <= v.vent_start+{HOUR_GAP}*60 and f.chartoffset >= v.vent_start) 

,fio2_lab as (
select v.patientunitstayid, chartoffset, fio2 * 100 fio2 
from `{view_id}` v
left join `physionet-data.eicu_crd_derived.pivoted_bg` f on v.patientunitstayid = f.patientunitstayid  
where f.chartoffset <= v.vent_start+{HOUR_GAP}*60 and f.chartoffset >= v.vent_start )

, fio2_union as (
select * from fio2_resp 
union all
select * from fio2_lab
)
select 
v.*
, max(fio2) fio2_max
, avg(fio2) fio2_mean
, min(fio2) fio2_min

from `{view_id}` v
left join fio2_union f on f.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h
'''

fio2 = run_query(sql_eicu_fio2)

21185 rows
   patientunitstayid  vent_start  vent_end  ...  fio2_max  fio2_mean  fio2_min
0             149229         296      9387  ...     100.0  86.666667      70.0
1             149701         368      5348  ...     100.0  81.666667      45.0
2             151880         632     12456  ...      56.0  38.666667      30.0
3             152365         411      2183  ...      70.0  56.250000      50.0
4             158800        1120     12212  ...     100.0  50.857143      30.0

[5 rows x 8 columns]


In [None]:
sql_eicu_plateau=f'''
with tmp as (
SELECT 
patientunitstayid, 
respchartentryoffset chartoffset,
respchartvaluelabel, safe_cast(respchartvalue as float64) plateau
-- distinct respchartvaluelabel
FROM `physionet-data.eicu_crd.respiratorycharting` where lower(respchartvaluelabel) like '%plateau%'
)
, plt as (
select v.patientunitstayid , plateau
FROM `{view_id}` v
left join tmp t on t.patientunitstayid = v.patientunitstayid 
where t.chartoffset <= v.vent_start+{HOUR_GAP}*60 and t.chartoffset >= v.vent_start
)
select 
v.*
, max(plateau) plateau_pr_max
, avg(plateau) plateau_pr_mean
, min(plateau) plateau_pr_min

from `{view_id}` v
left join plt p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h


'''

plateau = run_query(sql_eicu_plateau)


21185 rows
   patientunitstayid  vent_start  ...  plateau_pr_mean  plateau_pr_min
0            3172372          12  ...        23.133333            23.0
1            3144442          83  ...        49.000000            49.0
2            1651966        4833  ...         0.100000             0.0
3             930191         224  ...        36.428571            33.0
4            2219687         661  ...        29.333333            18.0

[5 rows x 8 columns]


In [None]:
cols = ['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration', 'over72h']
fts = peep.merge(fio2, on=cols).merge(plateau, on=cols)
len(fts)

21185

In [None]:
pt_char_df = almost_done_df.merge(fts.drop(columns=['vent_start', 'vent_end', 'vent_duration', 'over72h']))
pt_char_df

Unnamed: 0,patientunitstayid,patient_id,hospital_stay_id,hospital_id,unitdischargeoffset,hospitaldischargeoffset,unitdischargestatus,hospitaldischargestatus,gender,age,height,weight,bmi,electivesurgery,pneumonia,sepsis,vasopressor,dialysis,apsiii,sofatotal,peep_max,peep_mean,peep_min,fio2_max,fio2_mean,fio2_min,plateau_pr_max,plateau_pr_mean,plateau_pr_min
0,1842765,018-118610,1452206,282,20056,21061,Alive,Expired,Female,55,154.9,99.1,41.301975,0.0,0,1,0.0,0.0,92.0,10,7.0,5.50,5.0,40.0,40.0,40.0,,,
1,2312192,022-66531,1830876,331,1967,1972,Alive,Alive,Female,51,167.6,77.3,27.518925,0.0,0,0,0.0,0.0,96.0,9,10.0,8.75,5.0,,,,27.0,22.500000,17.0
2,3090755,030-2838,2510060,420,2685,11406,Alive,Alive,Male,50,180.3,112.6,34.637532,1.0,0,0,0.0,0.0,63.0,9,5.0,5.00,5.0,,,,20.0,18.666667,17.0
3,3056810,030-13353,2480103,420,10471,22096,Alive,Expired,Male,53,180.3,112.6,34.637532,0.0,0,0,0.0,0.0,,7,15.0,15.00,15.0,90.0,68.0,60.0,,,
4,1174015,011-33563,875529,210,4170,21585,Alive,Alive,Male,77,185.4,95.1,27.666935,0.0,0,0,1.0,0.0,77.0,7,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21180,2413027,022-109239,1915057,331,6867,6867,Expired,Expired,Male,39,177.8,83.9,26.539849,0.0,0,0,0.0,0.0,74.0,6,5.0,5.00,5.0,40.0,40.0,40.0,19.0,18.500000,18.0
21181,726231,006-55048,566112,165,13417,26534,Alive,Alive,Female,53,177.0,115.0,36.707204,0.0,0,0,1.0,0.0,80.0,6,5.0,5.00,5.0,60.0,50.0,40.0,11.0,11.000000,11.0
21182,1639398,017-3874,1274213,262,4800,9257,Alive,Alive,Male,80,178.0,60.0,18.937003,0.0,0,0,0.0,0.0,39.0,5,5.0,5.00,5.0,,,,21.0,15.444444,10.0
21183,3053294,030-5701,2476995,420,14025,16664,Alive,Alive,Female,57,160.0,78.1,30.507812,0.0,0,0,1.0,1.0,76.0,6,5.0,5.00,5.0,,,,16.0,14.200000,12.0


In [None]:
def handle89(x):
    try:
        return int(x)
    except:
        return 91

pt_char_df.age = pt_char_df.age.apply(lambda x: handle89(x))

In [None]:
pt_char_df.to_csv('pt_char_eicu.csv', index=False)
os.listdir('.')

from google.colab import files
files.download('pt_char_eicu.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
pt_char_df.columns

Index(['patientunitstayid', 'patient_id', 'hospital_stay_id', 'hospital_id',
       'unitdischargeoffset', 'hospitaldischargeoffset', 'unitdischargestatus',
       'hospitaldischargestatus', 'gender', 'age', 'height', 'weight', 'bmi',
       'electivesurgery', 'pneumonia', 'sepsis', 'vasopressor', 'dialysis',
       'apsiii', 'sofatotal', 'peep_max', 'peep_mean', 'peep_min', 'fio2_max',
       'fio2_mean', 'fio2_min', 'plateau_pr_max', 'plateau_pr_mean',
       'plateau_pr_min'],
      dtype='object')

# Archive

In [None]:
cols = ['patientunitstayid', 'vent_start', 'vent_end', 'vent_duration', 'over72h']
cohort_ft = pd.merge(df_ft1, df_ft2, on=cols, how='left')
len(cohort_ft)

22397

In [None]:
cohort_ft.head()

Unnamed: 0,patientunitstayid,vent_start,vent_end,vent_duration,over72h,ph_max,spo2_min,hr_min,hr_max,resp_min,resp_max,temp_min,temp_max,peep_max
0,3072790,18,1804,29.766667,0,,96.0,85.0,98.0,8.0,16.0,35.7,36.6,5.0
1,2767867,24,3204,53.0,0,,96.0,81.0,81.0,17.0,17.0,37.9,37.9,
2,3176933,121,2405,38.066667,0,7.42,95.0,67.0,120.0,8.0,17.0,35.6,37.8,5.0
3,271620,141,4158,66.95,0,,91.0,65.0,88.0,14.0,14.0,36.2,37.5,
4,3183892,222,9121,148.316667,1,7.43,83.0,66.0,163.0,12.0,58.0,34.8,38.4,


In [None]:
cohort_ft.to_csv('ft9_invasive_cohort_eicu.csv', index=False)
os.listdir('.')

['.config', 'ft9_invasive_cohort_eicu.csv', 'adc.json', 'sample_data']

In [None]:
from google.colab import files
files.download('ft9_invasive_cohort_eicu.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
cohort_demo.to_csv('ft9_invasive_cohort_eicu_demo_mort.csv', index=False)
files.download('ft9_invasive_cohort_eicu_demo_mort.csv') 

In [None]:
sql_peep = f'''
with 
resp as (
SELECT patientunitstayid,
respchartoffset
,lower(respchartvaluelabel) label
,respchartvalue
FROM `physionet-data.eicu_crd.respiratorycharting` 
)
, peep_tmp as (
select 
patientunitstayid
,respchartoffset chartoffset
,respchartvalue peep
from resp
where label like 'peep'
)
, peep_ft1 as (
  select 
  v.patientunitstayid, safe_cast(peep as float64) peep
  from `{view_id}` v
  left join peep_tmp p on v.patientunitstayid=p.patientunitstayid
  where p.chartoffset <= v.vent_start + {HOUR_GAP}*60 and p.chartoffset >= v.vent_start and safe_cast(peep as float64) < 100
)
, peep_ft2 as (
    SELECT 
    v.patientunitstayid, peep
    FROM `{view_id}` v
    left join `physionet-data.eicu_crd_derived.pivoted_bg` bg
    on v.patientunitstayid=bg.patientunitstayid
    where bg.chartoffset <= v.vent_start + {HOUR_GAP}*60 and bg.chartoffset >= v.vent_start
)
, peep_union as (
  select * from peep_ft1 
  
  union all

  select * from peep_ft2
)

select 
v.*
, max(peep) peep_max

from `{view_id}` v
left join peep_union p on p.patientunitstayid=v.patientunitstayid
group by patientunitstayid, vent_start, vent_end, vent_duration, over72h

'''