Instructions:
- run the "Setup" cell
- login to your google account - both Danni and Miguel have been granted access to the google cloud project
- go to "Get Data" and run the cell
- move on to "Causal Inference" for the main analysis

# Setup

In [0]:
!pip install -U tableone -q

# Import libraries
import os
import warnings

# Set up environment variables
project_id='amsterdam-translation'
os.environ["GOOGLE_CLOUD_PROJECT"]=project_id

warnings.filterwarnings("ignore")

# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery

from IPython.display import display, HTML
import matplotlib.pyplot as plt
import scipy.stats as ss
import numpy as np
np.random.seed(2020)
import pandas as pd
from scipy.stats import uniform, randint
from sklearn.preprocessing import LabelEncoder,StandardScaler, OneHotEncoder
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.metrics import classification_report, make_scorer, average_precision_score, roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from tableone import TableOne

import statsmodels.api as sm

pd.set_option('mode.chained_assignment', None)

import seaborn as sns
import matplotlib.pyplot as plt

# authenticate
auth.authenticate_user()

Target Trial Checklist:
- Eligibility criteria:
    - Postmenopausal women within 5 years of menopause between the years 2005 and 2010 and with no history of
cancer and no use of hormone therapy in the past 2 years.
- Treatment strategies:
    - Refrain from taking hormone therapy during the follow-up. Initiate estrogen plus progestin hormone therapy at
baseline and remain on it during the follow-up unless you are diagnosed with deep vein thrombosis,
pulmonary embolism, myocardial infarction, or cancer.
- Assignment procedures:
    - Participants will be randomly assigned to either strategy at baseline and will be aware of the strategy to which
they have been assigned.
- Follow-up period:
    - Starts at randomization and ends at diagnosis of breast cancer, death, loss to follow-up, or 5 years after baseline,
whichever occurs first.
- Outcome:
    - Breast cancer diagnosed by an oncologist within 5 years of baseline
- Causal contrasts of interest:
    - Intention-to-treat effect, per-protocol effect
- Analysis plan
    - Intention-to-treat effect estimated via comparison of 5-year cancer risks among individuals assigned to each
treatment strategy.
    - Per-protocol effect estimation requires adjustments for pre- and postbaseline prognostic
factors associated with adherence to the strategies of interest. All analyses will be adjusted for pre- and
postbaseline prognostic factors associated with loss to follow-up (57). This analysis plan implies that the
investigators prespecify and collect data on the adjustment factors.

# Data Extraction

## MIMIC III SQL code

In [0]:
%%bigquery 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.pivoted_bg_art` AS (
    WITH pivoted_bg as (
    -- create a table which has fuzzy boundaries on ICU admission
    -- involves first creating a lag/lead version of intime/outtime
        with i as
        (
        select
            subject_id, icustay_id, intime, outtime
            , lag (outtime) over (partition by subject_id order by intime) as outtime_lag
            , lead (intime) over (partition by subject_id order by intime) as intime_lead
        from `physionet-data.mimiciii_clinical.icustays`
        )
        , iid_assign as
        (
        select
            i.subject_id, i.icustay_id
            -- this rule is:
            --  if there are two hospitalizations within 24 hours, set the start/stop
            --  time as half way between the two admissions
            , case
                when i.outtime_lag is not null
                and i.outtime_lag > DATETIME_SUB(i.intime,interval '24' hour)
                then DATETIME_SUB(i.intime, interval CAST(DATETIME_DIFF(i.intime,i.outtime_lag,HOUR)/2 AS INT64) HOUR)
            else DATETIME_SUB(i.intime,interval '12' hour)
            end as data_start
            , case
                when i.intime_lead is not null
                and i.intime_lead < DATETIME_ADD(i.outtime,interval '24' hour)
                then DATETIME_ADD(i.outtime, interval CAST(DATETIME_DIFF(i.intime_lead,i.outtime,HOUR)/2 AS INT64) HOUR)
            else DATETIME_ADD(i.outtime, interval '12' hour)
            end as data_end
            from i
        )
        , pvt as
        ( -- begin query that extracts the data
        select le.hadm_id
        -- here we assign labels to ITEMIDs
        -- this also fuses together multiple ITEMIDs containing the same data
            , case
                when itemid = 50800 then 'SPECIMEN'
                when itemid = 50801 then 'AADO2'
                when itemid = 50802 then 'BASEEXCESS'
                when itemid = 50803 then 'BICARBONATE'
                when itemid = 50804 then 'TOTALCO2'
                when itemid = 50805 then 'CARBOXYHEMOGLOBIN'
                when itemid = 50806 then 'CHLORIDE'
                when itemid = 50808 then 'CALCIUM'
                when itemid = 50809 then 'GLUCOSE'
                when itemid = 50810 then 'HEMATOCRIT'
                when itemid = 50811 then 'HEMOGLOBIN'
                when itemid = 50812 then 'INTUBATED'
                when itemid = 50813 then 'LACTATE'
                when itemid = 50814 then 'METHEMOGLOBIN'
                when itemid = 50815 then 'O2FLOW'
                when itemid = 50816 then 'FIO2'
                when itemid = 50817 then 'SO2' -- OXYGENSATURATION
                when itemid = 50818 then 'PCO2'
                when itemid = 50819 then 'PEEP'
                when itemid = 50820 then 'PH'
                when itemid = 50821 then 'PO2'
                when itemid = 50822 then 'POTASSIUM'
                when itemid = 50823 then 'REQUIREDO2'
                when itemid = 50824 then 'SODIUM'
                when itemid = 50825 then 'TEMPERATURE'
                when itemid = 50826 then 'TIDALVOLUME'
                when itemid = 50827 then 'VENTILATIONRATE'
                when itemid = 50828 then 'VENTILATOR'
                else null
                end as label
                , charttime
                , value
                -- add in some sanity checks on the values
                , case
                when valuenum <= 0 then null
                when itemid = 50810 and valuenum > 100 then null -- hematocrit
                -- ensure FiO2 is a valid number between 21-100
                -- mistakes are rare (<100 obs out of ~100,000)
                -- there are 862 obs of valuenum == 20 - some people round down!
                -- rather than risk imputing garbage data for FiO2, we simply NULL invalid values
                when itemid = 50816 and valuenum < 20 then null
                when itemid = 50816 and valuenum > 100 then null
                when itemid = 50817 and valuenum > 100 then null -- O2 sat
                when itemid = 50815 and valuenum >  70 then null -- O2 flow
                when itemid = 50821 and valuenum > 800 then null -- PO2
                -- conservative upper limit
                else valuenum
                end as valuenum
            from `physionet-data.mimiciii_clinical.labevents` le
            where le.ITEMID in
            -- blood gases
            (
            50800, 50801, 50802, 50803, 50804, 50805, 50806, 50807, 50808, 50809
            , 50810, 50811, 50812, 50813, 50814, 50815, 50816, 50817, 50818, 50819
            , 50820, 50821, 50822, 50823, 50824, 50825, 50826, 50827, 50828
            , 51545
            )
        )
        , grp as
        (
        select pvt.hadm_id, pvt.charttime
        , max(case when label = 'SPECIMEN' then value else null end) as SPECIMEN
        , avg(case when label = 'AADO2' then valuenum else null end) as AADO2
        , avg(case when label = 'BASEEXCESS' then valuenum else null end) as BASEEXCESS
        , avg(case when label = 'BICARBONATE' then valuenum else null end) as BICARBONATE
        , avg(case when label = 'TOTALCO2' then valuenum else null end) as TOTALCO2
        , avg(case when label = 'CARBOXYHEMOGLOBIN' then valuenum else null end) as CARBOXYHEMOGLOBIN
        , avg(case when label = 'CHLORIDE' then valuenum else null end) as CHLORIDE
        , avg(case when label = 'CALCIUM' then valuenum else null end) as CALCIUM
        , avg(case when label = 'GLUCOSE' then valuenum else null end) as GLUCOSE
        , avg(case when label = 'HEMATOCRIT' then valuenum else null end) as HEMATOCRIT
        , avg(case when label = 'HEMOGLOBIN' then valuenum else null end) as HEMOGLOBIN
        , avg(case when label = 'INTUBATED' then valuenum else null end) as INTUBATED
        , avg(case when label = 'LACTATE' then valuenum else null end) as LACTATE
        , avg(case when label = 'METHEMOGLOBIN' then valuenum else null end) as METHEMOGLOBIN
        , avg(case when label = 'O2FLOW' then valuenum else null end) as O2FLOW
        , avg(case when label = 'FIO2' then valuenum else null end) as FIO2
        , avg(case when label = 'SO2' then valuenum else null end) as SO2 -- OXYGENSATURATION
        , avg(case when label = 'PCO2' then valuenum else null end) as PCO2
        , avg(case when label = 'PEEP' then valuenum else null end) as PEEP
        , avg(case when label = 'PH' then valuenum else null end) as PH
        , avg(case when label = 'PO2' then valuenum else null end) as PO2
        , avg(case when label = 'POTASSIUM' then valuenum else null end) as POTASSIUM
        , avg(case when label = 'REQUIREDO2' then valuenum else null end) as REQUIREDO2
        , avg(case when label = 'SODIUM' then valuenum else null end) as SODIUM
        , avg(case when label = 'TEMPERATURE' then valuenum else null end) as TEMPERATURE
        , avg(case when label = 'TIDALVOLUME' then valuenum else null end) as TIDALVOLUME
        , max(case when label = 'VENTILATIONRATE' then valuenum else null end) as VENTILATIONRATE
        , max(case when label = 'VENTILATOR' then valuenum else null end) as VENTILATOR
        from pvt
        group by pvt.hadm_id, pvt.charttime
        -- remove observations if there is more than one specimen listed
        -- we do not know whether these are arterial or mixed venous, etc...
        -- happily this is a small fraction of the total number of observations
        having sum(case when label = 'SPECIMEN' then 1 else 0 end)<2
        )
        select
        iid.icustay_id, grp.*
        from grp
        inner join `physionet-data.mimiciii_clinical.admissions` adm
        on grp.hadm_id = adm.hadm_id
        left join iid_assign iid
        on adm.subject_id = iid.subject_id
        and grp.charttime >= iid.data_start
        and grp.charttime < iid.data_end
        order by grp.hadm_id, grp.charttime
    ), pivoted_bg_art AS (
        with stg_spo2 as
        (
        select HADM_ID, CHARTTIME
            -- avg here is just used to group SpO2 by charttime
            , avg(valuenum) as SpO2
        from `physionet-data.mimiciii_clinical.chartevents`
        -- o2 sat
        where ITEMID in
        (
            646 -- SpO2
        , 220277 -- O2 saturation pulseoxymetry
        )
        and valuenum > 0 and valuenum <= 100
        group by HADM_ID, CHARTTIME
        )
        , stg_fio2 as
        (
        select HADM_ID, CHARTTIME
            -- pre-process the FiO2s to ensure they are between 21-100%
            , max(
                case
                when itemid = 223835
                    then case
                    when valuenum > 0 and valuenum <= 1
                        then valuenum * 100
                    -- improperly input data - looks like O2 flow in litres
                    when valuenum > 1 and valuenum < 21
                        then null
                    when valuenum >= 21 and valuenum <= 100
                        then valuenum
                    else null end -- unphysiological
                when itemid in (3420, 3422)
                -- all these values are well formatted
                    then valuenum
                when itemid = 190 and valuenum > 0.20 and valuenum < 1
                -- well formatted but not in %
                    then valuenum * 100
            else null end
            ) as fio2_chartevents
        from `physionet-data.mimiciii_clinical.chartevents`
        where ITEMID in
        (
            3420 -- FiO2
        , 190 -- FiO2 set
        , 223835 -- Inspired O2 Fraction (FiO2)
        , 3422 -- FiO2 [measured]
        )
        and valuenum > 0 and valuenum < 100
        -- exclude rows marked as error
        and error != 1
        group by HADM_ID, CHARTTIME
        )
        , stg2 as
        (
        select bg.*
        , ROW_NUMBER() OVER (partition by bg.hadm_id, bg.charttime order by s1.charttime DESC) as lastRowSpO2
        , s1.spo2
        from pivoted_bg bg
        left join stg_spo2 s1
        -- same hospitalization
        on  bg.hadm_id = s1.hadm_id
        -- spo2 occurred at most 2 hours before this blood gas
        and s1.charttime between DATETIME_SUB(bg.charttime, interval '2' hour) and bg.charttime
        where bg.po2 is not null
        )
        , stg3 as
        (
        select bg.*
        , ROW_NUMBER() OVER (partition by bg.hadm_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2
        , s2.fio2_chartevents

        -- create our specimen prediction
        ,  1/(1+exp(-(-0.02544
        +    0.04598 * po2
        + coalesce(-0.15356 * spo2             , -0.15356 *   97.49420 +    0.13429)
        + coalesce( 0.00621 * fio2_chartevents ,  0.00621 *   51.49550 +   -0.24958)
        + coalesce( 0.10559 * hemoglobin       ,  0.10559 *   10.32307 +    0.05954)
        + coalesce( 0.13251 * so2              ,  0.13251 *   93.66539 +   -0.23172)
        + coalesce(-0.01511 * pco2             , -0.01511 *   42.08866 +   -0.01630)
        + coalesce( 0.01480 * fio2             ,  0.01480 *   63.97836 +   -0.31142)
        + coalesce(-0.00200 * aado2            , -0.00200 *  442.21186 +   -0.01328)
        + coalesce(-0.03220 * bicarbonate      , -0.03220 *   22.96894 +   -0.06535)
        + coalesce( 0.05384 * totalco2         ,  0.05384 *   24.72632 +   -0.01405)
        + coalesce( 0.08202 * lactate          ,  0.08202 *    3.06436 +    0.06038)
        + coalesce( 0.10956 * ph               ,  0.10956 *    7.36233 +   -0.00617)
        + coalesce( 0.00848 * o2flow           ,  0.00848 *    7.59362 +   -0.35803)
        ))) as SPECIMEN_PROB
        from stg2 bg
        left join stg_fio2 s2
        -- same patient
        on  bg.hadm_id = s2.hadm_id
        -- fio2 occurred at most 4 hours before this blood gas
        and s2.charttime between DATETIME_SUB(bg.charttime, interval '4' hour) and bg.charttime
        and s2.fio2_chartevents > 0
        where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1)
        )
        select
            stg3.hadm_id
        , stg3.icustay_id
        , stg3.charttime
        , SPECIMEN -- raw data indicating sample type, only present 80% of the time
        -- prediction of specimen for missing data
        , case
                when SPECIMEN is not null then SPECIMEN
                when SPECIMEN_PROB > 0.75 then 'ART'
            else null end as SPECIMEN_PRED
        , SPECIMEN_PROB

        -- oxygen related parameters
        , SO2, spo2 -- note spo2 is from chartevents
        , PO2, PCO2
        , fio2_chartevents, FIO2
        , AADO2
        -- also calculate AADO2
        , case
            when  PO2 is not null
                and pco2 is not null
                and coalesce(FIO2, fio2_chartevents) is not null
            -- multiple by 100 because FiO2 is in a % but should be a fraction
                then (coalesce(FIO2, fio2_chartevents)/100) * (760 - 47) - (pco2/0.8) - po2
            else null
            end as AADO2_calc
        , case
            when PO2 is not null and coalesce(FIO2, fio2_chartevents) is not null
            -- multiply by 100 because FiO2 is in a % but should be a fraction
                then 100*PO2/(coalesce(FIO2, fio2_chartevents))
            else null
            end as PaO2FiO2Ratio
        -- acid-base parameters
        , PH, BASEEXCESS
        , BICARBONATE, TOTALCO2

        -- blood count parameters
        , HEMATOCRIT
        , HEMOGLOBIN
        , CARBOXYHEMOGLOBIN
        , METHEMOGLOBIN

        -- chemistry
        , CHLORIDE, CALCIUM
        , TEMPERATURE
        , POTASSIUM, SODIUM
        , LACTATE
        , GLUCOSE

        -- ventilation stuff that's sometimes input
        , INTUBATED, TIDALVOLUME, VENTILATIONRATE, VENTILATOR
        , PEEP, O2Flow
        , REQUIREDO2
        from stg3
        where lastRowFiO2 = 1 -- only the most recent FiO2
        -- restrict it to *only* arterial samples
        and (SPECIMEN = 'ART' or SPECIMEN_PROB > 0.75)
        order by hadm_id, charttime
    )
    SELECT * FROM pivoted_bg_art
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.pivoted_uo` AS (
    WITH pivoted_uo AS (
        select
        icustay_id
        , charttime
        , sum(UrineOutput) as UrineOutput
        from
        (
        select
        -- patient identifiers
            oe.icustay_id
        , oe.charttime
        -- volumes associated with urine output ITEMIDs
        -- note we consider input of GU irrigant as a negative volume
        , case
            when oe.itemid = 227488 and oe.value > 0 then -1*oe.value
            else oe.value
            end as UrineOutput
        from `physionet-data.mimiciii_clinical.outputevents` oe
        WHERE itemid in
        (
        -- these are the most frequently occurring urine output observations in CareVue
        40055, -- "Urine Out Foley"
        43175, -- "Urine ."
        40069, -- "Urine Out Void"
        40094, -- "Urine Out Condom Cath"
        40715, -- "Urine Out Suprapubic"
        40473, -- "Urine Out IleoConduit"
        40085, -- "Urine Out Incontinent"
        40057, -- "Urine Out Rt Nephrostomy"
        40056, -- "Urine Out Lt Nephrostomy"
        40405, -- "Urine Out Other"
        40428, -- "Urine Out Straight Cath"
        40086,--	Urine Out Incontinent
        40096, -- "Urine Out Ureteral Stent #1"
        40651, -- "Urine Out Ureteral Stent #2"

        -- these are the most frequently occurring urine output observations in CareVue
        226559, -- "Foley"
        226560, -- "Void"
        226561, -- "Condom Cath"
        226584, -- "Ileoconduit"
        226563, -- "Suprapubic"
        226564, -- "R Nephrostomy"
        226565, -- "L Nephrostomy"
        226567, --	Straight Cath
        226557, -- R Ureteral Stent
        226558, -- L Ureteral Stent
        227488, -- GU Irrigant Volume In
        227489  -- GU Irrigant/Urine Volume Out
        )
        ) t1
        group by t1.icustay_id, t1.charttime
        order by t1.icustay_id, t1.charttime
    )
    SELECT * FROM pivoted_uo
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.weightdurations` AS (
    WITH echodata AS (
        select ROW_ID
        , subject_id, hadm_id
        , chartdate
        -- charttime is always null for echoes..
        -- however, the time is available in the echo text, e.g.:
        -- , substring(ne.text, 'Date/Time: [\[\]0-9*-]+ at ([0-9:]+)') as TIMESTAMP
        -- we can therefore impute it and re-create charttime
        , cast(parse_timestamp('%Y-%m-%d %H:%M',CONCAT(chartdate,' ',REGEXP_EXTRACT(ne.text,r'Date/Time: [\[\]0-9*-]+ at ([0-9:]+)'))) as DATETIME) as charttime

        -- explanation of below substring:
        --  'Indication: ' - matched verbatim
        --  (.*?) - match any character
        --  \n - the end of the line
        -- substring only returns the item in ()s
        -- note: the '?' makes it non-greedy. if you exclude it, it matches until it reaches the *last* \n

        , REGEXP_EXTRACT(ne.text, r'Indication: (.*?)\n') as Indication

        -- sometimes numeric values contain de-id text, e.g. [** Numeric Identifier **]
        -- this removes that text
        , case
            when REGEXP_EXTRACT(ne.text, r'Height: \(in\) (.*?)\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'Height: \(in\) (.*?)\n') as numeric)
            end as Height

        , case
            when REGEXP_EXTRACT(ne.text, r'Weight \(lb\): (.*?)\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'Weight \(lb\): (.*?)\n') as numeric)
            end as Weight

        , case
            when REGEXP_EXTRACT(ne.text, r'BSA \(m2\): (.*?) m2\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'BSA \(m2\): (.*?) m2\n') as numeric)
            end as BSA -- ends in 'm2'

        , REGEXP_EXTRACT(ne.text, r'BP \(mm Hg\): (.*?)\n') as BP -- Sys/Dias

        , case
            when REGEXP_EXTRACT(ne.text, r'BP \(mm Hg\): ([0-9]+)/[0-9]+?\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'BP \(mm Hg\): ([0-9]+)/[0-9]+?\n') as numeric)
            end as BPSys -- first part of fraction

        , case
            when REGEXP_EXTRACT(ne.text, r'BP \(mm Hg\): [0-9]+/([0-9]+?)\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'BP \(mm Hg\): [0-9]+/([0-9]+?)\n') as numeric)
            end as BPDias -- second part of fraction

        , case
            when REGEXP_EXTRACT(ne.text, r'HR \(bpm\): ([0-9]+?)\n') like '%*%'
                then null
            else cast(REGEXP_EXTRACT(ne.text, r'HR \(bpm\): ([0-9]+?)\n') as numeric)
            end as HR

        , REGEXP_EXTRACT(ne.text, r'Status: (.*?)\n') as Status
        , REGEXP_EXTRACT(ne.text, r'Test: (.*?)\n') as Test
        , REGEXP_EXTRACT(ne.text, r'Doppler: (.*?)\n') as Doppler
        , REGEXP_EXTRACT(ne.text, r'Contrast: (.*?)\n') as Contrast
        , REGEXP_EXTRACT(ne.text, r'Technical Quality: (.*?)\n') as TechnicalQuality
        from `physionet-data.mimiciii_notes.noteevents` ne
        where category = 'Echo'
    ), weightdurations as (
        -- This query extracts weights for adult ICU patients with start/stop times
        -- if an admission weight is given, then this is assigned from intime to outtime
        WITH wt_neonate AS
        ( 
            SELECT c.icustay_id, c.charttime
            , MAX(CASE WHEN c.itemid = 3580 THEN c.valuenum END) as wt_kg
            , MAX(CASE WHEN c.itemid = 3581 THEN c.valuenum END) as wt_lb
            , MAX(CASE WHEN c.itemid = 3582 THEN c.valuenum END) as wt_oz
            FROM `physionet-data.mimiciii_clinical.chartevents` c
            WHERE c.itemid in (3580, 3581, 3582)
            AND c.icustay_id IS NOT NULL
            AND c.error != 1
            -- wt_oz/wt_lb/wt_kg are only 0 erroneously, so drop these rows
            AND c.valuenum > 0
        -- a separate query was run to manually verify only 1 value exists per
        -- icustay_id/charttime/itemid grouping
        -- therefore, we can use max() across itemid to collapse these values to 1 row per group
            GROUP BY c.icustay_id, c.charttime
        )
        , birth_wt AS
        (
            SELECT c.icustay_id, c.charttime
            , MAX(
            CASE
            WHEN c.itemid = 4183 THEN
                -- clean free-text birth weight data
                CASE
                -- ignore value if there are any non-numeric characters
                WHEN REGEXP_CONTAINS(c.value,r'[^0-9\.]') THEN NULL 
                -- convert grams to kd
                WHEN CAST(c.value AS NUMERIC) > 100 THEN CAST(c.value AS NUMERIC)/1000
                -- keep kg as is, filtering bad values (largest baby ever born was conveniently 9.98kg)
                WHEN CAST(c.value AS NUMERIC) < 10 THEN CAST(c.value AS NUMERIC)
                -- ignore other values (those between 10-100) - junk data
                ELSE NULL END
            -- itemid 3723 happily has all numeric data - also doesn't store any grams data
            WHEN c.itemid = 3723 AND c.valuenum < 10 THEN c.valuenum
            ELSE NULL END) as wt_kg
            FROM `physionet-data.mimiciii_clinical.chartevents` c
            WHERE c.itemid in (3723, 4183)
            AND c.icustay_id IS NOT NULL
            AND c.error != 1
        -- a separate query was run to manually verify only 1 value exists per
        -- icustay_id/charttime/itemid grouping
        -- therefore, we can use max() across itemid to collapse these values to 1 row per group
            GROUP BY c.icustay_id, c.charttime
        )
        , wt_stg as
        (
            SELECT
                c.icustay_id
            , c.charttime
            , case when c.itemid in (762,226512) then 'admit'
                else 'daily' end as weight_type
            -- TODO: eliminate obvious outliers if there is a reasonable weight
            , c.valuenum as weight
            FROM `physionet-data.mimiciii_clinical.chartevents` c
            WHERE c.valuenum IS NOT NULL
            AND c.itemid in
            (
                762,226512 -- Admit Wt
                , 763,224639 -- Daily Weight
            )
            AND c.icustay_id IS NOT NULL
            AND c.valuenum > 0
            -- exclude rows marked as error
            AND c.error != 1
            UNION ALL
            SELECT
                n.icustay_id
            , n.charttime
            , 'daily' AS weight_type
            , CASE
                WHEN wt_kg IS NOT NULL THEN wt_kg
                WHEN wt_lb IS NOT NULL THEN wt_lb*0.45359237 + wt_oz*0.0283495231
                ELSE NULL END AS weight
            FROM wt_neonate n
            UNION ALL
            SELECT
                b.icustay_id
            , b.charttime
            -- birth weight of neonates is treated as admission weight
            , 'admit' AS weight_type
            , wt_kg as weight
            FROM birth_wt b
        )
        -- assign ascending row number
        , wt_stg1 as
        (
        select
            icustay_id
            , charttime
            , weight_type
            , weight
            , ROW_NUMBER() OVER (partition by icustay_id, weight_type order by charttime) as rn
        from wt_stg
        WHERE weight IS NOT NULL
        )
        -- change charttime to intime for the first admission weight recorded
        , wt_stg2 AS
        (
        SELECT 
            wt_stg1.icustay_id
            , ie.intime, ie.outtime
            , case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1
                then DATETIME_SUB(ie.intime, interval '2' hour)
            else wt_stg1.charttime end as starttime
            , wt_stg1.weight
        from wt_stg1
        INNER JOIN `physionet-data.mimiciii_clinical.icustays` ie
            on ie.icustay_id = wt_stg1.icustay_id
        )
        , wt_stg3 as
        (
        select
            icustay_id
            , intime, outtime
            , starttime
            , coalesce(
                LEAD(starttime) OVER (PARTITION BY icustay_id ORDER BY starttime),
                DATETIME_ADD(outtime, interval '2' hour)
            ) as endtime
            , weight
        from wt_stg2
        )
        -- this table is the start/stop times from admit/daily weight in charted data
        , wt1 as
        (
        select
            icustay_id
            , starttime
            , coalesce(endtime,
            LEAD(starttime) OVER (partition by icustay_id order by starttime),
            -- impute ICU discharge as the end of the final weight measurement
            -- plus a 2 hour "fuzziness" window
            DATETIME_ADD(outtime, interval '2' hour))
            as endtime
            , weight
        from wt_stg3
        )
        -- if the intime for the patient is < the first charted daily weight
        -- then we will have a "gap" at the start of their stay
        -- to prevent this, we look for these gaps and backfill the first weight
        -- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 icustay_id
        , wt_fix as
        (
        select ie.icustay_id
            -- we add a 2 hour "fuzziness" window
            , DATETIME_SUB(ie.intime, interval '2' hour) as starttime
            , wt.starttime as endtime
            , wt.weight
        from `physionet-data.mimiciii_clinical.icustays` ie
        inner join
        -- the below subquery returns one row for each unique icustay_id
        -- the row contains: the first starttime and the corresponding weight
        (
            SELECT wt1.icustay_id, wt1.starttime, wt1.weight
            , ROW_NUMBER() OVER (PARTITION BY wt1.icustay_id ORDER BY wt1.starttime) as rn
            FROM wt1
        ) wt
            ON  ie.icustay_id = wt.icustay_id
            AND wt.rn = 1
            and ie.intime < wt.starttime
        )
        -- add the backfill rows to the main weight table
        , wt2 as
        (
        select
            wt1.icustay_id
            , wt1.starttime
            , wt1.endtime
            , wt1.weight
        from wt1
        UNION ALL
        SELECT
            wt_fix.icustay_id
            , wt_fix.starttime
            , wt_fix.endtime
            , wt_fix.weight
        from wt_fix
        )
        -- get more weights from echo - completes data for ~2500 patients
        -- we only use echo data if there is *no* charted data
        -- we impute the median echo weight for their entire ICU stay
        , echo_lag as
        (
        select
            ie.icustay_id
            , ie.intime, ie.outtime
            , 0.453592*ec.weight as weight_echo
            , ROW_NUMBER() OVER (PARTITION BY ie.icustay_id ORDER BY ec.charttime) as rn
            , ec.charttime as starttime
            , LEAD(ec.charttime) OVER (PARTITION BY ie.icustay_id ORDER BY ec.charttime) as endtime
        from `physionet-data.mimiciii_clinical.icustays` ie
        inner join echodata ec
            on ie.hadm_id = ec.hadm_id
        where ec.weight is not null
        )
        , echo_final as
        (
            select
            el.icustay_id
            , el.starttime
                -- we add a 2 hour "fuzziness" window
            , coalesce(el.endtime, DATETIME_ADD(el.outtime,interval '2' hour)) as endtime
            , weight_echo
            from echo_lag el
            UNION ALL
            -- if the starttime was later than ICU admission, back-propogate the weight
            select
            el.icustay_id
            , DATETIME_SUB(el.intime, interval '2' hour) as starttime
            , el.starttime as endtime
            , el.weight_echo
            from echo_lag el
            where el.rn = 1
            and el.starttime > DATETIME_SUB(el.intime, interval '2' hour)
        )
        select
        wt2.icustay_id, wt2.starttime, wt2.endtime, wt2.weight
        from wt2
        UNION ALL
        -- only add echos if we have no charted weight data
        select
        ef.icustay_id, ef.starttime, ef.endtime, ef.weight_echo as weight
        from echo_final ef
        where ef.icustay_id not in (select distinct icustay_id from wt2)
        order by icustay_id, starttime, endtime
    )
    SELECT * FROM weightdurations
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.norepinephrine_dose` AS (
    WITH norepinephrine_dose as (
    -- Get drug administration data from CareVue first
        with vasocv1 as
        (
            select
                cv.icustay_id, cv.charttime
                -- case statement determining whether the ITEMID is an instance of vasopressor usage
                , max(case when itemid in (30047,30120) then 1 else 0 end) as vaso -- norepinephrine

                -- the 'stopped' column indicates if a vasopressor has been disconnected
                , max(case when itemid in (30047,30120)       and stopped in ('Stopped','D/C','d') then 1
                    else 0 end) as vaso_stopped

            -- case statement determining whether the ITEMID is an instance of vasopressor usage

                , max(case when itemid in (30047,30120) and rate is not null then 1 else 0 end) as vaso_null
                , max(case
                        when itemid = 30047 and wd.weight is null then rate / 80.0 -- this is rare, only affects a total of ~400 rows
                        when itemid = 30047 then rate / wd.weight -- measured in mcgmin
                        when itemid = 30120 then rate -- measured in mcgkgmin ** there are clear errors, perhaps actually mcgmin
                    else null end) as vaso_rate
                , max(case when itemid in (30047,30120) then amount else null end) as vaso_amount

            from `physionet-data.mimiciii_clinical.inputevents_cv` cv
            left join `amsterdam-translation.amsterdam_custom.weightdurations` wd
                on cv.icustay_id = wd.icustay_id
                and cv.charttime between wd.starttime and wd.endtime
            where itemid in (30047,30120) -- norepinephrine
            and cv.icustay_id is not null
            group by cv.icustay_id, cv.charttime
            )
            , vasocv2 as
            (
            select v.*
                , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
            from
                vasocv1 v
            )
            , vasocv3 as
            (
            select v.*
                , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
            from
                vasocv2 v
            )
            , vasocv4 as
            (
            select
                icustay_id
                , charttime
                -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta

                , vaso
                , vaso_rate
                , vaso_amount
                , vaso_stopped
                , vaso_prevrate_ifnull

                -- We define start time here
                , case
                    when vaso = 0 then null

                    -- if this is the first instance of the vasoactive drug
                    when vaso_rate > 0 and
                    LAG(vaso_prevrate_ifnull,1)
                    OVER
                    (
                    partition by icustay_id, vaso, vaso_null
                    order by charttime
                    )
                    is null
                    then 1

                    -- you often get a string of 0s
                    -- we decide not to set these as 1, just because it makes vasonum sequential
                    when vaso_rate = 0 and
                    LAG(vaso_prevrate_ifnull,1)
                    OVER
                    (
                    partition by icustay_id, vaso
                    order by charttime
                    )
                    = 0
                    then 0

                    -- sometimes you get a string of NULL, associated with 0 volumes
                    -- same reason as before, we decide not to set these as 1
                    -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
                    when vaso_prevrate_ifnull = 0 and
                    LAG(vaso_prevrate_ifnull,1)
                    OVER
                    (
                    partition by icustay_id, vaso
                    order by charttime
                    )
                    = 0
                    then 0

                    -- If the last recorded rate was 0, newvaso = 1
                    when LAG(vaso_prevrate_ifnull,1)
                    OVER
                    (
                    partition by icustay_id, vaso
                    order by charttime
                    ) = 0
                    then 1

                    -- If the last recorded vaso was D/C'd, newvaso = 1
                    when
                    LAG(vaso_stopped,1)
                    OVER
                    (
                    partition by icustay_id, vaso
                    order by charttime
                    )
                    = 1 then 1

                    -- ** not sure if the below is needed
                    --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
                else null
                end as vaso_start

            FROM
            vasocv3
            )
            -- propagate start/stop flags forward in time
            , vasocv5 as
            (
            select v.*
                , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
            FROM
            vasocv4 v
            )
            , vasocv6 as
            (
            select v.*
                -- We define end time here
                , case
                    when vaso = 0
                    then null

                    -- If the recorded vaso was D/C'd, this is an end time
                    when vaso_stopped = 1
                    then vaso_first

                    -- If the rate is zero, this is the end time
                    when vaso_rate = 0
                    then vaso_first

                    -- the last row in the table is always a potential end time
                    -- this captures patients who die/are discharged while on vasopressors
                    -- in principle, this could add an extra end time for the vasopressor
                    -- however, since we later group on vaso_start, any extra end times are ignored
                    when LEAD(CHARTTIME,1)
                    OVER
                    (
                    partition by icustay_id, vaso
                    order by charttime
                    ) is null
                    then vaso_first

                    else null
                    end as vaso_stop
                from vasocv5 v
            )

            -- -- if you want to look at the results of the table before grouping:
            -- select
            --   icustay_id, charttime, vaso, vaso_rate, vaso_amount
            --     , vaso_stopped
            --     , vaso_start
            --     , vaso_first
            --     , vaso_stop
            -- from vasocv6 order by icustay_id, charttime;

            , vasocv7 as
            (
            select
            icustay_id
            , charttime as starttime
            , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
            from vasocv6
            where
            vaso_first is not null -- bogus data
            and
            vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
            and
            icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
            )
            -- table of start/stop times for event
            , vasocv8 as
            (
            select
                icustay_id
                , starttime, endtime
                , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
            from vasocv7
            where endtime is not null
            and vaso_rate > 0
            and starttime != endtime
            )
            -- collapse these start/stop times down if the rate doesn't change
            , vasocv9 as
            (
            select
                icustay_id
                , starttime, endtime
                , case
                    when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
                    AND  LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
                    THEN 0
                else 1
                end as vaso_groups
                , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
            from vasocv8
            where endtime is not null
            and vaso_rate > 0
            and starttime != endtime
            )
            , vasocv10 as
            (
            select
                icustay_id
                , starttime, endtime
                , vaso_groups
                , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
                , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
            from vasocv9
            )
            , vasocv as
            (
            select icustay_id
            , min(starttime) as starttime
            , max(endtime) as endtime
            , vaso_groups_sum
            , vaso_rate
            , sum(vaso_amount) as vaso_amount
            from vasocv10
            group by icustay_id, vaso_groups_sum, vaso_rate
            )
            -- now we extract the associated data for metavision patients
            , vasomv as
            (
            select
                icustay_id, linkorderid
                , rate as vaso_rate
                , amount as vaso_amount
                , starttime
                , endtime
            from `physionet-data.mimiciii_clinical.inputevents_mv`
            where itemid = 221906 -- norepinephrine
            and statusdescription != 'Rewritten' -- only valid orders
            )
            -- now assign this data to every hour of the patient's stay
            -- vaso_amount for carevue is not accurate
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasocv
        UNION ALL
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasomv
        order by icustay_id, starttime
    )
    SELECT * FROM norepinephrine_dose
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.epinephrine_dose` AS (
    WITH epinephrine_dose as (
        -- Get drug administration data from CareVue first
        with vasocv1 as
        (
        select
            cv.icustay_id, cv.charttime
            -- case statement determining whether the ITEMID is an instance of vasopressor usage
            , max(case when itemid in (30044,30119,30309) then 1 else 0 end) as vaso -- epinephrine

            -- the 'stopped' column indicates if a vasopressor has been disconnected
            , max(case when itemid in (30044,30119,30309) and stopped in ('Stopped','D/C','d') then 1
                else 0 end) as vaso_stopped

            , max(case when itemid in (30044,30119,30309) and rate is not null then 1 else 0 end) as vaso_null
            , max(case
                    when itemid = 30044 and wd.weight is null then rate / 80.0 -- super rare to be missing weight... affects 2 patients for 14 rows
                    when itemid = 30044 then rate / wd.weight -- measured in mcgmin
                    when itemid in (30119,30309) then rate -- measured in mcgkgmin
                    else null
                end) as vaso_rate
            , max(case when itemid in (30044,30119,30309) then amount else null end) as vaso_amount

        from `physionet-data.mimiciii_clinical.inputevents_cv` cv
        left join `amsterdam-translation.amsterdam_custom.weightdurations` wd
            on cv.icustay_id = wd.icustay_id
            and cv.charttime between wd.starttime and wd.endtime
        where itemid in
        (
                30044,30119,30309 -- epinephrine
        )
        and cv.icustay_id is not null
        group by cv.icustay_id, charttime
        )
        , vasocv2 as
        (
        select v.*
            , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
        from
            vasocv1 v
        )
        , vasocv3 as
        (
        select v.*
            , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
        from
            vasocv2 v
        )
        , vasocv4 as
        (
        select
            icustay_id
            , charttime
            -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta

            , vaso
            , vaso_rate
            , vaso_amount
            , vaso_stopped
            , vaso_prevrate_ifnull

            -- We define start time here
            , case
                when vaso = 0 then null

                -- if this is the first instance of the vasoactive drug
                when vaso_rate > 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso, vaso_null
                order by charttime
                )
                is null
                then 1

                -- you often get a string of 0s
                -- we decide not to set these as 1, just because it makes vasonum sequential
                when vaso_rate = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- sometimes you get a string of NULL, associated with 0 volumes
                -- same reason as before, we decide not to set these as 1
                -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
                when vaso_prevrate_ifnull = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- If the last recorded rate was 0, newvaso = 1
                when LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) = 0
                then 1

                -- If the last recorded vaso was D/C'd, newvaso = 1
                when
                LAG(vaso_stopped,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 1 then 1

                -- ** not sure if the below is needed
                --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
            else null
            end as vaso_start

        FROM
        vasocv3
        )
        -- propagate start/stop flags forward in time
        , vasocv5 as
        (
        select v.*
            , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
        FROM
        vasocv4 v
        )
        , vasocv6 as
        (
        select v.*
            -- We define end time here
            , case
                when vaso = 0
                then null

                -- If the recorded vaso was D/C'd, this is an end time
                when vaso_stopped = 1
                then vaso_first

                -- If the rate is zero, this is the end time
                when vaso_rate = 0
                then vaso_first

                -- the last row in the table is always a potential end time
                -- this captures patients who die/are discharged while on vasopressors
                -- in principle, this could add an extra end time for the vasopressor
                -- however, since we later group on vaso_start, any extra end times are ignored
                when LEAD(CHARTTIME,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) is null
                then vaso_first

                else null
                end as vaso_stop
            from vasocv5 v
        )

        -- -- if you want to look at the results of the table before grouping:
        -- select
        --   icustay_id, charttime, vaso, vaso_rate, vaso_amount
        --     , vaso_stopped
        --     , vaso_start
        --     , vaso_first
        --     , vaso_stop
        -- from vasocv6 order by icustay_id, charttime;

        , vasocv7 as
        (
        select
        icustay_id
        , charttime as starttime
        , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
        , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv6
        where
        vaso_first is not null -- bogus data
        and
        vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
        and
        icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
        )
        -- table of start/stop times for event
        , vasocv8 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv7
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        -- collapse these start/stop times down if the rate doesn't change
        , vasocv9 as
        (
        select
            icustay_id
            , starttime, endtime
            , case
                when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
                AND  LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
                THEN 0
            else 1
            end as vaso_groups
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv8
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        , vasocv10 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso_groups
            , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv9
        )
        , vasocv as
        (
        select icustay_id
        , min(starttime) as starttime
        , max(endtime) as endtime
        , vaso_groups_sum
        , vaso_rate
        , sum(vaso_amount) as vaso_amount
        from vasocv10
        group by icustay_id, vaso_groups_sum, vaso_rate
        )
        -- now we extract the associated data for metavision patients
        , vasomv as
        (
        select
            icustay_id, linkorderid
            , rate as vaso_rate
            , amount as vaso_amount
            , starttime
            , endtime
        from `physionet-data.mimiciii_clinical.inputevents_mv`
        where itemid = 221289 -- epinephrine
        and statusdescription != 'Rewritten' -- only valid orders
        )
        -- now assign this data to every hour of the patient's stay
        -- vaso_amount for carevue is not accurate
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasocv
        UNION ALL
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasomv
        order by icustay_id, starttime
    )
    SELECT * FROM epinephrine_dose
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.dopamine_dose` AS (
    WITH dopamine_dose as (
        -- Get drug administration data from CareVue first
        with vasocv1 as
        (
        select
            icustay_id, charttime
            -- case statement determining whether the ITEMID is an instance of vasopressor usage
            , max(case when itemid in (30043,30307) then 1 else 0 end) as vaso -- dopamine
            -- the 'stopped' column indicates if a vasopressor has been disconnected
            , max(case when itemid in (30043,30307)       and stopped in ('Stopped','D/C','d') then 1
                else 0 end) as vaso_stopped
            , max(case when itemid in (30043,30307) and rate is not null then 1 else 0 end) as vaso_null
            , max(case when itemid in (30043,30307) then rate else null end) as vaso_rate
            , max(case when itemid in (30043,30307) then amount else null end) as vaso_amount

        from `physionet-data.mimiciii_clinical.inputevents_cv`
        where itemid in
        (
                30043,30307 -- dopamine
        )
        group by icustay_id, charttime
        )
        , vasocv2 as
        (
        select v.*
            , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
        from
            vasocv1 v
        )
        , vasocv3 as
        (
        select v.*
            , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
        from
            vasocv2 v
        )
        , vasocv4 as
        (
        select
            icustay_id
            , charttime
            -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta

            , vaso
            , vaso_rate
            , vaso_amount
            , vaso_stopped
            , vaso_prevrate_ifnull

            -- We define start time here
            , case
                when vaso = 0 then null

                -- if this is the first instance of the vasoactive drug
                when vaso_rate > 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso, vaso_null
                order by charttime
                )
                is null
                then 1

                -- you often get a string of 0s
                -- we decide not to set these as 1, just because it makes vasonum sequential
                when vaso_rate = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- sometimes you get a string of NULL, associated with 0 volumes
                -- same reason as before, we decide not to set these as 1
                -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
                when vaso_prevrate_ifnull = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- If the last recorded rate was 0, newvaso = 1
                when LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) = 0
                then 1

                -- If the last recorded vaso was D/C'd, newvaso = 1
                when
                LAG(vaso_stopped,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 1 then 1

                -- ** not sure if the below is needed
                --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
            else null
            end as vaso_start

        FROM
        vasocv3
        )
        -- propagate start/stop flags forward in time
        , vasocv5 as
        (
        select v.*
            , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
        FROM
        vasocv4 v
        )
        , vasocv6 as
        (
        select v.*
            -- We define end time here
            , case
                when vaso = 0
                then null

                -- If the recorded vaso was D/C'd, this is an end time
                when vaso_stopped = 1
                then vaso_first

                -- If the rate is zero, this is the end time
                when vaso_rate = 0
                then vaso_first

                -- the last row in the table is always a potential end time
                -- this captures patients who die/are discharged while on vasopressors
                -- in principle, this could add an extra end time for the vasopressor
                -- however, since we later group on vaso_start, any extra end times are ignored
                when LEAD(CHARTTIME,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) is null
                then vaso_first

                else null
                end as vaso_stop
            from vasocv5 v
        )

        -- -- if you want to look at the results of the table before grouping:
        -- select
        --   icustay_id, charttime, vaso, vaso_rate, vaso_amount
        --     , vaso_stopped
        --     , vaso_start
        --     , vaso_first
        --     , vaso_stop
        -- from vasocv6 order by icustay_id, charttime;

        , vasocv7 as
        (
        select
        icustay_id
        , charttime as starttime
        , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
        , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv6
        where
        vaso_first is not null -- bogus data
        and
        vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
        and
        icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
        )
        -- table of start/stop times for event
        , vasocv8 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv7
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        -- collapse these start/stop times down if the rate doesn't change
        , vasocv9 as
        (
        select
            icustay_id
            , starttime, endtime
            , case
                when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
                AND  LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
                THEN 0
            else 1
            end as vaso_groups
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv8
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        , vasocv10 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso_groups
            , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv9
        )
        , vasocv as
        (
        select icustay_id
        , min(starttime) as starttime
        , max(endtime) as endtime
        , vaso_groups_sum
        , vaso_rate
        , sum(vaso_amount) as vaso_amount
        from vasocv10
        group by icustay_id, vaso_groups_sum, vaso_rate
        )
        -- now we extract the associated data for metavision patients
        , vasomv as
        (
        select
            icustay_id, linkorderid
            , rate as vaso_rate
            , amount as vaso_amount
            , starttime
            , endtime
        from `physionet-data.mimiciii_clinical.inputevents_mv`
        where itemid = 221662 -- dopamine
        and statusdescription != 'Rewritten' -- only valid orders
        )
        -- now assign this data to every hour of the patient's stay
        -- vaso_amount for carevue is not accurate
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasocv
        UNION ALL
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasomv
        order by icustay_id, starttime
    )
    SELECT * FROM dopamine_dose
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.dobutamine_dose` AS (
    WITH dobutamine_dose as (
        -- Get drug administration data from CareVue first
        with vasocv1 as
        (
            select
            icustay_id, charttime
            -- case statement determining whether the ITEMID is an instance of vasopressor usage
            , max(case when itemid in (30042,30306) then 1 else 0 end) as vaso -- dobutamine

            -- the 'stopped' column indicates if a vasopressor has been disconnected
            , max(case when itemid in (30042,30306)       and stopped in ('Stopped','D/C','d') then 1
                else 0 end) as vaso_stopped

            , max(case when itemid in (30042,30306) and rate is not null then 1 else 0 end) as vaso_null
            , max(case when itemid in (30042,30306) then rate else null end) as vaso_rate
            , max(case when itemid in (30042,30306) then amount else null end) as vaso_amount

        from `physionet-data.mimiciii_clinical.inputevents_cv`
        where itemid in (30042,30306) -- dobutamine
        group by icustay_id, charttime
        )
        , vasocv2 as
        (
        select v.*
            , sum(vaso_null) over (partition by icustay_id order by charttime) as vaso_partition
        from
            vasocv1 v
        )
        , vasocv3 as
        (
        select v.*
            , first_value(vaso_rate) over (partition by icustay_id, vaso_partition order by charttime) as vaso_prevrate_ifnull
        from
            vasocv2 v
        )
        , vasocv4 as
        (
        select
            icustay_id
            , charttime
            -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta

            , vaso
            , vaso_rate
            , vaso_amount
            , vaso_stopped
            , vaso_prevrate_ifnull

            -- We define start time here
            , case
                when vaso = 0 then null

                -- if this is the first instance of the vasoactive drug
                when vaso_rate > 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso, vaso_null
                order by charttime
                )
                is null
                then 1

                -- you often get a string of 0s
                -- we decide not to set these as 1, just because it makes vasonum sequential
                when vaso_rate = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- sometimes you get a string of NULL, associated with 0 volumes
                -- same reason as before, we decide not to set these as 1
                -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
                when vaso_prevrate_ifnull = 0 and
                LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 0
                then 0

                -- If the last recorded rate was 0, newvaso = 1
                when LAG(vaso_prevrate_ifnull,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) = 0
                then 1

                -- If the last recorded vaso was D/C'd, newvaso = 1
                when
                LAG(vaso_stopped,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                )
                = 1 then 1

                -- ** not sure if the below is needed
                --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
            else null
            end as vaso_start

        FROM
        vasocv3
        )
        -- propagate start/stop flags forward in time
        , vasocv5 as
        (
        select v.*
            , SUM(vaso_start) OVER (partition by icustay_id, vaso order by charttime) as vaso_first
        FROM
        vasocv4 v
        )
        , vasocv6 as
        (
        select v.*
            -- We define end time here
            , case
                when vaso = 0
                then null

                -- If the recorded vaso was D/C'd, this is an end time
                when vaso_stopped = 1
                then vaso_first

                -- If the rate is zero, this is the end time
                when vaso_rate = 0
                then vaso_first

                -- the last row in the table is always a potential end time
                -- this captures patients who die/are discharged while on vasopressors
                -- in principle, this could add an extra end time for the vasopressor
                -- however, since we later group on vaso_start, any extra end times are ignored
                when LEAD(CHARTTIME,1)
                OVER
                (
                partition by icustay_id, vaso
                order by charttime
                ) is null
                then vaso_first

                else null
                end as vaso_stop
            from vasocv5 v
        )

        -- -- if you want to look at the results of the table before grouping:
        -- select
        --   icustay_id, charttime, vaso, vaso_rate, vaso_amount
        --     , vaso_stopped
        --     , vaso_start
        --     , vaso_first
        --     , vaso_stop
        -- from vasocv6 order by icustay_id, charttime;

        , vasocv7 as
        (
        select
        icustay_id
        , charttime as starttime
        , lead(charttime) OVER (partition by icustay_id, vaso_first order by charttime) as endtime
        , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv6
        where
        vaso_first is not null -- bogus data
        and
        vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
        and
        icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
        )
        -- table of start/stop times for event
        , vasocv8 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv7
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        -- collapse these start/stop times down if the rate doesn't change
        , vasocv9 as
        (
        select
            icustay_id
            , starttime, endtime
            , case
                when LAG(endtime) OVER (partition by icustay_id order by starttime, endtime) = starttime
                AND  LAG(vaso_rate) OVER (partition by icustay_id order by starttime, endtime) = vaso_rate
                THEN 0
            else 1
            end as vaso_groups
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv8
        where endtime is not null
        and vaso_rate > 0
        and starttime != endtime
        )
        , vasocv10 as
        (
        select
            icustay_id
            , starttime, endtime
            , vaso_groups
            , SUM(vaso_groups) OVER (partition by icustay_id order by starttime, endtime) as vaso_groups_sum
            , vaso, vaso_rate, vaso_amount, vaso_stop, vaso_start, vaso_first
        from vasocv9
        )
        , vasocv as
        (
        select icustay_id
        , min(starttime) as starttime
        , max(endtime) as endtime
        , vaso_groups_sum
        , vaso_rate
        , sum(vaso_amount) as vaso_amount
        from vasocv10
        group by icustay_id, vaso_groups_sum, vaso_rate
        )
        -- now we extract the associated data for metavision patients
        , vasomv as
        (
        select
            icustay_id, linkorderid
            , rate as vaso_rate
            , amount as vaso_amount
            , starttime
            , endtime
        from `physionet-data.mimiciii_clinical.inputevents_mv`
        where itemid = 221653 -- dobutamine
        and statusdescription != 'Rewritten' -- only valid orders
        )
        -- now assign this data to every hour of the patient's stay
        -- vaso_amount for carevue is not accurate
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasocv
        UNION ALL
        SELECT icustay_id
        , starttime, endtime
        , vaso_rate, vaso_amount
        from vasomv
        order by icustay_id, starttime
    )
    SELECT * FROM dobutamine_dose
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.pivoted_lab` AS (
    WITH pivoted_lab as (
        -- create a table which has fuzzy boundaries on ICU admission (+- 12 hours from documented time)
        -- this is used to assign icustay_id to lab data, which can be collected outside ICU
        -- involves first creating a lag/lead version of intime/outtime
        with i as
        (
        select
            subject_id, icustay_id, intime, outtime
            , lag (outtime) over (partition by subject_id order by intime) as outtime_lag
            , lead (intime) over (partition by subject_id order by intime) as intime_lead
        from `physionet-data.mimiciii_clinical.icustays`
        )
        , iid_assign as
        (
        select
            i.subject_id, i.icustay_id
            -- this rule is:
            --  if there are two hospitalizations within 24 hours, set the start/stop
            --  time as half way between the two admissions
            , case
                when i.outtime_lag is not null
                and i.outtime_lag > DATETIME_SUB(i.intime, INTERVAL 24 HOUR)
                then DATETIME_SUB(i.intime, INTERVAL CAST(DATETIME_DIFF(i.intime,i.outtime_lag,HOUR)/2 AS INT64) HOUR)
            else DATETIME_SUB(i.intime,interval 12 hour)
            end as data_start
            , case
                when i.intime_lead is not null
                and i.intime_lead < DATETIME_ADD(i.outtime, INTERVAL 24 HOUR)
                then DATETIME_ADD(i.outtime, INTERVAL CAST(DATETIME_DIFF(i.intime_lead,i.outtime,HOUR)/2 AS INT64) hour)
            else DATETIME_ADD(i.outtime,interval 12 HOUR)
            end as data_end
            from i
        )
        -- also create fuzzy boundaries on hospitalization
        , h as
        (
        select
            subject_id, hadm_id, admittime, dischtime
            , lag (dischtime) over (partition by subject_id order by admittime) as dischtime_lag
            , lead (admittime) over (partition by subject_id order by admittime) as admittime_lead
        from `physionet-data.mimiciii_clinical.admissions`
        )
        , adm as
        (
        select
            h.subject_id, h.hadm_id
            -- this rule is:
            --  if there are two hospitalizations within 24 hours, set the start/stop
            --  time as half way between the two admissions
            , case
                when h.dischtime_lag is not null
                and h.dischtime_lag > DATETIME_SUB(h.admittime, interval '24' hour)
                then DATETIME_SUB(h.admittime, interval CAST(DATETIME_DIFF(h.admittime,h.dischtime_lag, HOUR)/2 AS INT64) HOUR)
            else DATETIME_SUB(h.admittime, interval '12' hour)
            end as data_start
            , case
                when h.admittime_lead is not null
                and h.admittime_lead < DATETIME_ADD(h.dischtime,interval '24' hour)
                then DATETIME_ADD(h.dischtime, INTERVAL CAST(DATETIME_DIFF(h.admittime_lead,h.dischtime,HOUR)/2 AS INT64) HOUR)
            else DATETIME_ADD(h.dischtime, interval '12' hour)
            end as data_end
            from h
        )
        , le as
        (
        -- begin query that extracts the data
        SELECT subject_id, charttime
        -- here we assign labels to ITEMIDs
        -- this also fuses together multiple ITEMIDs containing the same data
        , CASE
                WHEN itemid = 50868 THEN 'ANION GAP'
                WHEN itemid = 50862 THEN 'ALBUMIN'
                WHEN itemid = 51144 THEN 'BANDS'
                WHEN itemid = 50882 THEN 'BICARBONATE'
                WHEN itemid = 50885 THEN 'BILIRUBIN'
                WHEN itemid = 50912 THEN 'CREATININE'
                -- exclude blood gas
                -- WHEN itemid = 50806 THEN 'CHLORIDE'
                WHEN itemid = 50902 THEN 'CHLORIDE'
                -- exclude blood gas
                -- WHEN itemid = 50809 THEN 'GLUCOSE'
                WHEN itemid = 50931 THEN 'GLUCOSE'
                -- exclude blood gas
                --WHEN itemid = 50810 THEN 'HEMATOCRIT'
                WHEN itemid = 51221 THEN 'HEMATOCRIT'
                -- exclude blood gas
                --WHEN itemid = 50811 THEN 'HEMOGLOBIN'
                WHEN itemid = 51222 THEN 'HEMOGLOBIN'
                WHEN itemid = 50813 THEN 'LACTATE'
                WHEN itemid = 51265 THEN 'PLATELET'
                -- exclude blood gas
                -- WHEN itemid = 50822 THEN 'POTASSIUM'
                WHEN itemid = 50971 THEN 'POTASSIUM'
                WHEN itemid = 51275 THEN 'PTT'
                WHEN itemid = 51237 THEN 'INR'
                WHEN itemid = 51274 THEN 'PT'
                -- exclude blood gas
                -- WHEN itemid = 50824 THEN 'SODIUM'
                WHEN itemid = 50983 THEN 'SODIUM'
                WHEN itemid = 51006 THEN 'BUN'
                WHEN itemid = 51300 THEN 'WBC'
                WHEN itemid = 51301 THEN 'WBC'
            ELSE null
            END AS label
        , -- add in some sanity checks on the values
        -- the where clause below requires all valuenum to be > 0, so these are only upper limit checks
            CASE
            WHEN itemid = 50862 and valuenum >    10 THEN null -- g/dL 'ALBUMIN'
            WHEN itemid = 50868 and valuenum > 10000 THEN null -- mEq/L 'ANION GAP'
            WHEN itemid = 51144 and valuenum <     0 THEN null -- immature band forms, %
            WHEN itemid = 51144 and valuenum >   100 THEN null -- immature band forms, %
            WHEN itemid = 50882 and valuenum > 10000 THEN null -- mEq/L 'BICARBONATE'
            WHEN itemid = 50885 and valuenum >   150 THEN null -- mg/dL 'BILIRUBIN'
            WHEN itemid = 50806 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
            WHEN itemid = 50902 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE'
            WHEN itemid = 50912 and valuenum >   150 THEN null -- mg/dL 'CREATININE'
            WHEN itemid = 50809 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
            WHEN itemid = 50931 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE'
            WHEN itemid = 50810 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
            WHEN itemid = 51221 and valuenum >   100 THEN null -- % 'HEMATOCRIT'
            WHEN itemid = 50811 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
            WHEN itemid = 51222 and valuenum >    50 THEN null -- g/dL 'HEMOGLOBIN'
            WHEN itemid = 50813 and valuenum >    50 THEN null -- mmol/L 'LACTATE'
            WHEN itemid = 51265 and valuenum > 10000 THEN null -- K/uL 'PLATELET'
            WHEN itemid = 50822 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
            WHEN itemid = 50971 and valuenum >    30 THEN null -- mEq/L 'POTASSIUM'
            WHEN itemid = 51275 and valuenum >   150 THEN null -- sec 'PTT'
            WHEN itemid = 51237 and valuenum >    50 THEN null -- 'INR'
            WHEN itemid = 51274 and valuenum >   150 THEN null -- sec 'PT'
            WHEN itemid = 50824 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
            WHEN itemid = 50983 and valuenum >   200 THEN null -- mEq/L == mmol/L 'SODIUM'
            WHEN itemid = 51006 and valuenum >   300 THEN null -- 'BUN'
            WHEN itemid = 51300 and valuenum >  1000 THEN null -- 'WBC'
            WHEN itemid = 51301 and valuenum >  1000 THEN null -- 'WBC'
            ELSE valuenum
            END AS valuenum
        FROM `physionet-data.mimiciii_clinical.labevents`
        WHERE ITEMID in
        (
            -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
            50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895
            50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697
            51144, -- BANDS - hematology
            50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733
            50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
            50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
            50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568
            -- 50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD | 48187
            50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
            -- 50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
            51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD | 881846
            -- 50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD | 89715
            51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
            -- 50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
            50813, -- LACTATE | BLOOD GAS | BLOOD | 187124
            51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD | 778444
            50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825
            -- 50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 192946
            51275, -- PTT | HEMATOLOGY | BLOOD | 474937
            51237, -- INR(PT) | HEMATOLOGY | BLOOD | 471183
            51274, -- PT | HEMATOLOGY | BLOOD | 469090
            50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
            -- 50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
            51006, -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925
            51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
            51300  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
        )
        AND valuenum IS NOT NULL AND valuenum > 0 -- lab values cannot be 0 and cannot be negative
        )
        , le_avg as
        (
        SELECT
            le.subject_id, le.charttime
            , avg(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP
            , avg(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN
            , avg(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS
            , avg(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE
            , avg(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN
            , avg(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE
            , avg(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE
            , avg(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE
            , avg(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT
            , avg(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN
            , avg(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE
            , avg(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET
            , avg(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM
            , avg(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT
            , avg(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR
            , avg(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT
            , avg(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null end) as SODIUM
            , avg(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN
            , avg(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC
        FROM le
        GROUP BY le.subject_id, le.charttime
        )
        select
        iid.icustay_id, adm.hadm_id, le_avg.*
        from le_avg
        left join adm
        on le_avg.subject_id  = adm.subject_id
        and le_avg.charttime >= adm.data_start
        and le_avg.charttime  < adm.data_end
        left join iid_assign iid
        on  le_avg.subject_id = iid.subject_id
        and le_avg.charttime >= iid.data_start
        and le_avg.charttime  < iid.data_end
        order by le_avg.subject_id, le_avg.charttime 
    )
    SELECT * FROM pivoted_lab
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.pivoted_gcs` AS (
    WITH pivoted_gcs as (
        with base as
        (
        select ce.icustay_id, ce.charttime
        -- pivot each value into its own column
        , max(case when ce.ITEMID in (454,223901) then ce.valuenum else null end) as GCSMotor
        , max(case
            when ce.ITEMID = 723 and ce.VALUE = '1.0 ET/Trach' then 0
            when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 0
            when ce.ITEMID in (723,223900) then ce.valuenum
            else null 
            end) as GCSVerbal
        , max(case when ce.ITEMID in (184,220739) then ce.valuenum else null end) as GCSEyes
        -- convert the data into a number, reserving a value of 0 for ET/Trach
        , max(case
            -- endotrach/vent is assigned a value of 0, later parsed specially
            when ce.ITEMID = 723 and ce.VALUE = '1.0 ET/Trach' then 1 -- carevue
            when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 1 -- metavision
            else 0 end)
            as endotrachflag
        , ROW_NUMBER ()
                OVER (PARTITION BY ce.icustay_id ORDER BY ce.charttime ASC) as rn
        from `physionet-data.mimiciii_clinical.chartevents` ce
        -- Isolate the desired GCS variables
        where ce.ITEMID in
        (
            -- 198 -- GCS
            -- GCS components, CareVue
            184, 454, 723
            -- GCS components, Metavision
            , 223900, 223901, 220739
        )
        -- exclude rows marked as error
        and ce.error != 1
        group by ce.ICUSTAY_ID, ce.charttime
        )
        , gcs as (
        select b.*
        , b2.GCSVerbal as GCSVerbalPrev
        , b2.GCSMotor as GCSMotorPrev
        , b2.GCSEyes as GCSEyesPrev
        -- Calculate GCS, factoring in special case when they are intubated and prev vals
        -- note that the coalesce are used to implement the following if:
        --  if current value exists, use it
        --  if previous value exists, use it
        --  otherwise, default to normal
        , case
            -- replace GCS during sedation with 15
            when b.GCSVerbal = 0
                then 15
            when b.GCSVerbal is null and b2.GCSVerbal = 0
                then 15
            -- if previously they were intub, but they aren't now, do not use previous GCS values
            when b2.GCSVerbal = 0
                then
                    coalesce(b.GCSMotor,6)
                + coalesce(b.GCSVerbal,5)
                + coalesce(b.GCSEyes,4)
            -- otherwise, add up score normally, imputing previous value if none available at current time
            else
                    coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))
                + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))
                + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))
            end as GCS

        from base b
        -- join to itself within 6 hours to get previous value
        left join base b2
            on b.ICUSTAY_ID = b2.ICUSTAY_ID
            and b.rn = b2.rn+1
            and b2.charttime > DATETIME_SUB(b.charttime, interval '6' hour)
        )
        -- combine components with previous within 6 hours
        -- filter down to cohort which is not excluded
        -- truncate charttime to the hour
        , gcs_stg as
        (
        select gs.icustay_id, gs.charttime
        , GCS
        , coalesce(GCSMotor,GCSMotorPrev) as GCSMotor
        , coalesce(GCSVerbal,GCSVerbalPrev) as GCSVerbal
        , coalesce(GCSEyes,GCSEyesPrev) as GCSEyes
        , case when coalesce(GCSMotor,GCSMotorPrev) is null then 0 else 1 end
        + case when coalesce(GCSVerbal,GCSVerbalPrev) is null then 0 else 1 end
        + case when coalesce(GCSEyes,GCSEyesPrev) is null then 0 else 1 end
            as components_measured
        , EndoTrachFlag
        from gcs gs
        )
        -- priority is:
        --  (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime
        , gcs_priority as
        (
        select icustay_id
            , charttime
            , GCS
            , GCSMotor
            , GCSVerbal
            , GCSEyes
            , EndoTrachFlag
            , ROW_NUMBER() over
            (
                PARTITION BY icustay_id, charttime
                ORDER BY components_measured DESC, endotrachflag, gcs, charttime DESC
            ) as rn
        from gcs_stg
        )
        select icustay_id
        , charttime
        , GCS
        , GCSMotor
        , GCSVerbal
        , GCSEyes
        , EndoTrachFlag
        from gcs_priority gs
        where rn = 1
        ORDER BY icustay_id, charttime
    )
    SELECT * FROM pivoted_gcs
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.ventdurations` AS (
    WITH ventsettings AS (
        select
        icustay_id, charttime
        -- case statement determining whether it is an instance of mech vent
        , max(
            case
            when itemid is null or value is null then 0 -- can't have null values
            when itemid = 720 and value != 'Other/Remarks' THEN 1  -- VentTypeRecorded
            when itemid = 223848 and value != 'Other' THEN 1
            when itemid = 223849 then 1 -- ventilator mode
            when itemid = 467 and value = 'Ventilator' THEN 1 -- O2 delivery device == ventilator
            when itemid in
                (
                445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
                , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume
                , 218,436,535,444,459,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean/Neg insp force ("RespPressure")
                , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure
                , 543 -- PlateauPressure
                , 5865,5866,224707,224709,224705,224706 -- APRV pressure
                , 60,437,505,506,686,220339,224700 -- PEEP
                , 3459 -- high pressure relief
                , 501,502,503,224702 -- PCV
                , 223,667,668,669,670,671,672 -- TCPCV
                , 224701 -- PSVlevel
                )
                THEN 1
            else 0
            end
            ) as MechVent
            , max(
            case
                -- initiation of oxygen therapy indicates the ventilation has ended
                when itemid = 226732 and value in
                (
                'Nasal cannula', -- 153714 observations
                'Face tent', -- 24601 observations
                'Aerosol-cool', -- 24560 observations
                'Trach mask ', -- 16435 observations
                'High flow neb', -- 10785 observations
                'Non-rebreather', -- 5182 observations
                'Venti mask ', -- 1947 observations
                'Medium conc mask ', -- 1888 observations
                'T-piece', -- 1135 observations
                'High flow nasal cannula', -- 925 observations
                'Ultrasonic neb', -- 9 observations
                'Vapomist' -- 3 observations
                ) then 1
                when itemid = 467 and value in
                (
                'Cannula', -- 278252 observations
                'Nasal Cannula', -- 248299 observations
                -- 'None', -- 95498 observations
                'Face Tent', -- 35766 observations
                'Aerosol-Cool', -- 33919 observations
                'Trach Mask', -- 32655 observations
                'Hi Flow Neb', -- 14070 observations
                'Non-Rebreather', -- 10856 observations
                'Venti Mask', -- 4279 observations
                'Medium Conc Mask', -- 2114 observations
                'Vapotherm', -- 1655 observations
                'T-Piece', -- 779 observations
                'Hood', -- 670 observations
                'Hut', -- 150 observations
                'TranstrachealCat', -- 78 observations
                'Heated Neb', -- 37 observations
                'Ultrasonic Neb' -- 2 observations
                ) then 1
            else 0
            end
            ) as OxygenTherapy
            , max(
            case when itemid is null or value is null then 0
                -- extubated indicates ventilation event has ended
                when itemid = 640 and value = 'Extubated' then 1
                when itemid = 640 and value = 'Self Extubation' then 1
            else 0
            end
            )
            as Extubated
            , max(
            case when itemid is null or value is null then 0
                when itemid = 640 and value = 'Self Extubation' then 1
            else 0
            end
            )
            as SelfExtubated
        from `physionet-data.mimiciii_clinical.chartevents` ce
        where ce.value is not null
        -- exclude rows marked as error
        and ce.error != 1
        and itemid in
        (
            -- the below are settings used to indicate ventilation
            720, 223849 -- vent mode
            , 223848 -- vent type
            , 445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
            , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume
            , 218,436,535,444,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean ("RespPressure")
            , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure
            , 543 -- PlateauPressure
            , 5865,5866,224707,224709,224705,224706 -- APRV pressure
            , 60,437,505,506,686,220339,224700 -- PEEP
            , 3459 -- high pressure relief
            , 501,502,503,224702 -- PCV
            , 223,667,668,669,670,671,672 -- TCPCV
            , 224701 -- PSVlevel

            -- the below are settings used to indicate extubation
            , 640 -- extubated

            -- the below indicate oxygen/NIV, i.e. the end of a mechanical vent event
            , 468 -- O2 Delivery Device#2
            , 469 -- O2 Delivery Mode
            , 470 -- O2 Flow (lpm)
            , 471 -- O2 Flow (lpm) #2
            , 227287 -- O2 Flow (additional cannula)
            , 226732 -- O2 Delivery Device(s)
            , 223834 -- O2 Flow

            -- used in both oxygen + vent calculation
            , 467 -- O2 Delivery Device
        )
        group by icustay_id, charttime
        UNION ALL
        -- add in the extubation flags from procedureevents_mv
        -- note that we only need the start time for the extubation
        -- (extubation is always charted as ending 1 minute after it started)
        select
        icustay_id, starttime as charttime
        , 0 as MechVent
        , 0 as OxygenTherapy
        , 1 as Extubated
        , case when itemid = 225468 then 1 else 0 end as SelfExtubated
        from `physionet-data.mimiciii_clinical.procedureevents_mv`
        where itemid in
        (
        227194 -- "Extubation"
        , 225468 -- "Unplanned Extubation (patient-initiated)"
        , 225477 -- "Unplanned Extubation (non-patient initiated)"
        )
    ), ventdurations as (
        with vd0 as
        (
        select
            icustay_id
            -- this carries over the previous charttime which had a mechanical ventilation event
            , case
                when MechVent=1 then
                LAG(CHARTTIME, 1) OVER (partition by icustay_id, MechVent order by charttime)
                else null
            end as charttime_lag
            , charttime
            , MechVent
            , OxygenTherapy
            , Extubated
            , SelfExtubated
        from ventsettings
        )
        , vd1 as
        (
        select
            icustay_id
            , charttime_lag
            , charttime
            , MechVent
            , OxygenTherapy
            , Extubated
            , SelfExtubated

            -- if this is a mechanical ventilation event, we calculate the time since the last event
            , case
                -- if the current observation indicates mechanical ventilation is present
                -- calculate the time since the last vent event
                when MechVent=1 then
                    DATETIME_DIFF(CHARTTIME,charttime_lag,HOUR)
                else null
                end as ventduration

            , LAG(Extubated,1)
            OVER
            (
            partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end
            order by charttime
            ) as ExtubatedLag

            -- now we determine if the current mech vent event is a "new", i.e. they've just been intubated
            , case
                -- if there is an extubation flag, we mark any subsequent ventilation as a new ventilation event
                --when Extubated = 1 then 0 -- extubation is *not* a new ventilation event, the *subsequent* row is
                when
                    LAG(Extubated,1)
                    OVER
                    (
                    partition by icustay_id, case when MechVent=1 or Extubated=1 then 1 else 0 end
                    order by charttime
                    )
                    = 1 then 1
                -- if patient has initiated oxygen therapy, and is not currently vented, start a newvent
                when MechVent = 0 and OxygenTherapy = 1 then 1
                    -- if there is less than 8 hours between vent settings, we do not treat this as a new ventilation event
                when DATETIME_DIFF(CHARTTIME,charttime_lag,HOUR) > 8
                    then 1
                else 0
                end as newvent
        -- use the staging table with only vent settings from chart events
        FROM vd0 ventsettings
        )
        , vd2 as
        (
        select vd1.*
        -- create a cumulative sum of the instances of new ventilation
        -- this results in a monotonic integer assigned to each instance of ventilation
        , case when MechVent=1 or Extubated = 1 then
            SUM( newvent )
            OVER ( partition by icustay_id order by charttime )
            else null end
            as ventnum
        --- now we convert CHARTTIME of ventilator settings into durations
        from vd1
        )
        -- create the durations for each mechanical ventilation instance
        SELECT
        icustay_id
        -- regenerate ventnum so it's sequential
        , ventnum
        , min(charttime) as starttime
        , max(charttime) as endtime
        , DATETIME_DIFF(max(charttime),min(charttime), HOUR) AS duration_hours
        from (
            SELECT 
            icustay_id
            , ROW_NUMBER() over (partition by icustay_id order by ventnum) as ventnum
            , charttime
            , mechvent
            from vd2
        )
        group by icustay_id, ventnum
        having min(charttime) != max(charttime)
        -- patient had to be mechanically ventilated at least once
        -- i.e. max(mechvent) should be 1
        -- this excludes a frequent situation of NIV/oxygen before intub
        -- in these cases, ventnum=0 and max(mechvent)=0, so they are ignored
        and max(mechvent) = 1
        order by icustay_id, ventnum
    )
    SELECT * FROM ventdurations
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.mimic_co` as (
    with co_stg as
    (
    select icustay_id, hadm_id
    , DATETIME_TRUNC(intime, HOUR) as intime
    , outtime
    , GENERATE_ARRAY(
        -24,
        DATETIME_DIFF(outtime,intime,HOUR)
    ) AS hr
    from `physionet-data.mimiciii_clinical.icustays` ie
    inner join  `physionet-data.mimiciii_clinical.patients` pt
        on ie.subject_id = pt.subject_id
    -- filter to adults by removing admissions with DOB ~= admission time
    where ie.intime > DATETIME_ADD(pt.dob, interval '1' year)
    )
    -- add in the charttime column
    , co as
    (
    select icustay_id, hadm_id, intime, outtime
    , DATETIME_ADD(intime,INTERVAL hr-1 HOUR) as starttime
    , DATETIME_ADD(intime, INTERVAL hr HOUR) as endtime
    , hr
    FROM co_stg
    CROSS JOIN UNNEST(co_stg.hr) as hr
    )
    SELECT * FROM co
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.pivoted_sofa` AS (
    WITH pivoted_sofa as (
        -- generate a row for every hour the patient was in the ICU
        -- get minimum blood pressure from chartevents
        with co as (
            SELECT * FROM `amsterdam-translation.amsterdam_custom.mimic_co`
        ), bp as
        (
        select ce.icustay_id
            , ce.charttime
            , min(valuenum) as MeanBP_min
        from `physionet-data.mimiciii_clinical.chartevents` ce
        -- exclude rows marked as error
        where ce.error != 1
        and ce.itemid in
        (
        -- MEAN ARTERIAL PRESSURE
        456, --"NBP Mean"
        52, --"Arterial BP Mean"
        6702, --	Arterial BP Mean #2
        443, --	Manual BP Mean(calc)
        220052, --"Arterial Blood Pressure mean"
        220181, --"Non Invasive Blood Pressure mean"
        225312  --"ART BP mean"
        )
        and valuenum > 0 and valuenum < 300
        group by ce.icustay_id, ce.charttime
        )
        , mini_agg as
        (
        select co.icustay_id, co.hr
        -- vitals
        , min(bp.MeanBP_min) as MeanBP_min
        -- gcs
        , min(gcs.GCS) as GCS_min
        -- labs
        , max(labs.bilirubin) as bilirubin_max
        , max(labs.creatinine) as creatinine_max
        , min(labs.platelet) as platelet_min
        -- because pafi has an interaction between vent/PaO2:FiO2, we need two columns for the score
        -- it can happen that the lowest unventilated PaO2/FiO2 is 68, but the lowest ventilated PaO2/FiO2 is 120
        -- in this case, the SOFA score is 3, *not* 4.
        , min(case when vd.icustay_id is null then pao2fio2ratio else null end) AS PaO2FiO2Ratio_novent
        , min(case when vd.icustay_id is not null then pao2fio2ratio else null end) AS PaO2FiO2Ratio_vent
        from co
        left join bp
            on co.icustay_id = bp.icustay_id
            and co.starttime < bp.charttime
            and co.endtime >= bp.charttime
        left join `amsterdam-translation.amsterdam_custom.pivoted_gcs` gcs
            on co.icustay_id = gcs.icustay_id
            and co.starttime < gcs.charttime
            and co.endtime >= gcs.charttime
        left join `amsterdam-translation.amsterdam_custom.pivoted_lab` labs
            on co.hadm_id = labs.hadm_id
            and co.starttime < labs.charttime
            and co.endtime >= labs.charttime
        -- bring in blood gases that occurred during this hour
        left join `amsterdam-translation.amsterdam_custom.pivoted_bg_art` bg
            on co.icustay_id = bg.icustay_id
            and co.starttime < bg.charttime
            and co.endtime >= bg.charttime
        -- at the time of the blood gas, determine if patient was ventilated
        left join `amsterdam-translation.amsterdam_custom.ventdurations` vd
            on co.icustay_id = vd.icustay_id
            and bg.charttime >= vd.starttime
            and bg.charttime <= vd.endtime
        group by co.icustay_id, co.hr
        )
        -- sum uo separately to prevent duplicating values
        , uo as
        (
        select co.icustay_id, co.hr
        -- uo
        , sum(uo.urineoutput) as UrineOutput
        from co
        left join `amsterdam-translation.amsterdam_custom.pivoted_uo` uo
            on co.icustay_id = uo.icustay_id
        WHERE co.starttime < uo.charttime
            and co.endtime >= uo.charttime
        group by co.icustay_id, co.hr
        )
        , scorecomp as
        (
        select
            co.icustay_id
            , co.hr
            , co.starttime, co.endtime
            , ma.PaO2FiO2Ratio_novent
            , ma.PaO2FiO2Ratio_vent
            , epi.vaso_rate as rate_epinephrine
            , nor.vaso_rate as rate_norepinephrine
            , dop.vaso_rate as rate_dopamine
            , dob.vaso_rate as rate_dobutamine
            , ma.MeanBP_min
            , ma.GCS_min
            -- uo
            , uo.urineoutput
            -- labs
            , ma.bilirubin_max
            , ma.creatinine_max
            , ma.platelet_min
        from co
        left join mini_agg ma
            on co.icustay_id = ma.icustay_id
            and co.hr = ma.hr
        left join uo 
            on co.icustay_id = uo.icustay_id
            and co.hr = uo.hr
        -- add in dose of vasopressors
        -- dose tables have 1 row for each start/stop interval,
        -- so no aggregation needed
        left join `amsterdam-translation.amsterdam_custom.epinephrine_dose` epi
            on co.icustay_id = epi.icustay_id
            and co.endtime > epi.starttime
            and co.endtime <= epi.endtime
        left join `amsterdam-translation.amsterdam_custom.norepinephrine_dose` nor
            on co.icustay_id = nor.icustay_id
            and co.endtime > nor.starttime
            and co.endtime <= nor.endtime
        left join `amsterdam-translation.amsterdam_custom.dopamine_dose` dop
            on co.icustay_id = dop.icustay_id
            and co.endtime > dop.starttime
            and co.endtime <= dop.endtime
        left join `amsterdam-translation.amsterdam_custom.dobutamine_dose` dob
            on co.icustay_id = dob.icustay_id
            and co.endtime > dob.starttime
            and co.endtime <= dob.endtime
        )
        , scorecalc as
        (
        -- Calculate the final score
        -- note that if the underlying data is missing, the component is null
        -- eventually these are treated as 0 (normal), but knowing when data is missing is useful for debugging
        select scorecomp.*
        -- Respiration
        , case
            when PaO2FiO2Ratio_vent   < 100 then 4
            when PaO2FiO2Ratio_vent   < 200 then 3
            when PaO2FiO2Ratio_novent < 300 then 2
            when PaO2FiO2Ratio_novent < 400 then 1
            when coalesce(PaO2FiO2Ratio_vent, PaO2FiO2Ratio_novent) is null then null
            else 0
            end as respiration

        -- Coagulation
        , case
            when platelet_min < 20  then 4
            when platelet_min < 50  then 3
            when platelet_min < 100 then 2
            when platelet_min < 150 then 1
            when platelet_min is null then null
            else 0
            end as coagulation

        -- Liver
        , case
            -- Bilirubin checks in mg/dL
                when Bilirubin_Max >= 12.0 then 4
                when Bilirubin_Max >= 6.0  then 3
                when Bilirubin_Max >= 2.0  then 2
                when Bilirubin_Max >= 1.2  then 1
                when Bilirubin_Max is null then null
                else 0
            end as liver

        -- Cardiovascular
        , case
            when rate_dopamine > 15 or rate_epinephrine >  0.1 or rate_norepinephrine >  0.1 then 4
            when rate_dopamine >  5 or rate_epinephrine <= 0.1 or rate_norepinephrine <= 0.1 then 3
            when rate_dopamine >  0 or rate_dobutamine > 0 then 2
            when MeanBP_Min < 70 then 1
            when coalesce(MeanBP_Min, rate_dopamine, rate_dobutamine, rate_epinephrine, rate_norepinephrine) is null then null
            else 0
            end as cardiovascular

        -- Neurological failure (GCS)
        , case
            when (GCS_min >= 13 and GCS_min <= 14) then 1
            when (GCS_min >= 10 and GCS_min <= 12) then 2
            when (GCS_min >=  6 and GCS_min <=  9) then 3
            when  GCS_min <   6 then 4
            when  GCS_min is null then null
        else 0 end
            as cns

        -- Renal failure - high creatinine or low urine output
        , case
            when (Creatinine_Max >= 5.0) then 4
            when
            SUM(urineoutput) OVER W < 200
                then 4
            when (Creatinine_Max >= 3.5 and Creatinine_Max < 5.0) then 3
            when
            SUM(urineoutput) OVER W < 500
                then 3
            when (Creatinine_Max >= 2.0 and Creatinine_Max < 3.5) then 2
            when (Creatinine_Max >= 1.2 and Creatinine_Max < 2.0) then 1
            when coalesce
            (
                SUM(urineoutput) OVER W
                , Creatinine_Max
            ) is null then null
        else 0 end
            as renal
        from scorecomp
        WINDOW W as
        (
            PARTITION BY icustay_id
            ORDER BY hr
            ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING
        )
        )
        , score_final as
        (
        select s.*
            -- Combine all the scores to get SOFA
            -- Impute 0 if the score is missing
        -- the window function takes the max over the last 24 hours
            , coalesce(MAX(respiration) OVER W, 0) as respiration_24hours
            , coalesce(MAX(coagulation) OVER W, 0) as coagulation_24hours
            , coalesce(MAX(liver) OVER W, 0) as liver_24hours
            , coalesce(MAX(cardiovascular) OVER W,0) as cardiovascular_24hours
            , coalesce(MAX(cns) OVER W,0) as cns_24hours
            , coalesce(MAX(renal) OVER W,0) as renal_24hours

            -- sum together data for final SOFA
            , (coalesce(MAX(respiration) OVER W,0)
            + coalesce(MAX(coagulation) OVER W, 0)
            + coalesce(MAX(liver) OVER W, 0)
            + coalesce(MAX(cardiovascular) OVER W, 0)
            + coalesce(MAX(cns) OVER W, 0)
            + coalesce(MAX(renal) OVER W, 0)
            )
            as SOFA_24hours
        from scorecalc s
        WINDOW W as
        (
            PARTITION BY icustay_id
            ORDER BY hr
            ROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING
        )
        )
        select * from score_final
        where hr >= 0
        order by icustay_id, hr
    )
    SELECT
    icustay_id,
    starttime,
    endtime,
    MAX(respiration_24hours) as respiration_24hours,
    MAX(coagulation_24hours) as coagulation_24hours,
    MAX(liver_24hours) as liver_24hours,
    MAX(cardiovascular_24hours) as cardiovascular_24hours,
    MAX(cns_24hours) as cns_24hours,
    MAX(renal_24hours) as renal_24hours,
    MAX(SOFA_24hours) as SOFA_24hours
    FROM pivoted_sofa
    GROUP BY icustay_id, starttime, endtime
    ORDER BY icustay_id, starttime
); 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.mimic_transfusion` as (
WITH demographics as (
    SELECT ie.subject_id, ie.hadm_id, ie.icustay_id
    -- patient level factors
    , pat.gender, pat.dod

    -- hospital level factors
    , adm.admittime, adm.dischtime
    , DATETIME_DIFF(adm.admittime,pat.dob,YEAR) AS age
    , adm.ethnicity
    , case when ethnicity in
    (
        'WHITE' --  40996
        , 'WHITE - RUSSIAN' --    164
        , 'WHITE - OTHER EUROPEAN' --     81
        , 'WHITE - BRAZILIAN' --     59
        , 'WHITE - EASTERN EUROPEAN' --     25
    ) then 'white'
    when ethnicity in
    (
        'BLACK/AFRICAN AMERICAN' --   5440
        , 'BLACK/CAPE VERDEAN' --    200
        , 'BLACK/HAITIAN' --    101
        , 'BLACK/AFRICAN' --     44
        , 'CARIBBEAN ISLAND' --      9
    ) then 'black'
    when ethnicity in
        (
        'HISPANIC OR LATINO' --   1696
        , 'HISPANIC/LATINO - PUERTO RICAN' --    232
        , 'HISPANIC/LATINO - DOMINICAN' --     78
        , 'HISPANIC/LATINO - GUATEMALAN' --     40
        , 'HISPANIC/LATINO - CUBAN' --     24
        , 'HISPANIC/LATINO - SALVADORAN' --     19
        , 'HISPANIC/LATINO - CENTRAL AMERICAN (OTHER)' --     13
        , 'HISPANIC/LATINO - MEXICAN' --     13
        , 'HISPANIC/LATINO - COLOMBIAN' --      9
        , 'HISPANIC/LATINO - HONDURAN' --      4
    ) then 'hispanic'
    when ethnicity in
    (
        'ASIAN' --   1509
        , 'ASIAN - CHINESE' --    277
        , 'ASIAN - ASIAN INDIAN' --     85
        , 'ASIAN - VIETNAMESE' --     53
        , 'ASIAN - FILIPINO' --     25
        , 'ASIAN - CAMBODIAN' --     17
        , 'ASIAN - OTHER' --     17
        , 'ASIAN - KOREAN' --     13
        , 'ASIAN - JAPANESE' --      7
        , 'ASIAN - THAI' --      4
    ) then 'asian'
    when ethnicity in
    (
        'AMERICAN INDIAN/ALASKA NATIVE' --     51
        , 'AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE' --      3
    ) then 'native'
    when ethnicity in
    (
        'UNKNOWN/NOT SPECIFIED' --   4523
        , 'UNABLE TO OBTAIN' --    814
        , 'PATIENT DECLINED TO ANSWER' --    559
    ) then 'unknown'
    else 'other' end as ethnicity_grouped
    -- , 'OTHER' --   1512
    -- , 'MULTI RACE ETHNICITY' --    130
    -- , 'PORTUGUESE' --     61
    -- , 'MIDDLE EASTERN' --     43
    -- , 'NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER' --     18
    -- , 'SOUTH AMERICAN' --      8

    , adm.admission_type
    , adm.hospital_expire_flag
    , DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime) AS hospstay_seq
    , CASE
        WHEN DENSE_RANK() OVER (PARTITION BY adm.subject_id ORDER BY adm.admittime) = 1 THEN True
        ELSE False END AS first_hosp_stay

    -- icu level factors
    , ie.intime, ie.outtime
    , DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) AS icustay_seq

    -- first ICU stay *for the current hospitalization*
    , CASE
        WHEN DENSE_RANK() OVER (PARTITION BY ie.hadm_id ORDER BY ie.intime) = 1 THEN True
        ELSE False END AS first_icu_stay

    FROM `physionet-data.mimiciii_clinical.icustays` ie
    INNER JOIN `physionet-data.mimiciii_clinical.admissions` adm
        ON ie.hadm_id = adm.hadm_id
    INNER JOIN `physionet-data.mimiciii_clinical.patients` pat
        ON ie.subject_id = pat.subject_id
    ORDER BY ie.subject_id, adm.admittime, ie.intime
), pivoted_sofa AS (
    SELECT * FROM `amsterdam-translation.amsterdam_custom.pivoted_sofa`
), pivoted_lab AS (
    SELECT * FROM `amsterdam-translation.amsterdam_custom.pivoted_lab`
), mechvent AS (
    select
    icustay_id, charttime
    -- case statement determining whether it is an instance of mech vent
    , max(
        case
        when itemid is null or value is null then 0 -- can't have null values
        when itemid = 720 and value != 'Other/Remarks' THEN 1  -- VentTypeRecorded
        when itemid = 223848 and value != 'Other' THEN 1
        when itemid = 223849 then 1 -- ventilator mode
        when itemid = 467 and value = 'Ventilator' THEN 1 -- O2 delivery device == ventilator
        when itemid in
            (
            445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
            , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume
            , 218,436,535,444,459,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean/Neg insp force ("RespPressure")
            , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure
            , 543 -- PlateauPressure
            , 5865,5866,224707,224709,224705,224706 -- APRV pressure
            , 60,437,505,506,686,220339,224700 -- PEEP
            , 3459 -- high pressure relief
            , 501,502,503,224702 -- PCV
            , 223,667,668,669,670,671,672 -- TCPCV
            , 224701 -- PSVlevel
            )
            THEN 1
        else 0
        end
        ) as ventmarker
    from `physionet-data.mimiciii_clinical.chartevents` ce
    where ce.value is not null
    -- exclude rows marked as error
    and ce.error != 1
    and itemid in
    (
        -- the below are settings used to indicate ventilation
        720, 223849 -- vent mode
        , 223848 -- vent type
        , 445, 448, 449, 450, 1340, 1486, 1600, 224687 -- minute volume
        , 639, 654, 681, 682, 683, 684,224685,224684,224686 -- tidal volume
        , 218,436,535,444,224697,224695,224696,224746,224747 -- High/Low/Peak/Mean ("RespPressure")
        , 221,1,1211,1655,2000,226873,224738,224419,224750,227187 -- Insp pressure
        , 543 -- PlateauPressure
        , 5865,5866,224707,224709,224705,224706 -- APRV pressure
        , 60,437,505,506,686,220339,224700 -- PEEP
        , 3459 -- high pressure relief
        , 501,502,503,224702 -- PCV
        , 223,667,668,669,670,671,672 -- TCPCV
        , 224701 -- PSVlevel

        -- the below are settings used to indicate extubation
        , 640 -- extubated

        -- the below indicate oxygen/NIV, i.e. the end of a mechanical vent event
        , 468 -- O2 Delivery Device#2
        , 469 -- O2 Delivery Mode
        , 470 -- O2 Flow (lpm)
        , 471 -- O2 Flow (lpm) #2
        , 227287 -- O2 Flow (additional cannula)
        , 226732 -- O2 Delivery Device(s)
        , 223834 -- O2 Flow

        -- used in both oxygen + vent calculation
        , 467 -- O2 Delivery Device
    )
    group by icustay_id, charttime
), vasopressor_duration as (
    with io_cv as
    (
    select
        icustay_id, charttime, itemid, stopped
        -- ITEMIDs (42273, 42802) accidentally store rate in amount column
        , case
            when itemid in (42273, 42802)
            then amount
            else rate
        end as rate
        , case
            when itemid in (42273, 42802)
            then rate
            else amount
        end as amount
    from `physionet-data.mimiciii_clinical.inputevents_cv`
    where itemid in
    (
        30047,30120,30044,30119,30309,30127
    , 30128,30051,30043,30307,30042,30306,30125
    , 42273, 42802
    )
    )
    -- select only the ITEMIDs from the inputevents_mv table related to vasopressors
    , io_mv as
    (
    select
        icustay_id, linkorderid, starttime, endtime
    from `physionet-data.mimiciii_clinical.inputevents_mv` io
    -- Subselect the vasopressor ITEMIDs
    where itemid in
    (
    221906,221289,221749,222315,221662,221653,221986
    )
    and statusdescription != 'Rewritten' -- only valid orders
    )
    , vasocv1 as
    (
    select
        icustay_id, charttime, itemid
        -- case statement determining whether the ITEMID is an instance of vasopressor usage
        , 1 as vaso

        -- the 'stopped' column indicates if a vasopressor has been disconnected
        , max(case when stopped in ('Stopped','D/C','d') then 1
            else 0 end) as vaso_stopped

        , max(case when rate is not null then 1 else 0 end) as vaso_null
        , max(rate) as vaso_rate
        , max(amount) as vaso_amount

    from io_cv
    group by icustay_id, charttime, itemid
    )
    , vasocv2 as
    (
    select v.*
        , sum(vaso_null) over (partition by icustay_id, itemid order by charttime) as vaso_partition
    from
        vasocv1 v
    )
    , vasocv3 as
    (
    select v.*
        , first_value(vaso_rate) over (partition by icustay_id, itemid, vaso_partition order by charttime) as vaso_prevrate_ifnull
    from
        vasocv2 v
    )
    , vasocv4 as
    (
    select
        icustay_id
        , charttime
        , itemid
        -- , (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) AS delta

        , vaso
        , vaso_rate
        , vaso_amount
        , vaso_stopped
        , vaso_prevrate_ifnull

        -- We define start time here
        , case
            when vaso = 0 then null

            -- if this is the first instance of the vasoactive drug
            when vaso_rate > 0 and
            LAG(vaso_prevrate_ifnull,1)
            OVER
            (
            partition by icustay_id, itemid, vaso, vaso_null
            order by charttime
            )
            is null
            then 1

            -- you often get a string of 0s
            -- we decide not to set these as 1, just because it makes vasonum sequential
            when vaso_rate = 0 and
            LAG(vaso_prevrate_ifnull,1)
            OVER
            (
            partition by icustay_id, itemid, vaso
            order by charttime
            )
            = 0
            then 0

            -- sometimes you get a string of NULL, associated with 0 volumes
            -- same reason as before, we decide not to set these as 1
            -- vaso_prevrate_ifnull is equal to the previous value *iff* the current value is null
            when vaso_prevrate_ifnull = 0 and
            LAG(vaso_prevrate_ifnull,1)
            OVER
            (
            partition by icustay_id, itemid, vaso
            order by charttime
            )
            = 0
            then 0

            -- If the last recorded rate was 0, newvaso = 1
            when LAG(vaso_prevrate_ifnull,1)
            OVER
            (
            partition by icustay_id, itemid, vaso
            order by charttime
            ) = 0
            then 1

            -- If the last recorded vaso was D/C'd, newvaso = 1
            when
            LAG(vaso_stopped,1)
            OVER
            (
            partition by icustay_id, itemid, vaso
            order by charttime
            )
            = 1 then 1

            -- ** not sure if the below is needed
            --when (CHARTTIME - (LAG(CHARTTIME, 1) OVER (partition by icustay_id, vaso order by charttime))) > (interval '4 hours') then 1
        else null
        end as vaso_start

    FROM
    vasocv3
    )
    -- propagate start/stop flags forward in time
    , vasocv5 as
    (
    select v.*
        , SUM(vaso_start) OVER (partition by icustay_id, itemid, vaso order by charttime) as vaso_first
    FROM
    vasocv4 v
    )
    , vasocv6 as
    (
    select v.*
        -- We define end time here
        , case
            when vaso = 0
            then null

            -- If the recorded vaso was D/C'd, this is an end time
            when vaso_stopped = 1
            then vaso_first

            -- If the rate is zero, this is the end time
            when vaso_rate = 0
            then vaso_first

            -- the last row in the table is always a potential end time
            -- this captures patients who die/are discharged while on vasopressors
            -- in principle, this could add an extra end time for the vasopressor
            -- however, since we later group on vaso_start, any extra end times are ignored
            when LEAD(CHARTTIME,1)
            OVER
            (
            partition by icustay_id, itemid, vaso
            order by charttime
            ) is null
            then vaso_first

            else null
            end as vaso_stop
        from vasocv5 v
    )

    -- -- if you want to look at the results of the table before grouping:
    -- select
    --   icustay_id, charttime, vaso, vaso_rate, vaso_amount
    --     , case when vaso_stopped = 1 then 'Y' else '' end as stopped
    --     , vaso_start
    --     , vaso_first
    --     , vaso_stop
    -- from vasocv6 order by charttime;


    , vasocv as
    (
    -- below groups together vasopressor administrations into groups
    select
    icustay_id
    , itemid
    -- the first non-null rate is considered the starttime
    , min(case when vaso_rate is not null then charttime else null end) as starttime
    -- the *first* time the first/last flags agree is the stop time for this duration
    , min(case when vaso_first = vaso_stop then charttime else null end) as endtime
    from vasocv6
    where
    vaso_first is not null -- bogus data
    and
    vaso_first != 0 -- sometimes *only* a rate of 0 appears, i.e. the drug is never actually delivered
    and
    icustay_id is not null -- there are data for "floating" admissions, we don't worry about these
    group by icustay_id, itemid, vaso_first
    having -- ensure start time is not the same as end time
    min(charttime) != min(case when vaso_first = vaso_stop then charttime else null end)
    and
    max(vaso_rate) > 0 -- if the rate was always 0 or null, we consider it not a real drug delivery
    )
    -- we do not group by ITEMID in below query
    -- this is because we want to collapse all vasopressors together
    , vasocv_grp as
    (
    SELECT
    s1.icustay_id,
    s1.starttime,
    MIN(t1.endtime) AS endtime
    FROM vasocv s1
    INNER JOIN vasocv t1
    ON  s1.icustay_id = t1.icustay_id
    WHERE s1.starttime <= t1.endtime
    AND NOT EXISTS(SELECT * FROM vasocv t2
                    WHERE t1.icustay_id = t2.icustay_id
                    AND t1.endtime >= t2.starttime
                    AND t1.endtime < t2.endtime)
    AND NOT EXISTS(SELECT * FROM vasocv s2
                    WHERE s1.icustay_id = s2.icustay_id
                    AND s1.starttime > s2.starttime
                    AND s1.starttime <= s2.endtime)
    GROUP BY s1.icustay_id, s1.starttime
    ORDER BY s1.icustay_id, s1.starttime
    )
    -- now we extract the associated data for metavision patients
    -- do not need to group by itemid because we group by linkorderid
    , vasomv as
    (
    select
        icustay_id, linkorderid
        , min(starttime) as starttime, max(endtime) as endtime
    from io_mv
    group by icustay_id, linkorderid
    )
    , vasomv_grp as
    (
    SELECT
    s1.icustay_id,
    s1.starttime,
    MIN(t1.endtime) AS endtime
    FROM vasomv s1
    INNER JOIN vasomv t1
    ON  s1.icustay_id = t1.icustay_id
    WHERE s1.starttime <= t1.endtime
    AND NOT EXISTS(SELECT * FROM vasomv t2
                WHERE t1.icustay_id = t2.icustay_id
                AND t1.endtime >= t2.starttime
                AND t1.endtime < t2.endtime)
    AND NOT EXISTS(SELECT * FROM vasomv s2
                WHERE s1.icustay_id = s2.icustay_id
                AND s1.starttime > s2.starttime
                AND s1.starttime <= s2.endtime)
    GROUP BY s1.icustay_id, s1.starttime
    ORDER BY s1.icustay_id, s1.starttime
    )
    select
    icustay_id
    -- generate a sequential integer for convenience
    , ROW_NUMBER() over (partition by icustay_id order by starttime) as vasonum
    , starttime, endtime
    , DATETIME_DIFF(endtime,starttime,HOUR) AS duration_hours
    -- add durations
    from
    vasocv_grp
    UNION ALL
    select
    icustay_id
    , ROW_NUMBER() over (partition by icustay_id order by starttime) as vasonum
    , starttime, endtime
    , DATETIME_DIFF(endtime,starttime,HOUR) AS duration_hours
    -- add durations
    from
    vasomv_grp
    order by icustay_id, vasonum
), sepsis as (
    -- ICD-9 codes for Angus criteria of sepsis
    SELECT hadm_id,
	MAX(CASE
		WHEN SUBSTR(icd9_code,1,3) IN ('001','002','003','004','005','008',
			   '009','010','011','012','013','014','015','016','017','018',
			   '020','021','022','023','024','025','026','027','030','031',
			   '032','033','034','035','036','037','038','039','040','041',
			   '090','091','092','093','094','095','096','097','098','100',
			   '101','102','103','104','110','111','112','114','115','116',
			   '117','118','320','322','324','325','420','421','451','461',
			   '462','463','464','465','481','482','485','486','494','510',
			   '513','540','541','542','566','567','590','597','601','614',
			   '615','616','681','682','683','686','730') THEN 1
		WHEN SUBSTR(icd9_code,1,4) IN ('5695','5720','5721','5750','5990','7110',
				'7907','9966','9985','9993') THEN 1
		WHEN SUBSTR(icd9_code,1,5) IN ('49121','56201','56203','56211','56213',
				'56983') THEN 1
		ELSE 0 END) AS sepsisflag
	FROM `physionet-data.mimiciii_clinical.diagnoses_icd`
    GROUP BY hadm_id
), premorbid as (
    with icd as (
    select hadm_id, seq_num
        , cast(icd9_code as STRING) as icd9_code
    from `physionet-data.mimiciii_clinical.diagnoses_icd`
    ), eliflg as (
        select hadm_id, seq_num, icd9_code
        -- note that these codes will seem incomplete at first
        -- for example, CHF is missing a lot of codes referenced in the literature (402.11, 402.91, etc)
        -- these codes are captured by hypertension flags instead
        -- later there are some complicated rules which confirm/reject those codes as CHF
        , CASE 
            when SUBSTR(icd9_code,1,3) IN(
                '410', --Acute myocardial infarction
                '411', --Other acute and subacute forms of ischemic heart disease
                '412', --Old myocardial infarction
                '413', --Angina pectoris
                '414' --Other forms of chronic ischemic heart disease
            ) THEN 1 END AS IHD

        , CASE
            when icd9_code = '39891' then 1
            when icd9_code between '4280 ' and '4289 ' then 1
                    end as CHF       /* Congestive heart failure */
    -- cardiac arrhythmias is removed in up to date versions
        , CASE
            when icd9_code between '4400 ' and '4409 ' then 1
            when icd9_code between '44100' and '4419 ' then 1
            when icd9_code between '4420 ' and '4429 ' then 1
            when icd9_code between '4431 ' and '4439 ' then 1
            when icd9_code between '44421' and '44422' then 1
            when icd9_code = '4471 ' then 1
            when icd9_code = '449  ' then 1
            when icd9_code = '5571 ' then 1
            when icd9_code = '5579 ' then 1
            when icd9_code = 'V434 ' then 1
                    end as PERIVASC  /* Peripheral vascular disorder */
        , CASE
            when icd9_code = '40200' then 1
            when icd9_code = '40210' then 1
            when icd9_code = '40290' then 1
            when icd9_code = '40509' then 1
            when icd9_code = '40519' then 1
            when icd9_code = '40599'         then 1
                    end as HTNWOCHF  /* Hypertensive heart disease without heart failure */
        , CASE
            when icd9_code = '40201' then 1
            when icd9_code = '40211' then 1
            when icd9_code = '40291'         then 1
                    end as HTNWCHF   /* Hypertensive heart disease with heart failure */
        , CASE
            when icd9_code = '40301' then 1
            when icd9_code = '40311' then 1
            when icd9_code = '40391'         then 1
                    end as HRENWRF   /* Hypertensive renal disease with renal failure */
        , CASE
        when icd9_code = '40401' then 1
        when icd9_code = '40411' then 1
        when icd9_code = '40491'         then 1
                end as HHRWCHF   /* Hypertensive heart and renal disease with heart failure */
        , CASE
        when icd9_code = '40402' then 1
        when icd9_code = '40412' then 1
        when icd9_code = '40492'         then 1
                end as HHRWRF    /* Hypertensive heart and renal disease with renal failure */
        , CASE
        when icd9_code = '40403' then 1
        when icd9_code = '40413' then 1
        when icd9_code = '40493'         then 1
                end as HHRWHRF   /* Hypertensive heart and renal disease with heart and renal failure */
        , CASE
        when icd9_code = '585  ' then 1 -- discontinued code
        when icd9_code = '5853 ' then 1
        when icd9_code = '5854 ' then 1
        when icd9_code = '5855 ' then 1
        when icd9_code = '5856 ' then 1
        when icd9_code = '5859 ' then 1
        when icd9_code = '586  ' then 1
        when icd9_code = 'V420 ' then 1
        when icd9_code = 'V451 ' then 1
        when icd9_code between 'V560 ' and 'V5632' then 1
        when icd9_code = 'V568 ' then 1
        when icd9_code between 'V4511' and 'V4512' then 1
                end as RENLFAIL  /* Renal failure */
        FROM icd
    )
    -- collapse the icd9_code specific flags into hadm_id specific flags
    -- this groups comorbidities together for a single patient admission
    , eligrp as (
        select hadm_id
        , max(ihd) as ihd
        , max(chf) as chf
        , max(perivasc) as perivasc
        , max(htnwchf) as htnwchf
        , max(hrenwrf) as hrenwrf
        , max(hhrwchf) as hhrwchf
        , max(hhrwrf) as hhrwrf
        , max(hhrwhrf) as hhrwhrf
        , max(renlfail) as renlfail
        from eliflg
        group by hadm_id
    )
    -- now merge these flags together to define elixhauser
    -- most are straightforward.. but hypertension flags are a bit more complicated
    select adm.subject_id, adm.hadm_id
    , case when ihd = 1 then 1 else 0  end as ISCHEMIC_HEART_DISEASE
    , case
        when chf     = 1 then 1
        when htnwchf = 1 then 1
        when hhrwchf = 1 then 1
        when hhrwhrf = 1 then 1
    else 0 end as CONGESTIVE_HEART_FAILURE
    , case when perivasc = 1 then 1 else 0 end as PERIPHERAL_VASCULAR
    , case
        when renlfail = 1 then 1
        when hrenwrf  = 1 then 1
        when hhrwrf   = 1 then 1
        when hhrwhrf  = 1 then 1
    else 0 end as RENAL_FAILURE
    from `physionet-data.mimiciii_clinical.admissions` adm
    left join eligrp eli
        on adm.hadm_id = eli.hadm_id
    order by adm.hadm_id
), surgflag as (
  select adm.hadm_id
    , case when lower(curr_service) like '%surg%' then 1 else 0 end as surgical
    , ROW_NUMBER() over
    (
      PARTITION BY adm.HADM_ID
      ORDER BY TRANSFERTIME
    ) as serviceOrder
  from `physionet-data.mimiciii_clinical.admissions` adm
  left join `physionet-data.mimiciii_clinical.services` se
    on adm.hadm_id = se.hadm_id
), transfusion AS (
    with t1 as
    (
    select
        mv.icustay_id
    , mv.starttime as charttime
    -- standardize the units to millilitres
    -- also metavision has floating point precision.. but we only care down to the mL
    , round(case
        when mv.amountuom = 'L'
            then mv.amount * 1000.0
        when mv.amountuom = 'ml'
            then mv.amount
        else null end) as amount
    from `physionet-data.mimiciii_clinical.inputevents_mv` mv
    where mv.itemid in
    (
        225168, -- Packed Red Blood Cells
        220996 -- Packed Red Cells
    )
    and mv.statusdescription != 'Rewritten'
    and
    -- in MetaVision, these ITEMIDs never appear with a null rate
    -- so it is sufficient to check the rate is > 10
        (
        (mv.rateuom = 'mL/hour' and mv.rate > 100)
        OR (mv.rateuom = 'mL/min' and mv.rate > (100/60.0))
        OR (mv.rateuom = 'mL/kg/hour' and (mv.rate*mv.patientweight) > 100)
        )
    )
    , t2 as
    (
    select
        cv.icustay_id
    , cv.charttime
    -- carevue always has units in millilitres (or null)
    , round(cv.amount) as amount
    from `physionet-data.mimiciii_clinical.inputevents_cv` cv
    where cv.itemid in
    (
    30179, -- PRBC's
    30001, -- Packed RBC's
    30004, -- Washed PRBC's
    42588, -- VICU PRBC 
    42239 -- CC7 PRBC
    )
    and cv.amount > 100
    and cv.amount < 2000
    ), t3 as
    (
    select
        ce.icustay_id
    , ce.charttime
    -- carevue always has units in millilitres (or null)
    , round(ce.valuenum) as amount
    from `physionet-data.mimiciii_clinical.chartevents` ce
    where ce.itemid in
    (
        --chartevents
        5649, -- PRBCS
        5751, -- PRBCS
        7597 -- PRBC
    )
    and ce.valuenum is not null
    and ce.valuenum > 100
    and ce.valuenum < 2000
    )
    --there are zero entries from chartevents when reviewed
    select
    t1.icustay_id
    , charttime
    , sum(amount) as rbc_transfusion
    from t1
    -- just because the rate was high enough, does *not* mean the final amount was
    where amount > 100
    group by t1.icustay_id, t1.charttime
    UNION ALL
    select
    t2.icustay_id
    , charttime
    , sum(amount) as rbc_transfusion
    from t2
    group by t2.icustay_id, t2.charttime
), transfusionoffset as (
    SELECT 
    icustay_id,
    COUNT(rbc_transfusion) as transfusioncount,
    MIN(charttime) AS charttime,
    FROM transfusion
    WHERE charttime IS NOT NULL AND rbc_transfusion IS NOT NULL
    GROUP BY icustay_id
), icu_mort as (
    SELECT 
    adm.hadm_id,
    icu.icustay_id,
    icu.INTIME,
    icu.OUTTIME,
    adm.DEATHTIME,
    CASE WHEN adm.DEATHTIME BETWEEN icu.INTIME AND DATETIME_ADD(icu.OUTTIME, INTERVAL 24 HOUR) THEN 1 ELSE 0 END AS icu_mort
    FROM `physionet-data.mimiciii_clinical.admissions` adm
    LEFT JOIN `physionet-data.mimiciii_clinical.icustays` icu
        ON adm.hadm_id=icu.hadm_id
)
SELECT
d.icustay_id,
MAX(d.age) as age,
MAX(d.gender) as gender,
MAX(d.hospital_expire_flag) as expiremarker,
MAX(CASE WHEN d.hospital_expire_flag = 1 THEN DATETIME_DIFF(d.dod,d.intime,DAY) ELSE NULL END) AS death_offset,
MAX(d.first_icu_stay) as first_icu_stay,
MIN(CASE WHEN hgbmin = p.admission_hgbmin THEN 1 ELSE 0 END) AS subgroup_flag,
MIN(p.hgbmin) as hgbmin,
MIN(DATETIME_DIFF(p.hgbmin_offset,d.intime,DAY)) as hgbmin_offset,
MAX(CASE WHEN p.hgbmin_offset BETWEEN ps.starttime AND ps.endtime THEN ps.SOFA_24hours END) AS sofatotal_dayLowestHmg,
MAX(CASE WHEN DATETIME_ADD(p.hgbmin_offset,INTERVAL 1 DAY) BETWEEN ps.starttime AND ps.endtime THEN ps.SOFA_24hours END) AS sofatotal_dayAfterLowestHmg,
MAX(sepsis.sepsisflag) as septicflag,
MAX(pd.ISCHEMIC_HEART_DISEASE) as ihdflag,
MAX(pd.RENAL_FAILURE) as esrfflag,
MAX(pd.CONGESTIVE_HEART_FAILURE) as chfflag,
MAX(pd.PERIPHERAL_VASCULAR) as pvdflag,
MAX(CASE WHEN mv.ventmarker IS NULL THEN 0 ELSE mv.ventmarker END) as ventmarker,
MAX(CASE WHEN sf.surgical = 1 THEN 1 ELSE 0 END) as surgeryflag,
MAX(CASE WHEN d.icustay_id IN (SELECT
                               DISTINCT icustay_id
                               FROM
                               vasopressor_duration) THEN 1 ELSE 0 END) as vasopressor,
MIN(DATETIME_DIFF(t.charttime,d.intime,DAY)) as transfusion_offset,
MAX(CASE WHEN t.transfusioncount >= 1 THEN 1 ELSE 0 END) AS transfusedmarker,
MAX(CASE WHEN t.transfusioncount IS NOT NULL THEN t.transfusioncount ELSE 0 END) AS transfusioncount,
MAX(im.icu_mort) as icu_mort
FROM demographics d
LEFT JOIN transfusionoffset t
    ON d.icustay_id=t.icustay_id
LEFT JOIN (
    SELECT
    a.icustay_id,
    a.admission_hgbmin,
    a.transfusion_offset,
    a.hgbmin,
    CASE WHEN a.transfusion_offset IS NOT NULL
         AND p.charttime < a.transfusion_offset THEN p.charttime
         --ensure that timestamp of hgbmin is taken prior to any transfusions 
         WHEN a.transfusion_offset IS NULL THEN p.charttime
         --if there was no transfusion, then we just take the timestamp of the lowest hgbmin
         ELSE NULL END AS hgbmin_offset
    FROM (
        SELECT
        p.icustay_id,
        MIN(m.hgbmin) as admission_hgbmin,
        MIN(CASE WHEN t.transfusioncount >= 1 
                 AND DATETIME_DIFF(p.charttime,d.intime,DAY) > -1 --ensure that reading were taken within 1 day of ICU admission 
                 AND p.charttime BETWEEN DATETIME_SUB(t.charttime,INTERVAL 2 DAY) AND t.charttime THEN p.HEMOGLOBIN
                 --ensure that hgbmin is taken prior to any transfusions and within 2 days of transfusion
                 WHEN t.transfusioncount = 0 OR t.transfusioncount IS NULL 
                 AND DATETIME_DIFF(p.charttime,d.intime,DAY) > -1 --ensure that reading were taken within 1 day of ICU admission 
                 THEN m.hgbmin
                 --if there was no transfusion, then we just take the lowest hgbmin
                 ELSE NULL END) as hgbmin,
        MIN(t.charttime) as transfusion_offset
        FROM pivoted_lab p
        LEFT JOIN demographics d
            ON p.icustay_id=d.icustay_id
        LEFT JOIN (
            SELECT
            icustay_id,
            MIN(HEMOGLOBIN) as hgbmin,
            FROM pivoted_lab
            GROUP BY icustay_id
        ) m
            ON p.icustay_id=m.icustay_id
        LEFT JOIN transfusionoffset t
            ON p.icustay_id=t.icustay_id
        GROUP BY icustay_id
    ) a
    LEFT JOIN pivoted_lab p
        ON a.icustay_id=p.icustay_id
        AND p.HEMOGLOBIN = a.hgbmin
) p
    ON d.icustay_id=p.icustay_id
LEFT JOIN mechvent mv
    ON d.icustay_id=mv.icustay_id
LEFT JOIN sepsis
    ON d.hadm_id=sepsis.hadm_id
LEFT JOIN premorbid pd
    ON d.hadm_id=pd.hadm_id
LEFT JOIN surgflag sf
    ON d.hadm_id=sf.hadm_id
LEFT JOIN pivoted_sofa ps
    ON d.icustay_id=ps.icustay_id
LEFT JOIN icu_mort im
    ON d.icustay_id=im.icustay_id
GROUP BY d.icustay_id
)

### MIMIC cohort breakdown code

In [0]:
%%bigquery mimic_total_cases

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`

In [0]:
%%bigquery mimic_exclude_agebelow18

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18

In [0]:
%%bigquery mimic_exclude_readmissions

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True

In [0]:
%%bigquery mimic_exclude_bleeding_trauma_dx

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)

In [0]:
%%bigquery mimic_exclude_missing_fluids_data

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)

In [0]:
%%bigquery mimic_exclude_missing_hemoglobin

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL

In [0]:
%%bigquery mimic_exclude_hb_6

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6

In [0]:
%%bigquery mimic_exclude_missing_sofa_scores

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
AND sofatotal_dayLowestHmg IS NOT NULL
AND sofatotal_dayAfterLowestHmg IS NOT NULL

In [0]:
%%bigquery mimic_exclude_invalid_transfusion_offset

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2) --we exclude cases where transfusion occured > 2 days after minimum hb value
AND subgroup_flag = 1 

In [0]:
%%bigquery mimic_final_transfused

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusion_offset - hgbmin_offset) < 2 --we exclude cases where transfusion occured > 2 days after minimum hb value
AND subgroup_flag = 1 
AND transfusedmarker = 1

In [0]:
%%bigquery mimic_final_nontransfused

SELECT
COUNT(DISTINCT icustay_id)
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND subgroup_flag = 1 
AND transfusedmarker = 0

## eICU SQL code

In [0]:
%%bigquery

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.eicu_transfusion_count` AS(
    SELECT
    t.patientunitstayid,
    COUNT(treatmentstring) as transfusioncount,
    MIN(treatmentoffset) as treatmentoffset,
    FROM `physionet-data.eicu_crd.treatment` t
    LEFT JOIN (
        SELECT 
        patientunitstayid,
        unitdischargeoffset
        FROM 
        `physionet-data.eicu_crd.patient`
        WHERE unitvisitnumber = 1
    ) p
        ON t.patientunitstayid = p.patientunitstayid
    WHERE LOWER(treatmentstring) LIKE '%transfusion%' or lower(treatmentstring) like '%packed red blood cell%'
    AND treatmentoffset <= unitdischargeoffset
    GROUP BY t.patientunitstayid
);

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.eicu_transfusion` AS(
WITH patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), treatment AS (
    SELECT * FROM `physionet-data.eicu_crd.treatment`
), diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), lab AS (
    SELECT * FROM `physionet-data.eicu_crd.lab`
), apachepatientresult AS (
    SELECT * FROM `physionet-data.eicu_crd.apachepatientresult`
), physicalexam AS (
    SELECT * FROM `physionet-data.eicu_crd.physicalexam`
), vitalperiodic AS (
    SELECT * FROM `physionet-data.eicu_crd.vitalperiodic`
), vitalaperiodic AS (
    SELECT * FROM `physionet-data.eicu_crd.vitalaperiodic`
), infusiondrug AS (
    SELECT * FROM `physionet-data.eicu_crd.infusiondrug`
), intakeoutput AS (
    SELECT * FROM `physionet-data.eicu_crd.intakeoutput`
), respiratorycharting AS (
    SELECT * FROM `physionet-data.eicu_crd.respiratorycharting`
), nursecharting AS (
    SELECT * FROM `physionet-data.eicu_crd.nursecharting`
), respiratorycare AS (
    SELECT * FROM `physionet-data.eicu_crd.respiratorycare`
), hospital AS (
    SELECT * FROM `physionet-data.eicu_crd.hospital`
), first_transfusion AS(
    SELECT
    patientunitstayid,
    treatmentoffset,
    ROW_NUMBER() OVER (PARTITION BY patientunitstayid ORDER BY treatmentoffset ASC) AS rn
    FROM treatment
    WHERE LOWER(treatmentstring) LIKE '%transfusion%'
    OR LOWER(treatmentstring) LIKE '%packed red blood cell%'
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
), trsfsn as (
    SELECT * FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion_count`
), lowest_hgb_offset as (
    SELECT
    a.patientunitstayid,
    MIN(a.admission_hgbmin) as admission_hgbmin,
    MIN(a.hgbmin) as hgbmin,
    MAX(CASE WHEN a.treatmentoffset IS NOT NULL
        AND l.labresultoffset < a.treatmentoffset 
        AND l.labresultoffset BETWEEN -1440 AND unitDischargeOffset 
        THEN labresultoffset --link the offset to the actual hgbmin
        WHEN  a.treatmentoffset IS NULL 
        AND l.labresultoffset BETWEEN -1440 AND unitdischargeoffset 
        THEN l.labresultoffset --link the offset to the actual hgbmin
        ELSE NULL END) as labresultoffset
    FROM (
        SELECT
        p.patientunitstayid,
        MIN(p.unitDischargeOffset) as unitdischargeoffset,
        MIN(t.treatmentoffset) as treatmentoffset, --first transfusion
        MIN(CASE WHEN l.labresult IS NOT NULL 
            AND l.labresultoffset BETWEEN -1440 AND p.unitDischargeOffset
            THEN l.labresult ELSE NULL END) as admission_hgbmin, --hgbmin throughout entire admission
        MIN(CASE WHEN t.treatmentoffset IS NOT NULL 
            AND l.labresultoffset BETWEEN treatmentoffset - 2880 AND treatmentoffset  --ensure hgbmin recorded within 48h before transfusion
            AND l.labresultoffset BETWEEN -1440 AND p.unitDischargeOffset 
            AND l.labresult IS NOT NULL THEN labresult 
            WHEN treatmentoffset IS NULL
            AND l.labresultoffset BETWEEN -1440 AND p.unitDischargeOffset --ensure that hgbmin is taken within 1 day of admission
            AND l.labresult IS NOT NULL THEN l.labresult END) AS hgbmin, --minimum hgbmin throughout admisison
        FROM patient p
        LEFT JOIN lab l USING(patientunitstayid)
        LEFT JOIN trsfsn t USING(patientunitstayid)
        WHERE LOWER(l.labname) LIKE '%hgb%'
        GROUP BY p.patientunitstayid
    ) a
    LEFT JOIN (
        SELECT * FROM lab
        WHERE LOWER(labname) LIKE '%hgb%'
    ) l 
        ON a.patientunitstayid=l.patientunitstayid
        AND a.hgbmin = l.labresult
    GROUP BY patientunitstayid
), conditions_list AS (
   SELECT 
   patientunitstayid,
   MAX(CASE WHEN LOWER(diagnosisstring) LIKE '%sepsis%' OR LOWER(diagnosisstring) LIKE '%septic%' THEN 1 ELSE 0 END) AS septicflag,
   MAX(CASE WHEN LOWER(diagnosisstring) LIKE '%peripheral vascular ischemia%' THEN 1 ELSE 0 END) AS pvdflag,
   MAX(CASE WHEN (LOWER(diagnosisstring) LIKE '%acute coronary syndrome%' OR LOWER(diagnosisstring) LIKE '%coronary artery disease%')
       OR (LOWER(labname) IN('troponin - I','troponin - T') AND labresult > 0.1) THEN 1 ELSE 0 END) AS ihdflag,
   MAX(CASE WHEN LOWER(diagnosisstring) LIKE '%heart failure%' THEN 1 ELSE 0 END) AS chfflag,
   MAX(CASE WHEN LOWER(diagnosisstring) LIKE '%chronic kidney disease|stage 5%' THEN 1 ELSE 0 END) AS esrfflag,
   MAX(CASE WHEN LOWER(diagnosisstring) LIKE '%surgery%' THEN 1 ELSE 0 END) AS surgeryflag,
   FROM diagnosis
   LEFT JOIN lab USING(patientunitstayid)
   GROUP BY patientunitstayid
), tr as (
    SELECT
    patientunitstayid,
    treatmentoffset as chartoffset,
    MAX(CASE WHEN treatmentstring in (
     'toxicology|drug overdose|vasopressors|vasopressin' --                                                                   |    23
   , 'toxicology|drug overdose|vasopressors|phenylephrine (Neosynephrine)' --                                                 |    21
   , 'toxicology|drug overdose|vasopressors|norepinephrine > 0.1 micrograms/kg/min' --                                        |    62
   , 'toxicology|drug overdose|vasopressors|norepinephrine <= 0.1 micrograms/kg/min' --                                       |    29
   , 'toxicology|drug overdose|vasopressors|epinephrine > 0.1 micrograms/kg/min' --                                           |     6
   , 'toxicology|drug overdose|vasopressors|epinephrine <= 0.1 micrograms/kg/min' --                                          |     2
   , 'toxicology|drug overdose|vasopressors|dopamine 5-15 micrograms/kg/min' --                                               |     7
   , 'toxicology|drug overdose|vasopressors|dopamine >15 micrograms/kg/min' --                                                |     3
   , 'toxicology|drug overdose|vasopressors' --                                                                               |    30
   , 'surgery|cardiac therapies|vasopressors|vasopressin' --                                                                  |   356
   , 'surgery|cardiac therapies|vasopressors|phenylephrine (Neosynephrine)' --                                                |  1000
   , 'surgery|cardiac therapies|vasopressors|norepinephrine > 0.1 micrograms/kg/min' --                                       |   390
   , 'surgery|cardiac therapies|vasopressors|norepinephrine <= 0.1 micrograms/kg/min' --                                      |   347
   , 'surgery|cardiac therapies|vasopressors|epinephrine > 0.1 micrograms/kg/min' --                                          |   117
   , 'surgery|cardiac therapies|vasopressors|epinephrine <= 0.1 micrograms/kg/min' --                                         |   178
   , 'surgery|cardiac therapies|vasopressors|dopamine  5-15 micrograms/kg/min' --                                             |   274
   , 'surgery|cardiac therapies|vasopressors|dopamine >15 micrograms/kg/min' --                                               |    23
   , 'surgery|cardiac therapies|vasopressors' --                                                                              |   596
   , 'renal|electrolyte correction|treatment of hypernatremia|vasopressin' --                                                 |     7
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|phenylephrine (Neosynephrine)' --           |   321
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|norepinephrine > 0.1 micrograms/kg/min' --  |   348
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|norepinephrine <= 0.1 micrograms/kg/min' -- |   374
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|epinephrine > 0.1 micrograms/kg/min' --     |    21
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|epinephrine <= 0.1 micrograms/kg/min' --    |   199
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|dopamine 5-15 micrograms/kg/min' --         |   277
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors|dopamine > 15 micrograms/kg/min' --         |    20
   , 'neurologic|therapy for controlling cerebral perfusion pressure|vasopressors' --                                         |   172
   , 'gastrointestinal|medications|hormonal therapy (for varices)|vasopressin' --                                             |   964
   , 'cardiovascular|shock|vasopressors|vasopressin' --                                                                       | 11082
   , 'cardiovascular|shock|vasopressors|phenylephrine (Neosynephrine)' --                                                     | 13189
   , 'cardiovascular|shock|vasopressors|norepinephrine > 0.1 micrograms/kg/min' --                                            | 24174
   , 'cardiovascular|shock|vasopressors|norepinephrine <= 0.1 micrograms/kg/min' --                                           | 17467
   , 'cardiovascular|shock|vasopressors|epinephrine > 0.1 micrograms/kg/min' --                                               |  2410
   , 'cardiovascular|shock|vasopressors|epinephrine <= 0.1 micrograms/kg/min' --                                              |  2384
   , 'cardiovascular|shock|vasopressors|dopamine  5-15 micrograms/kg/min' --                                                  |  4822
   , 'cardiovascular|shock|vasopressors|dopamine >15 micrograms/kg/min' --                                                    |  1102
   , 'cardiovascular|shock|vasopressors' --                                                                                   |  9335
   , 'toxicology|drug overdose|agent specific therapy|beta blockers overdose|dopamine' --                             |    66
   , 'cardiovascular|ventricular dysfunction|inotropic agent|norepinephrine > 0.1 micrograms/kg/min' --                       |   537
   , 'cardiovascular|ventricular dysfunction|inotropic agent|norepinephrine <= 0.1 micrograms/kg/min' --                      |   411
   , 'cardiovascular|ventricular dysfunction|inotropic agent|epinephrine > 0.1 micrograms/kg/min' --                          |   274
   , 'cardiovascular|ventricular dysfunction|inotropic agent|epinephrine <= 0.1 micrograms/kg/min' --                         |   456
   , 'cardiovascular|shock|inotropic agent|norepinephrine > 0.1 micrograms/kg/min' --                                         |  1940
   , 'cardiovascular|shock|inotropic agent|norepinephrine <= 0.1 micrograms/kg/min' --                                        |  1262
   , 'cardiovascular|shock|inotropic agent|epinephrine > 0.1 micrograms/kg/min' --                                            |   477
   , 'cardiovascular|shock|inotropic agent|epinephrine <= 0.1 micrograms/kg/min' --                                           |   505
   , 'cardiovascular|shock|inotropic agent|dopamine <= 5 micrograms/kg/min' --                                        |  1103
   , 'cardiovascular|shock|inotropic agent|dopamine  5-15 micrograms/kg/min' --                                       |  1156
   , 'cardiovascular|shock|inotropic agent|dopamine >15 micrograms/kg/min' --                                         |   144
   , 'surgery|cardiac therapies|inotropic agent|dopamine <= 5 micrograms/kg/min' --                                   |   171
   , 'surgery|cardiac therapies|inotropic agent|dopamine  5-15 micrograms/kg/min' --                                  |    93
   , 'surgery|cardiac therapies|inotropic agent|dopamine >15 micrograms/kg/min' --                                    |     3
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|norepinephrine > 0.1 micrograms/kg/min' --              |   688
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|norepinephrine <= 0.1 micrograms/kg/min' --             |   670
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|epinephrine > 0.1 micrograms/kg/min' --                 |   381
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|epinephrine <= 0.1 micrograms/kg/min' --                |   357
   , 'cardiovascular|ventricular dysfunction|inotropic agent|dopamine <= 5 micrograms/kg/min' --                      |   886
   , 'cardiovascular|ventricular dysfunction|inotropic agent|dopamine  5-15 micrograms/kg/min' --                     |   649
   , 'cardiovascular|ventricular dysfunction|inotropic agent|dopamine >15 micrograms/kg/min' --                       |    86
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|dopamine <= 5 micrograms/kg/min' --             |   346
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|dopamine  5-15 micrograms/kg/min' --            |   520
   , 'cardiovascular|myocardial ischemia / infarction|inotropic agent|dopamine >15 micrograms/kg/min' --              |    54
    ) THEN 1 ELSE 0 END) AS vasopressor
    FROM treatment
    GROUP BY patientunitstayid, treatmentoffset
), sofalist as (
    WITH sofa_3others_dayHmg_dayAHmg AS (
        WITH t1f_day1 AS (
            SELECT
            patientunitstayid,
            physicalexamoffset,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/eyes%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_eyes,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/verbal%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_verbal,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/motor%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_motor
            FROM physicalexam pe
            LEFT JOIN lowest_hgb_offset using(patientunitstayid)    
            WHERE (LOWER(physicalexampath) LIKE '%gcs/eyes%'
            OR LOWER(physicalexampath) LIKE '%gcs/verbal%'
            OR LOWER(physicalexampath) LIKE '%gcs/motor%')
            AND physicalexamoffset = labresultoffset
            GROUP BY patientunitstayid, physicalexamoffset
        ), t1_day1 AS (
            SELECT
            patientunitstayid,
            MIN(COALESCE(gcs_eyes,4) + COALESCE(gcs_verbal,5) + COALESCE(gcs_motor,6)) AS gcs
            FROM t1f_day1
            GROUP BY patientunitstayid
        ), t2_day1 AS (
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN LOWER(labname) LIKE 'total bili%' THEN labresult
                ELSE NULL END) AS bili,
            MIN(CASE WHEN LOWER(labname) LIKE 'platelet%' THEN labresult
                ELSE NULL END) AS plt
            FROM patient pt
            LEFT OUTER JOIN lab lb USING(patientunitstayid)
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
            WHERE lb.labresultoffset = lowest_hgb_offset.labresultoffset 
            GROUP BY pt.patientunitstayid
        ), t1f_day4 AS (
            SELECT
            patientunitstayid,
            physicalexamoffset,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/eyes%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_eyes,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/verbal%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_verbal,
            MIN(CASE WHEN LOWER(physicalexampath) LIKE '%gcs/motor%' THEN CAST(physicalexamvalue AS INT64)
                ELSE NULL END) AS gcs_motor
            FROM physicalexam pe
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid) 
            WHERE (LOWER(physicalexampath) LIKE '%gcs/eyes%'
                   OR LOWER(physicalexampath) LIKE '%gcs/verbal%'
                   OR LOWER(physicalexampath) LIKE '%gcs/motor%')
            AND physicalexamoffset BETWEEN labresultoffset AND labresultoffset + 1440
            GROUP BY patientunitstayid, physicalexamoffset
        ), t1_day4 AS (
            SELECT
            patientunitstayid,
            MIN(COALESCE(gcs_eyes,4) + COALESCE(gcs_verbal,5) + COALESCE(gcs_motor,6)) AS gcs
            FROM t1f_day4
            GROUP BY patientunitstayid
        ), t2_day4 AS (
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN LOWER(labname) LIKE 'total bili%' THEN labresult
                ELSE NULL END) AS bili,
            MIN(CASE WHEN LOWER(labname) LIKE 'platelet%' THEN labresult
                ELSE NULL END) AS plt
            FROM patient pt
            LEFT OUTER JOIN lab lb USING(patientunitstayid)
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
            WHERE lb.labresultoffset BETWEEN lowest_hgb_offset.labresultoffset 
            AND lowest_hgb_offset.labresultoffset + 1440
            GROUP BY patientunitstayid
        )
        SELECT
        DISTINCT pt.patientunitstayid,
        MIN(t1_day1.gcs) AS gcs_day1,
        MAX(t2_day1.bili) AS bili_day1,
        MIN(t2_day1.plt) AS plt_day1,
        MAX(CASE WHEN t2_day1.plt<20 THEN 4
                 WHEN t2_day1.plt<50 THEN 3
                 WHEN t2_day1.plt<100 THEN 2
                 WHEN t2_day1.plt<150 THEN 1
                 ELSE 0 END) AS sofacoag_day1,
        MAX(CASE WHEN t2_day1.bili>12 THEN 4
                 WHEN t2_day1.bili>6 THEN 3
                 WHEN t2_day1.bili>2 THEN 2
                 WHEN t2_day1.bili>1.2 THEN 1
                 ELSE 0 END) AS sofaliver_day1,
        MAX(CASE WHEN t1_day1.gcs=15 THEN 0
                 WHEN t1_day1.gcs>=13 THEN 1
                 WHEN t1_day1.gcs>=10 THEN 2
                 WHEN t1_day1.gcs>=6 THEN 3
                 WHEN t1_day1.gcs>=3 THEN 4
                 ELSE 0 END) AS sofacns_day1,
        MIN(t1_day4.gcs) AS gcs_day4,
        MAX(t2_day4.bili) AS bili_day4,
        MIN(t2_day4.plt) AS plt_day4,
        MAX(CASE WHEN t2_day4.plt<20 THEN 4
                 WHEN t2_day4.plt<50 THEN 3
                 WHEN t2_day4.plt<100 THEN 2
                 WHEN t2_day4.plt<150 THEN 1
                 ELSE 0 END) AS sofacoag_day4,
        MAX(CASE WHEN t2_day4.bili>12 THEN 4
                 WHEN t2_day4.bili>6 THEN 3
                 WHEN t2_day4.bili>2 THEN 2
                 WHEN t2_day4.bili>1.2 THEN 1
                 ELSE 0 END) AS sofaliver_day4,
        MAX(CASE WHEN t1_day4.gcs=15 THEN 0
                 WHEN t1_day4.gcs>=13 THEN 1
                 WHEN t1_day4.gcs>=10 THEN 2
                 WHEN t1_day4.gcs>=6 THEN 3
                 WHEN t1_day4.gcs>=3 THEN 4
                 ELSE 0 END) AS sofacns_day4
        FROM patient pt
        LEFT OUTER JOIN t1_day1
            ON t1_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day1
            ON t2_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t1_day4
            ON t1_day4.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day4
            ON t2_day4.patientunitstayid=pt.patientunitstayid
        GROUP BY 
            pt.patientunitstayid, 
            t1_day1.gcs, 
            t2_day1.bili, 
            t2_day1.plt,
            t1_day4.gcs,
            t2_day4.bili,
            t2_day4.plt
        ORDER BY pt.patientunitstayid
    ), sofa_cv_day1_to_day4 AS (
        WITH t1_day1 AS (
            WITH tt1 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN noninvasivemean IS NOT NULL THEN noninvasivemean
                    ELSE NULL END) AS map
                FROM vitalaperiodic
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                WHERE observationoffset = labresultoffset
                GROUP BY patientunitstayid
            ), tt2 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN systemicmean IS NOT NULL THEN systemicmean
                ELSE NULL END) AS map
                FROM vitalperiodic
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                WHERE observationoffset  = labresultoffset 
                GROUP BY patientunitstayid
            )
        SELECT
        pt.patientunitstayid,
        CASE WHEN tt1.map IS NOT NULL THEN tt1.map
             WHEN tt2.map IS NOT NULL THEN tt2.map
             ELSE NULL END AS map
        FROM patient pt
        LEFT OUTER JOIN tt1
            ON tt1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN tt2
            ON tt2.patientunitstayid=pt.patientunitstayid
        ), t2_day1 AS (
            SELECT
            DISTINCT patientunitstayid,
            MAX(CASE WHEN LOWER(drugname) LIKE '%(ml/hr)%' THEN ROUND(CAST(drugrate AS INT64)/3,3) -- rate in ml/h * 1600 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
                     WHEN LOWER(drugname) LIKE '%(mcg/kg/min)%' THEN CAST(drugrate AS INT64)
                     ELSE NULL END) AS dopa
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
            WHERE LOWER(drugname) LIKE '%dopamine%'
            AND infusionoffset = labresultoffset 
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND drugrate<>''
            AND drugrate<>'.'
            GROUP BY patientunitstayid
            ORDER BY patientunitstayid
        ), t3_day1 AS (
            SELECT
            DISTINCT patientunitstayid,
            MAX(CASE WHEN LOWER(drugname) LIKE '%(ml/hr)%' AND drugrate<>'' AND drugrate<>'.' THEN ROUND(CAST(drugrate AS INT64)/300,3) -- rate in ml/h * 16 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
                     WHEN LOWER(drugname) LIKE '%(mcg/min)%' AND drugrate<>'' AND drugrate<>'.' THEN ROUND(CAST(drugrate AS INT64)/80,3)-- divide by 80 kg
                     WHEN LOWER(drugname) LIKE '%(mcg/kg/min)%' AND drugrate<>'' AND drugrate<>'.' THEN CAST(drugrate AS INT64)
                     ELSE NULL END) AS norepi
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset
            USING(patientunitstayid)  
            WHERE LOWER(drugname) LIKE '%epinephrine%'
            AND infusionoffset = labresultoffset 
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND drugrate<>''
            AND drugrate<>'.'-- this regex will capture norepi as well
            GROUP BY patientunitstayid
        ), t4_day1 AS (
            SELECT
            DISTINCT patientunitstayid,
            1 AS dobu
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
            WHERE LOWER(drugname) LIKE '%dobutamin%'
            AND drugrate <>''
            AND drugrate<>'.'
            AND drugrate <>'0'
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND infusionoffset = labresultoffset 
        ), t1_day4 AS (
            WITH tt1 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN noninvasivemean IS NOT NULL THEN noninvasivemean
                    ELSE NULL END) AS map
                FROM vitalaperiodic
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                WHERE observationoffset BETWEEN labresultoffset AND labresultoffset + 1440 
            GROUP BY patientunitstayid 
            ), tt2 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN systemicmean IS NOT NULL THEN systemicmean
                    ELSE NULL END) AS map
                FROM vitalperiodic
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                WHERE observationoffset BETWEEN labresultoffset AND labresultoffset + 1440
                GROUP BY patientunitstayid
            )
            SELECT
            pt.patientunitstayid,
            CASE WHEN tt1.map IS NOT NULL THEN tt1.map
                 WHEN tt2.map IS NOT NULL THEN tt2.map
                 ELSE NULL
            END AS map
            FROM patient pt
            LEFT OUTER JOIN tt1
                ON tt1.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN tt2
                ON tt2.patientunitstayid=pt.patientunitstayid
            ORDER BY pt.patientunitstayid
        ), t2_day4 AS (
            SELECT
            DISTINCT patientunitstayid,
            MAX(CASE WHEN LOWER(drugname) LIKE '%(ml/hr)%' THEN ROUND(CAST(drugrate AS INT64)/3,3) -- rate in ml/h * 1600 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
                     WHEN LOWER(drugname) LIKE '%(mcg/kg/min)%' THEN CAST(drugrate AS INT64)
                     ELSE NULL END) AS dopa
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
            WHERE LOWER(drugname) LIKE '%dopamine%'
            AND infusionoffset BETWEEN labresultoffset AND labresultoffset + 1440
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND drugrate<>''
            AND drugrate<>'.'
            GROUP BY patientunitstayid
            ORDER BY patientunitstayid
        ), t3_day4 AS (
            SELECT
            DISTINCT patientunitstayid,
            MAX(CASE WHEN LOWER(drugname) LIKE '%(ml/hr)%' AND drugrate<>'' AND drugrate<>'.' THEN ROUND(CAST(drugrate AS INT64)/300,3) -- rate in ml/h * 16 mcg/ml / 80 kg / 60 min, to convert in mcg/kg/min
                     WHEN LOWER(drugname) LIKE '%(mcg/min)%' AND drugrate<>'' AND drugrate<>'.' THEN ROUND(CAST(drugrate AS INT64)/80,3)-- divide by 80 kg
                     WHEN LOWER(drugname) LIKE '%(mcg/kg/min)%' AND drugrate<>'' AND drugrate<>'.' THEN CAST(drugrate AS INT64)
                     ELSE NULL END ) AS norepi
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
            WHERE LOWER(drugname) LIKE '%epinephrine%'
            AND infusionoffset BETWEEN labresultoffset AND labresultoffset + 1440
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND drugrate<>''
            AND drugrate<>'.'-- this regex will capture norepi as well
            GROUP BY patientunitstayid
            ORDER BY patientunitstayid
        ), t4_day4 AS (
            SELECT DISTINCT patientunitstayid,
            1 AS dobu
            FROM infusiondrug id
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
            WHERE LOWER(drugname) LIKE '%dobutamin%'
            AND drugrate <>''
            AND drugrate<>'.'
            AND drugrate <>'0'
            AND REGEXP_CONTAINS(drugrate, '^[0-9]{0,5}$')
            AND infusionoffset BETWEEN labresultoffset AND labresultoffset + 1440
            ORDER BY patientunitstayid
        )
        SELECT
        pt.patientunitstayid,
        ------------------VARS day1------------------------
        t1_day1.map AS map_day1,
        t2_day1.dopa AS dopa_day1,
        t3_day1.norepi AS norepi_day1,
        t4_day1.dobu AS dobu_day1,
        CASE WHEN t2_day1.dopa >= 15 OR t3_day1.norepi >0.1 THEN 4
             WHEN t2_day1.dopa > 5 OR (t3_day1.norepi > 0 AND t3_day1.norepi <= 0.1) THEN 3
             WHEN t2_day1.dopa <= 5 OR t4_day1.dobu > 0 THEN 2 WHEN t1_day1.map < 70 THEN 1 
             ELSE 0 END AS SOFA_cv_day1, 
        ------------------VARS day4------------------------
        t1_day4.map AS map_day4,
        t2_day4.dopa AS dopa_day4,
        t3_day4.norepi AS norepi_day4,
        t4_day4.dobu AS dobu_day4,
        CASE WHEN t2_day4.dopa >= 15 OR t3_day4.norepi >0.1 THEN 4
             WHEN t2_day4.dopa > 5 OR (t3_day4.norepi > 0 AND t3_day4.norepi <= 0.1) THEN 3
             WHEN t2_day4.dopa <= 5 OR t4_day4.dobu > 0 THEN 2 WHEN t1_day4.map < 70 THEN 1 
             ELSE 0 END AS SOFA_cv_day4       
        FROM patient pt
        ------------------VARS day1------------------------  
        LEFT OUTER JOIN t1_day1
            ON t1_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day1
            ON t2_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t3_day1
            ON t3_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t4_day1
            ON t4_day1.patientunitstayid=pt.patientunitstayid  
        ------------------VARS day4------------------------     
        LEFT OUTER JOIN t1_day4
            ON t1_day4.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day4
            ON t2_day4.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t3_day4
            ON t3_day4.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t4_day4
            ON t4_day4.patientunitstayid=pt.patientunitstayid  
        ORDER BY pt.patientunitstayid
    ), sofa_renal_day1_to_day4 AS(
        WITH t1_day1 AS (
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN LOWER(labname) LIKE 'creatin%' THEN labresult
                ELSE NULL END) AS creat
            FROM patient pt
            LEFT OUTER JOIN lab lb USING(patientunitstayid)  
            LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
            WHERE lb.labresultoffset = lowest_hgb_offset.labresultoffset 
            GROUP BY pt.patientunitstayid
        ), t2_day1 AS (
            WITH uotemp AS (
                SELECT
                patientunitstayid,
                CASE WHEN dayz=1 THEN SUM(outputtotal)
                     ELSE NULL END AS uod1
                FROM (
                    SELECT
                    DISTINCT patientunitstayid,
                    intakeoutputoffset,
                    outputtotal,
                    CASE WHEN (intakeoutputoffset) = lowest_hgb_offset.labresultoffset THEN 1
                         ELSE NULL END AS dayz
                    FROM intakeoutput
                    LEFT JOIN lowest_hgb_offset USING(patientunitstayid)    
                    WHERE intakeoutputoffset = labresultoffset
                    ORDER BY patientunitstayid, intakeoutputoffset
                ) AS temp
                GROUP BY patientunitstayid, temp.dayz 
            )
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN uod1 IS NOT NULL THEN uod1
                     ELSE NULL END) AS UO
           FROM patient pt
           LEFT OUTER JOIN uotemp
            ON uotemp.patientunitstayid=pt.patientunitstayid
           GROUP BY pt.patientunitstayid
        ), t1_day4 AS (
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN LOWER(labname) LIKE 'creatin%' THEN labresult
                     ELSE NULL END) AS creat
            FROM patient pt
            LEFT OUTER JOIN lab lb USING(patientunitstayid) 
            LEFT JOIN lowest_hgb_offset  USING(patientunitstayid)    
            WHERE lb.labresultoffset BETWEEN lowest_hgb_offset.labresultoffset AND lowest_hgb_offset.labresultoffset + 1440 
            GROUP BY pt.patientunitstayid
        ), t2_day4 AS (
            WITH uotemp AS (
                SELECT
                patientunitstayid,
                CASE WHEN dayz=1 THEN SUM(outputtotal)
                     ELSE NULL END AS uod1
                FROM (
                    SELECT
                    DISTINCT patientunitstayid,
                    intakeoutputoffset,
                    outputtotal,
                    CASE WHEN intakeoutputoffset BETWEEN labresultoffset AND labresultoffset + 1440 THEN 1
                        ELSE NULL END AS dayz
                    FROM intakeoutput
                    LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                    WHERE intakeoutputoffset BETWEEN labresultoffset
                    AND labresultoffset + 1440
                    ORDER BY patientunitstayid, intakeoutputoffset 
                ) AS temp
                GROUP BY patientunitstayid, temp.dayz
            )
            SELECT
            pt.patientunitstayid,
            MAX(CASE WHEN uod1 IS NOT NULL THEN uod1
                    ELSE NULL END) AS UO
            FROM patient pt
            LEFT OUTER JOIN uotemp
                ON uotemp.patientunitstayid=pt.patientunitstayid
            GROUP BY pt.patientunitstayid
        )    
        SELECT
        pt.patientunitstayid,
        -- t1.creat, t2.uo,
        CASE WHEN t2_day1.uo <200 OR t1_day1.creat>5 THEN 4
            WHEN t2_day1.uo <500 OR t1_day1.creat >3.5 THEN 3
            WHEN t1_day1.creat BETWEEN 2 AND 3.5 THEN 2
            WHEN t1_day1.creat BETWEEN 1.2 AND 2 THEN 1
            ELSE 0 END AS sofarenal_day1,        
        CASE WHEN t2_day4.uo <200 OR t1_day4.creat>5 THEN 4
            WHEN t2_day4.uo <500 OR t1_day4.creat >3.5 THEN 3
            WHEN t1_day4.creat BETWEEN 2 AND 3.5 THEN 2
            WHEN t1_day4.creat BETWEEN 1.2 AND 2 THEN 1
            ELSE 0 END AS sofarenal_day4
        FROM patient pt
        LEFT OUTER JOIN t1_day1
            ON t1_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day1
            ON t2_day1.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t1_day4
            ON t1_day4.patientunitstayid=pt.patientunitstayid
        LEFT OUTER JOIN t2_day4
            ON t2_day4.patientunitstayid=pt.patientunitstayid
        ORDER BY pt.patientunitstayid
    ), sofa_respi_day1_to_day4 AS (
        WITH tempo2_day1 AS (
            WITH tempo1_day1 AS (
                WITH t1_day1 AS (
                    SELECT
                    *
                    FROM (
                        SELECT DISTINCT patientunitstayid,
                        MAX(CAST(respchartvalue AS INT64)) AS rcfio2
                        -- , max(case when respchartvaluelabel = 'FiO2' then respchartvalue else null end) as fiO2
                        FROM respiratorycharting
                        LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                        WHERE respchartoffset = labresultoffset 
                        AND respchartvalue <> ''
                        AND REGEXP_CONTAINS(respchartvalue, '^[0-9]{0,2}$')
                        GROUP BY patientunitstayid
                    ) AS tempo
                    WHERE rcfio2 >20 -- many values are liters per minute!
                ORDER BY patientunitstayid
                ), t2_day1 AS (
                    SELECT DISTINCT patientunitstayid,
                    MAX(CAST(nursingchartvalue AS INT64)) AS ncfio2
                    FROM nursecharting nc
                    LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                    WHERE LOWER(nursingchartcelltypevallabel) LIKE '%fio2%'
                    AND REGEXP_CONTAINS(nursingchartvalue, '^[0-9]{0,2}$')
                    AND nursingchartentryoffset = labresultoffset 
                    GROUP BY patientunitstayid
                ), t3_day1 AS (
                    SELECT
                    patientunitstayid,
                    MIN(CASE WHEN sao2 IS NOT NULL THEN sao2
                        ELSE NULL END) AS sao2
                    FROM vitalperiodic
                    LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                    WHERE observationoffset = labresultoffset
                    GROUP BY patientunitstayid
                ), t4_day1 AS (
                    SELECT
                    patientunitstayid,
                    MIN(CASE WHEN LOWER(labname) LIKE 'pao2%' THEN labresult
                             ELSE NULL END) AS pao2
                    FROM lab
                    LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                    WHERE lab.labresultoffset = lowest_hgb_offset.labresultoffset
                    GROUP BY patientunitstayid
                ), t5_day1 AS (
                    WITH t1_day1 AS (
                        SELECT DISTINCT patientunitstayid,
                        MAX(CASE WHEN airwaytype IN ('Oral ETT', 'Nasal ETT', 'Tracheostomy') THEN 1
                            ELSE NULL END) AS airway  -- either invasive airway or NULL
                        FROM respiratorycare
                        LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                        WHERE respcarestatusoffset = labresultoffset
                    GROUP BY patientunitstayid-- , respcarestatusoffset
            -- order by patientunitstayid-- , respcarestatusoffset
                    ), t2_day1 AS (
                        SELECT DISTINCT patientunitstayid,
                        1 AS ventilator
                        FROM respiratorycharting rc
                        LEFT JOIN lowest_hgb_offset USING(patientunitstayid)  
                        WHERE (respchartvalue LIKE '%ventilator%'
                        OR respchartvalue LIKE '%vent%'
                        OR respchartvalue LIKE '%bipap%'
                        OR respchartvalue LIKE '%840%'
                        OR respchartvalue LIKE '%cpap%'
                        OR respchartvalue LIKE '%drager%'
                        OR respchartvalue LIKE 'mv%'
                        OR respchartvalue LIKE '%servo%'
                        OR respchartvalue LIKE '%peep%')
                        AND respchartoffset  = labresultoffset
                        GROUP BY patientunitstayid
                        -- order by patientunitstayid
                    ), t3_day1 AS (
                        SELECT DISTINCT patientunitstayid,
                        MAX(CASE WHEN treatmentstring IN ('pulmonary|ventilation and oxygenation|mechanical ventilation',  'pulmonary|ventilation and oxygenation|tracheal suctioning',  'pulmonary|ventilation and oxygenation|ventilator weaning',  'pulmonary|ventilation and oxygenation|mechanical ventilation|assist controlled',  'pulmonary|radiologic procedures / bronchoscopy|endotracheal tube',  'pulmonary|ventilation and oxygenation|oxygen therapy (> 60%)',  'pulmonary|ventilation and oxygenation|mechanical ventilation|tidal volume 6-10 ml/kg',  'pulmonary|ventilation and oxygenation|mechanical ventilation|volume controlled',  'surgery|pulmonary therapies|mechanical ventilation',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy',  'pulmonary|ventilation and oxygenation|mechanical ventilation|synchronized intermittent',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|performed during current admission for ventilatory support',  'pulmonary|ventilation and oxygenation|ventilator weaning|active',  'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure controlled',  'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure support',  'pulmonary|ventilation and oxygenation|ventilator weaning|slow',  'surgery|pulmonary therapies|ventilator weaning',  'surgery|pulmonary therapies|tracheal suctioning',  'pulmonary|radiologic procedures / bronchoscopy|reintubation',  'pulmonary|ventilation and oxygenation|lung recruitment maneuver',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|planned',  'surgery|pulmonary therapies|ventilator weaning|rapid',  'pulmonary|ventilation and oxygenation|prone position',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|conventional',  'pulmonary|ventilation and oxygenation|mechanical ventilation|permissive hypercapnea',  'surgery|pulmonary therapies|mechanical ventilation|synchronized intermittent',  'pulmonary|medications|neuromuscular blocking agent',  'surgery|pulmonary therapies|mechanical ventilation|assist controlled',  'pulmonary|ventilation and oxygenation|mechanical ventilation|volume assured',  'surgery|pulmonary therapies|mechanical ventilation|tidal volume 6-10 ml/kg',  'surgery|pulmonary therapies|mechanical ventilation|pressure support',  'pulmonary|ventilation and oxygenation|non-invasive ventilation',  'pulmonary|ventilation and oxygenation|non-invasive ventilation|face mask',  'pulmonary|ventilation and oxygenation|non-invasive ventilation|nasal mask',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|face mask',  'surgery|pulmonary therapies|non-invasive ventilation',  'surgery|pulmonary therapies|non-invasive ventilation|face mask',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|nasal mask',  'surgery|pulmonary therapies|non-invasive ventilation|nasal mask',  'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation',  'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation|face mask' ) THEN 1
                            ELSE NULL END) AS interface   -- either ETT/NiV or NULL
                        FROM treatment t
                        LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
                        WHERE t.treatmentoffset  = lowest_hgb_offset.labresultoffset
                        GROUP BY patientunitstayid-- , treatmentoffset, interface
                        ORDER BY patientunitstayid-- , treatmentoffset
                    )
                    SELECT
                    pt.patientunitstayid,
                    CASE WHEN t1_day1.airway IS NOT NULL OR t2_day1.ventilator IS NOT NULL OR t3_day1.interface IS NOT NULL THEN 1
                    ELSE NULL END AS mechvent
                    FROM patient pt
                    LEFT OUTER JOIN t1_day1
                        ON t1_day1.patientunitstayid=pt.patientunitstayid
                    LEFT OUTER JOIN t2_day1
                        ON t2_day1.patientunitstayid=pt.patientunitstayid
                    LEFT OUTER JOIN t3_day1
                        ON t3_day1.patientunitstayid=pt.patientunitstayid
                    ORDER BY  pt.patientunitstayid 
            )
            SELECT
            pt.patientunitstayid,
            t3_day1.sao2,
            t4_day1.pao2,
            CASE WHEN t1_day1.rcfio2>20 THEN t1_day1.rcfio2
                 WHEN t2_day1.ncfio2 >20 THEN t2_day1.ncfio2
                 WHEN t1_day1.rcfio2=1 OR t2_day1.ncfio2=1 THEN 100
                 ELSE 21 END AS fio2,
            t5_day1.mechvent
            FROM patient pt
            LEFT OUTER JOIN t1_day1
                ON t1_day1.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t2_day1
                ON t2_day1.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t3_day1
                ON t3_day1.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t4_day1
                ON t4_day1.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t5_day1
                ON t5_day1.patientunitstayid=pt.patientunitstayid
        -- order by pt.patientunitstayid
        ) --end of tempo1_day1
        SELECT
        *,
        -- coalesce(fio2,nullif(fio2,0),21) as fn, nullif(fio2,0) as nullifzero, coalesce(coalesce(nullif(fio2,0),21),fio2,21) as ifzero21 ,
        coalesce(pao2,100)/coalesce(coalesce(nullif(fio2,0),21),fio2,21) AS pf,
        coalesce(sao2,100)/coalesce(coalesce(nullif(fio2,0),21),fio2,21) AS sf
        FROM
        tempo1_day1
        -- order by fio2
    ), tempo2_day4 AS (
        WITH tempo1_day4 AS (
            WITH t1_day4 AS (
                SELECT
                *
                FROM (
                    SELECT
                    DISTINCT patientunitstayid,
                    MAX(CAST(respchartvalue AS INT64)) AS rcfio2
                    -- , max(case when respchartvaluelabel = 'FiO2' then respchartvalue else null end) as fiO2
                    FROM respiratorycharting
                    LEFT JOIN lowest_hgb_offset using(patientunitstayid)  
                    WHERE
                    respchartoffset BETWEEN labresultoffset AND labresultoffset + 1440
                    AND respchartvalue <> ''
                    AND REGEXP_CONTAINS(respchartvalue, '^[0-9]{0,2}$')
                    GROUP BY patientunitstayid
                ) AS tempo
                WHERE rcfio2 >20 -- many values are liters per minute!
                ORDER BY patientunitstayid
            ), t2_day4 AS (
                SELECT DISTINCT patientunitstayid,
                MAX(CAST(nursingchartvalue AS INT64)) AS ncfio2
                FROM nursecharting nc
                LEFT JOIN lowest_hgb_offset using(patientunitstayid)   
                WHERE LOWER(nursingchartcelltypevallabel) LIKE '%fio2%'
                AND REGEXP_CONTAINS(nursingchartvalue, '^[0-9]{0,2}$')
                AND nursingchartentryoffset BETWEEN labresultoffset AND labresultoffset + 1440
                GROUP BY patientunitstayid
            ), t3_day4 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN sao2 IS NOT NULL THEN sao2
                    ELSE NULL END) AS sao2
                FROM vitalperiodic
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)   
                WHERE observationoffset BETWEEN labresultoffset AND labresultoffset + 1440
                GROUP BY patientunitstayid
            ), t4_day4 AS (
                SELECT
                patientunitstayid,
                MIN(CASE WHEN LOWER(labname) LIKE 'pao2%' THEN labresult
                    ELSE NULL END) AS pao2
                FROM lab
                LEFT JOIN lowest_hgb_offset USING(patientunitstayid)   
                WHERE lab.labresultoffset BETWEEN lowest_hgb_offset.labresultoffset AND lowest_hgb_offset.labresultoffset + 1440 
                GROUP BY patientunitstayid
            ), t5_day4 AS (
                WITH t1_day4 AS (
                    SELECT DISTINCT patientunitstayid,
                    MAX(CASE WHEN airwaytype IN ('Oral ETT', 'Nasal ETT', 'Tracheostomy') THEN 1
                        ELSE NULL END) AS airway  -- either invasive airway or NULL
                    FROM respiratorycare
                    LEFT JOIN lowest_hgb_offset using(patientunitstayid)   
                    WHERE respcarestatusoffset BETWEEN labresultoffset AND labresultoffset + 1440   
                    GROUP BY patientunitstayid-- , respcarestatusoffset
            -- order by patientunitstayid-- , respcarestatusoffset
                ), t2_day4 AS (
                    SELECT DISTINCT patientunitstayid,
                    1 AS ventilator
                    FROM respiratorycharting rc
                    LEFT JOIN lowest_hgb_offset using(patientunitstayid)   
                    WHERE (respchartvalue LIKE '%ventilator%'
                    OR respchartvalue LIKE '%vent%'
                    OR respchartvalue LIKE '%bipap%'
                    OR respchartvalue LIKE '%840%'
                    OR respchartvalue LIKE '%cpap%'
                    OR respchartvalue LIKE '%drager%'
                    OR respchartvalue LIKE 'mv%'
                    OR respchartvalue LIKE '%servo%'
                    OR respchartvalue LIKE '%peep%')
                    AND respchartoffset BETWEEN labresultoffset AND labresultoffset + 1440   
                    GROUP BY patientunitstayid
                    -- order by patientunitstayid
                ), t3_day4 AS (
                    SELECT DISTINCT patientunitstayid,
                    MAX(CASE WHEN treatmentstring IN ('pulmonary|ventilation and oxygenation|mechanical ventilation',  'pulmonary|ventilation and oxygenation|tracheal suctioning',  'pulmonary|ventilation and oxygenation|ventilator weaning',  'pulmonary|ventilation and oxygenation|mechanical ventilation|assist controlled',  'pulmonary|radiologic procedures / bronchoscopy|endotracheal tube',  'pulmonary|ventilation and oxygenation|oxygen therapy (> 60%)',  'pulmonary|ventilation and oxygenation|mechanical ventilation|tidal volume 6-10 ml/kg',  'pulmonary|ventilation and oxygenation|mechanical ventilation|volume controlled',  'surgery|pulmonary therapies|mechanical ventilation',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy',  'pulmonary|ventilation and oxygenation|mechanical ventilation|synchronized intermittent',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|performed during current admission for ventilatory support',  'pulmonary|ventilation and oxygenation|ventilator weaning|active',  'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure controlled',  'pulmonary|ventilation and oxygenation|mechanical ventilation|pressure support',  'pulmonary|ventilation and oxygenation|ventilator weaning|slow',  'surgery|pulmonary therapies|ventilator weaning',  'surgery|pulmonary therapies|tracheal suctioning',  'pulmonary|radiologic procedures / bronchoscopy|reintubation',  'pulmonary|ventilation and oxygenation|lung recruitment maneuver',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|planned',  'surgery|pulmonary therapies|ventilator weaning|rapid',  'pulmonary|ventilation and oxygenation|prone position',  'pulmonary|surgery / incision and drainage of thorax|tracheostomy|conventional',  'pulmonary|ventilation and oxygenation|mechanical ventilation|permissive hypercapnea',  'surgery|pulmonary therapies|mechanical ventilation|synchronized intermittent',  'pulmonary|medications|neuromuscular blocking agent',  'surgery|pulmonary therapies|mechanical ventilation|assist controlled',  'pulmonary|ventilation and oxygenation|mechanical ventilation|volume assured',  'surgery|pulmonary therapies|mechanical ventilation|tidal volume 6-10 ml/kg',  'surgery|pulmonary therapies|mechanical ventilation|pressure support',  'pulmonary|ventilation and oxygenation|non-invasive ventilation',  'pulmonary|ventilation and oxygenation|non-invasive ventilation|face mask',  'pulmonary|ventilation and oxygenation|non-invasive ventilation|nasal mask',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|face mask',  'surgery|pulmonary therapies|non-invasive ventilation',  'surgery|pulmonary therapies|non-invasive ventilation|face mask',  'pulmonary|ventilation and oxygenation|mechanical ventilation|non-invasive ventilation|nasal mask',  'surgery|pulmonary therapies|non-invasive ventilation|nasal mask',  'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation',  'surgery|pulmonary therapies|mechanical ventilation|non-invasive ventilation|face mask' ) THEN 1
                        ELSE NULL END) AS interface   -- either ETT/NiV or NULL
                    FROM treatment
                    LEFT JOIN lowest_hgb_offset using(patientunitstayid)
                    WHERE treatmentoffset BETWEEN labresultoffset AND labresultoffset + 1440
                    GROUP BY patientunitstayid-- , treatmentoffset, interface
                    ORDER BY patientunitstayid-- , treatmentoffset
                )
                SELECT
                pt.patientunitstayid,
                CASE WHEN t1_day4.airway IS NOT NULL OR t2_day4.ventilator IS NOT NULL OR t3_day4.interface IS NOT NULL THEN 1
                     ELSE NULL END AS mechvent
                FROM patient pt
                LEFT OUTER JOIN t1_day4
                    ON t1_day4.patientunitstayid=pt.patientunitstayid
                LEFT OUTER JOIN t2_day4
                    ON t2_day4.patientunitstayid=pt.patientunitstayid
                LEFT OUTER JOIN t3_day4
                    ON t3_day4.patientunitstayid=pt.patientunitstayid
                ORDER BY pt.patientunitstayid
            )
            SELECT
            pt.patientunitstayid,
            t3_day4.sao2,
            t4_day4.pao2,
            CASE WHEN t1_day4.rcfio2>20 THEN t1_day4.rcfio2
                 WHEN t2_day4.ncfio2 >20 THEN t2_day4.ncfio2
                 WHEN t1_day4.rcfio2=1 OR t2_day4.ncfio2=1 THEN 100
                 ELSE 21 END AS fio2,
            t5_day4.mechvent
            FROM patient pt
            LEFT OUTER JOIN t1_day4
                ON t1_day4.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t2_day4
                ON t2_day4.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t3_day4
                ON t3_day4.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t4_day4
                ON t4_day4.patientunitstayid=pt.patientunitstayid
            LEFT OUTER JOIN t5_day4
                ON t5_day4.patientunitstayid=pt.patientunitstayid
        -- order by pt.patientunitstayid
            )
        SELECT
        *,
        -- coalesce(fio2,nullif(fio2,0),21) as fn, nullif(fio2,0) as nullifzero, coalesce(coalesce(nullif(fio2,0),21),fio2,21) as ifzero21 ,
        COALESCE(pao2,100)/COALESCE(COALESCE(NULLIF(fio2,0),21),fio2,21) AS pf,
        COALESCE(sao2,100)/COALESCE(COALESCE(NULLIF(fio2,0),21),fio2,21) AS sf
        FROM
        tempo1_day4
        -- order by fio2
    )
    SELECT
    pt.patientunitstayid,
    CASE WHEN tempo2_day1.pf <1 OR tempo2_day1.sf <0.67 THEN 4
         WHEN tempo2_day1.pf BETWEEN 1 AND 2 OR tempo2_day1.sf BETWEEN 0.67 AND 1.41 THEN 3
         WHEN tempo2_day1.pf BETWEEN 2 AND 3 OR tempo2_day1.sf BETWEEN 1.42 AND 2.2 THEN 2
         WHEN tempo2_day1.pf BETWEEN 3 AND 4 OR tempo2_day1.sf BETWEEN 2.21 AND 3.01 THEN 1
         WHEN tempo2_day1.pf > 4 OR tempo2_day1.sf> 3.01 THEN 0
         ELSE 0 END AS SOFA_respi_day1,
    CASE WHEN tempo2_day4.pf <1 OR tempo2_day4.sf <0.67 THEN 4
         WHEN tempo2_day4.pf BETWEEN 1 AND 2 OR tempo2_day4.sf BETWEEN 0.67 AND 1.41 THEN 3
         WHEN tempo2_day4.pf BETWEEN 2 AND 3 OR tempo2_day4.sf BETWEEN 1.42 AND 2.2 THEN 2
         WHEN tempo2_day4.pf BETWEEN 3 AND 4 OR tempo2_day4.sf BETWEEN 2.21 AND 3.01 THEN 1
         WHEN tempo2_day4.pf > 4 OR tempo2_day4.sf> 3.01 THEN 0 
         ELSE 0 END AS SOFA_respi_day4
    FROM patient pt
    LEFT OUTER JOIN tempo2_day1 
        ON tempo2_day1.patientunitstayid=pt.patientunitstayid
    LEFT OUTER JOIN tempo2_day4 
        ON tempo2_day4.patientunitstayid=pt.patientunitstayid
    ORDER BY pt.patientunitstayid

    ) --end of SOFA respi 
    SELECT patient.patientunitstayid,
    MAX(sofa_cv_day1_to_day4.sofa_cv_day1 + sofa_respi_day1_to_day4.sofa_respi_day1 + sofa_renal_day1_to_day4.sofarenal_day1 + sofa_3others_dayHmg_dayAHmg.sofacoag_day1 + sofa_3others_dayHmg_dayAHmg.sofaliver_day1 + sofa_3others_dayHmg_dayAHmg.sofacns_day1) AS sofatotal_dayLowestHmg,
    MAX(sofa_cv_day1_to_day4.sofa_cv_day4 + sofa_respi_day1_to_day4.sofa_respi_day4 + sofa_renal_day1_to_day4.sofarenal_day4 + sofa_3others_dayHmg_dayAHmg.sofacoag_day4 + sofa_3others_dayHmg_dayAHmg.sofaliver_day4 + sofa_3others_dayHmg_dayAHmg.sofacns_day4) AS sofatotal_dayAfterLowestHmg
    FROM patient
    INNER JOIN sofa_cv_day1_to_day4 
        ON patient.patientunitstayid = sofa_cv_day1_to_day4.patientunitstayid
    INNER JOIN sofa_respi_day1_to_day4
        ON patient.patientunitstayid = sofa_respi_day1_to_day4.patientunitstayid
    INNER JOIN sofa_renal_day1_to_day4
        ON patient.patientunitstayid = sofa_renal_day1_to_day4.patientunitstayid
    INNER JOIN sofa_3others_dayHmg_dayAHmg
        ON patient.patientunitstayid = sofa_3others_dayHmg_dayAHmg.patientunitstayid
    GROUP BY patient.patientunitstayid
) --end of sofalist
--Main Query--
SELECT 
DISTINCT patientunitstayid,
unitvisitnumber,
CASE WHEN (age like '%> 89%' ) THEN '89' ELSE age END AS age, 
CASE WHEN (LOWER(gender) not like '%male%') THEN NULL ELSE gender END AS gender,
CASE WHEN lowest_hgb_offset.hgbmin = lowest_hgb_offset.admission_hgbmin THEN 1 ELSE 0 END AS subgroup_flag,
lowest_hgb_offset.hgbmin,
CASE WHEN unabridgedActualVentdays IS NULL THEN 0 ELSE 1 END AS ventmarker,
CASE WHEN septicflag = 1 THEN 1 ELSE 0 END AS septicflag,
CASE WHEN pvdflag = 1 THEN 1 ELSE 0 END AS pvdflag,
CASE WHEN ihdflag = 1 THEN 1 ELSE 0 END AS ihdflag,
CASE WHEN chfflag = 1 THEN 1 ELSE 0 END AS chfflag,
CASE WHEN esrfflag = 1 THEN 1 ELSE 0 END AS esrfflag,
CASE WHEN surgeryflag = 1 THEN 1 ELSE 0 END AS surgeryflag,
CASE WHEN patientunitstayid IN(
    SELECT patientunitstayid 
    FROM tr
    WHERE vasopressor = 1
    ) THEN 1 ELSE 0 END AS vasopressor,
CASE WHEN LOWER(unitdischargestatus) LIKE '%expired%' THEN 1 ELSE 0 END AS icu_mort,
CASE WHEN LOWER(hospitaldischargestatus) LIKE '%expired%' THEN 1 else 0 end as expiremarker,
CASE WHEN LOWER(unitdischargestatus) LIKE '%expired%' THEN unitdischargeoffset/1440 
     WHEN LOWER(hospitaldischargestatus) LIKE '%expired%' THEN hospitaldischargeoffset/1440
     ELSE NULL END AS death_offset,
sofatotal_dayLowestHmg,
sofatotal_dayAfterLowestHmg,
lowest_hgb_offset.labresultoffset/1440 as hgbmin_offset,
trsfsn.treatmentoffset/1440 as transfusion_offset,
CASE WHEN transfusioncount >= 1 THEN 1 ELSE 0 END AS transfusedmarker,
CASE WHEN transfusioncount >= 1 THEN transfusioncount ELSE 0 END as transfusioncount
FROM patient
LEFT JOIN conditions_list USING(patientunitstayid)
LEFT JOIN apachepatientresult USING(patientunitstayid) 
LEFT JOIN lowest_hgb_offset USING(patientunitstayid)
LEFT JOIN sofalist USING(patientunitstayid)
LEFT JOIN treatment USING(patientunitstayid)
LEFT JOIN trsfsn USING(patientunitstayid)
)

### eICU cohort breakdown code

In [0]:
%%bigquery eicu_total_cases

SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_total_cases
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`

In [0]:
%%bigquery eicu_exclude_agebelow18

SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_agebelow18
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18

In [0]:
%%bigquery eicu_exclude_readmissions

SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_readmissions
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1

In [0]:
%%bigquery eicu_exclude_bleeding_trauma_dx

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_bleeding_trauma_dx
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)

In [0]:
%%bigquery eicu_exclude_missing_fluids_data

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_missing_fluids_data
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)

In [0]:
%%bigquery eicu_exclude_missing_hemoglobin

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_missing_hemoglobin
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL

In [0]:
%%bigquery eicu_exclude_hb_6

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_missing_hemoglobin
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6

In [0]:
%%bigquery eicu_exclude_missing_sofa_scores

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_missing_sofa_scores
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
AND sofatotal_dayLowestHmg IS NOT NULL
AND sofatotal_dayAfterLowestHmg IS NOT NULL

In [0]:
%%bigquery eicu_exclude_invalid_transfusion_offset

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_exclude_missing_sofa_scores
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2)
AND subgroup_flag = 1 

In [0]:
%%bigquery eicu_final_transfused

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_final_transfused
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusion_offset - hgbmin_offset) < 2
AND subgroup_flag = 1 
AND transfusedmarker = 1

In [0]:
%%bigquery eicu_final_nontransfused

WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
COUNT(DISTINCT(patientunitstayid)) as eicu_final_nontransfused
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND subgroup_flag = 1 
AND transfusedmarker = 0

## Amsterdam SQL code


In [0]:
%%bigquery 

CREATE OR REPLACE TABLE `amsterdam-translation.amsterdam_custom.amsterdam_transfusion` AS (
WITH adm as (
    SELECT 
        admissionid,
        patientid,
        agegroup,
        admissioncount,
        CASE WHEN gender = 'Vrouw' THEN 'Female' WHEN gender = 'Man' THEN 'Male' ELSE gender END AS gender,
        weightgroup,
        lengthgroup,
        CASE WHEN location = 'MC' THEN 'High Dependency' WHEN location = 'IC' THEN 'ICU' END AS location,
        CASE WHEN origin IN ('Eerste Hulp afdeling zelfde ziekenhuis','Eerste Hulp afdeling ander ziekenhuis') THEN 'emergency'
             WHEN origin IN ('Recovery zelfde ziekenhuis (alleen bij niet geplande IC-opname)','Recovery ander ziekenhuis') THEN 'recovery'
             WHEN origin IN ('Operatiekamer vanaf verpleegafdeling zelfde ziekenhuis','Operatiekamer vanaf Eerste Hulp afdeling zelfde ziekenhuis') THEN 'operating_room'
             WHEN origin IN ('Special/Medium care zelfde ziekenhuis','Special/Medium care ander ziekenhuis') THEN 'special_medium_care'
             WHEN origin IN ('CCU/IC zelfde ziekenhuis','CCU/IC ander ziekenhuis') THEN 'other_icu'
             WHEN origin IN ('Verpleegafdeling zelfde ziekenhuis','Verpleegafdeling ander ziekenhuis') THEN 'floor'
             WHEN origin IN ('Andere locatie zelfde ziekenhuis, transport per ambulance','Huis','Anders') THEN 'other' 
             ELSE origin END AS origin,
        CASE WHEN origin IN ('Verpleegafdeling zelfde ziekenhuis',
                             'Eerste Hulp afdeling zelfde ziekenhuis',
                             'CCU/IC zelfde ziekenhuis',
                             'Recovery zelfde ziekenhuis (alleen bij niet geplande IC-opname)',
                             'Special/Medium care zelfde ziekenhuis',
                             'Andere locatie zelfde ziekenhuis, transport per ambulance',
                            'Operatiekamer vanaf Eerste Hulp afdeling zelfde ziekenhuis') THEN true ELSE false END AS origin_same_hospital, 
        CEIL(dischargedat/3600000) as hospital_los_hours, -- dischargedat = number of milliseconds from hospital admission to hospital discharge; 1 hour = 3.6 million milliseconds
        CASE WHEN destination = 'Overleden' THEN true ELSE false END as icu_mortality,
        -- CASE WHEN dateofdeath BETWEEN admittedat AND (admittedat+lengthofstay) THEN true ELSE false END AS icu_mortality, -- admittedat = milliseconds since first admission
        CASE WHEN dateofdeath < dischargedat THEN true ELSE false END AS hospital_mortality, -- dischargedat = milliseconds since hospital admission
        CASE WHEN dateofdeath IS NOT NULL THEN true ELSE false END AS all_mortality, -- dateofdeath = milliseconds since admission
        dateofdeath,
        lengthofstay as icu_los_hours, -- lengthofstay = milliseconds since ICU admission
        --Amsterdam UMC, like most Dutch medical centers run mixed ICUs, there is not distinction between e.g. CCU and MICU and SICU
        CASE WHEN specialty = 'Cardiologie' THEN 'Medical - Cardiology'
             WHEN specialty = 'Neurologie' THEN 'Medical - Neurology'
             WHEN specialty = 'Cardiochirurgie' THEN 'Cardiac Surgery'
             WHEN specialty = 'Inwendig' THEN 'Medical - Internal Medicine'
             WHEN specialty = 'Longziekte' THEN 'Medical - Pulmonology'
             WHEN specialty = 'Nefrologie' THEN 'Medical - Nephrology'
             WHEN specialty = 'Hematologie' THEN 'Medical - Hematology'
             WHEN specialty = 'Reumatologie' THEN 'Medical - Rheumatology'
             WHEN specialty = 'Oncologie Inwendig' THEN 'Medical - Oncology'
             WHEN specialty = 'Maag-,Darm-,Leverziekten' THEN 'Medical - Gastroenterology and Hepatology'
             WHEN specialty = 'Intensive Care Volwassenen' THEN 'Medical - Intensive Care Medicine'
             WHEN specialty = 'Urologie' THEN 'Surgical - Urology'
             WHEN specialty IN('Obstetrie','Gynaecologie','Verloskunde') THEN 'Surgical - OBGYN'
             WHEN specialty = 'Orthopedie' THEN 'Surgical - Orthopedics'
             WHEN specialty = 'Oogheelkunde' THEN 'Surgical - Ophthalmology'
             WHEN specialty = 'Mondheelkunde' THEN 'Surgical - Oral Surgery'
             WHEN specialty = 'Traumatologie' THEN 'Surgical - Trauma Surgery'
             WHEN specialty = 'Vaatchirurgie' THEN 'Surgical - Vascular surgery'
             WHEN specialty = 'Heelkunde Oncologie' THEN 'Surgical - Oncology'
             WHEN specialty = 'Keel, Neus & Oorarts' THEN 'Surgical - ENT'
             WHEN specialty = 'Plastische chirurgie' THEN 'Surgical - Plastic Surgery'
             WHEN specialty = 'Heelkunde Longen/Oncologie' THEN 'Surgical - Thoracic Surgery'
             WHEN specialty = 'Heelkunde Gastro-enterologie' THEN 'Surgical - GI Surgery'
             WHEN specialty = 'Neurochirurgie' THEN 'Surgical - Neurosurgery'
             ELSE 'Other' -- also contains 'ders'
             END AS unitType
    FROM `physionet-data.amsterdamdb.admissions`
), tx AS (
    SELECT 
    di.admissionid,
    MIN(di.start/86400000) AS transfusionoffset,
    MAX(CASE WHEN item IS NOT NULL THEN 1 END) AS transfusedmarker,
    COUNT(item) as transfusioncount
    FROM `physionet-data.amsterdamdb.drugitems` di
    LEFT JOIN adm a
        ON di.admissionid=a.admissionid
    WHERE di.item IN("Gefiltreerde Ery's",'Packed cells')
    AND a.admissioncount = 1
    GROUP BY di.admissionid
), daily_sofa_labs as (
        with all_days as (
        SELECT
        admissionid
        , 0 as endoffset
        , lengthofstay as startoffset
        , GENERATE_ARRAY(0, CAST(ceil(lengthofstay/24.0) AS INT64)) as day
    from `physionet-data.amsterdamdb.admissions`
    ), daily as (
        SELECT
        admissionid
        , CAST(day AS INT64) as day
        , endoffset + day-1 as startoffset
        , endoffset + day as endoffset
        FROM all_days
        CROSS JOIN UNNEST(all_days.day) AS day
    ), gcs as (
        SELECT 
        admissionid,
        measuredat/86400000 as day,
        CASE WHEN item = 'Actief openen van de ogen' AND value = 'Geen reactie' THEN 1 -- not reactive
            WHEN item = 'Actief openen van de ogen' AND value = 'Reactie op pijnprikkel' THEN 2 -- opens to painful stimuli
            WHEN item = 'Actief openen van de ogen' AND value = 'Reactie op verbale prikkel' THEN 3 -- opens to verbal stimuli
            WHEN item = 'Actief openen van de ogen' AND value = 'Spontane reactie' THEN 4 -- opens spontaneously
            ELSE NULL
            END AS gcs_eye,
        CASE WHEN item = 'Beste verbale reactie' AND value = 'Geen reactie (geen zichtbare poging tot praten)' THEN 1 -- no response (no visible attempt to speak)
            WHEN item = 'Beste verbale reactie' AND value = 'Geïntubeerd' THEN 1 --intubated
            WHEN item = 'Beste verbale reactie' AND value = 'Onbegrijpelijke geluiden' THEN 2 -- incomprehensible sounds
            WHEN item = 'Beste verbale reactie' AND value = 'Onduidelijke woorden (pogingen tot communicatie, maar onduidelijk)' THEN 3 -- unclear words (attempts at communication, but unclear)
            WHEN item = 'Beste verbale reactie' AND value = 'Verwarde conversatie' THEN 4 -- confused conversation
            WHEN item = 'Beste verbale reactie' AND value = 'Helder en adequaat (communicatie mogelijk)' THEN 5 -- clear and adequate communication 
            ELSE NULL
            END AS gcs_verbal,
        CASE WHEN item = 'Beste motore reactie van de armen' AND value = 'Geen reactie' THEN 1 -- no response 
            WHEN item = 'Beste motore reactie van de armen' AND value = 'Strekken' THEN 2 -- stretching (?extension to pain)
            WHEN item = 'Beste motore reactie van de armen' AND value = 'Decortatie reflex (abnormaal buigen)' THEN 3 -- Abnormal flexion
            WHEN item = 'Beste motore reactie van de armen' AND value = 'Spastische reactie (terugtrekken)' THEN 4 -- Spastic reaction (withdrawal)
            WHEN item = 'Beste motore reactie van de armen' AND value = 'Localiseert pijn' THEN 5 -- locates pain
            WHEN item = 'Beste motore reactie van de armen' AND value = "Volgt verbale commando's op" THEN 6 -- follows verbal commands
            ELSE NULL 
            END AS gcs_motor
        FROM `physionet-data.amsterdamdb.listitems`
        WHERE item IN('Actief openen van de ogen','Beste verbale reactie','Beste motore reactie van de armen')
    )
    SELECT
    d.admissionid,
    sofa_gcs_score,
    sofa_resp_score,
    sofa_circ_score,
    sofa_liver_score,
    sofa_hematology_score,
    (sofa_gcs_score + sofa_resp_score + sofa_circ_score + sofa_liver_score + sofa_hematology_score) AS sofa_score, 
    hgb_min as hgbmin, 
    trop_max, 
    creatinine_max,
    ph_min, 
    lactate_max,
    d.day
    FROM daily d
    LEFT JOIN(
        SELECT
        admissionid,
        day,
        CASE WHEN MIN(gcs_eye) + MIN(gcs_verbal) + MIN(gcs_motor) =  15 THEN 0
            WHEN MIN(gcs_eye) + MIN(gcs_verbal) + MIN(gcs_motor) BETWEEN 13 AND 14 THEN 1
            WHEN MIN(gcs_eye) + MIN(gcs_verbal) + MIN(gcs_motor) BETWEEN 10 AND 12 THEN 2
            WHEN MIN(gcs_eye) + MIN(gcs_verbal) + MIN(gcs_motor) BETWEEN 6 AND 9 THEN 3
            WHEN MIN(gcs_eye) + MIN(gcs_verbal) + MIN(gcs_motor) < 6 THEN 4
            END AS sofa_gcs_score
        FROM (
            SELECT 
            d.admissionid,
            d.day,
            -- Neurology
            -- Glasgow coma scale	SOFA score
            -- 15	0
            -- 13–14	+1
            -- 10–12	+2
            -- 6–9	+3
            -- < 6	+4
            CASE WHEN gcs_eye IS NOT NULL THEN gcs_eye
                ELSE (SELECT MIN(gcs.gcs_eye) 
                    FROM gcs 
                    WHERE d.admissionid = gcs.admissionid 
                    AND gcs.day < d.day) 
                END AS gcs_eye,
            CASE WHEN gcs_verbal IS NOT NULL THEN gcs_verbal
                ELSE (SELECT MIN(gcs.gcs_verbal) 
                    FROM gcs 
                    WHERE d.admissionid = gcs.admissionid 
                    AND gcs.day < d.day) 
                END AS gcs_verbal,
            CASE WHEN gcs_motor IS NOT NULL THEN gcs_motor
                ELSE (SELECT MIN(gcs.gcs_motor) 
                    FROM gcs 
                    WHERE d.admissionid = gcs.admissionid 
                    AND gcs.day < d.day) 
                END AS gcs_motor
            FROM daily d
            LEFT JOIN gcs
                ON d.admissionid = gcs.admissionid
                AND gcs.day BETWEEN d.startoffset AND d.endoffset
        )
        GROUP BY admissionid, day
    ) gcs_daily
    ON d.admissionid = gcs_daily.admissionid
    AND d.day = gcs_daily.day
    LEFT JOIN (
        SELECT 
        d.admissionid,
        d.day,
        -- Respiratory
        -- PaO2/FiO2 [mmHg (kPa)]	SOFA score
        -- ≥ 400 (53.3)	0
        -- < 400 (53.3)	+1
        -- < 300 (40)	+2
        -- < 200 (26.7) and mechanically ventilated	+3
        -- < 100 (13.3) and mechanically ventilated	+4
        CASE WHEN MIN(pao2)/MIN(fio2) >= 400 THEN 0
            WHEN MIN(pao2)/MIN(fio2) BETWEEN 300 AND 399 THEN 1
            WHEN MIN(pao2)/MIN(fio2) BETWEEN 200 AND 299 THEN 2
            WHEN MIN(pao2)/MIN(fio2) BETWEEN 100 AND 199 THEN 3
            WHEN MIN(pao2)/MIN(fio2) < 100 THEN 4
            ELSE NULL
            END as sofa_resp_score
        FROM daily d
        LEFT JOIN (
            SELECT 
            admissionid,
            measuredat/86400000 as day,
            CASE WHEN item IN('PO2 (bloed)','PO2') AND value > 20 THEN value 
                WHEN item IN('PO2 (bloed) - kPa') AND value > 20/7.50062 THEN value * 7.50062 ELSE NULL END AS pao2,
            CASE WHEN item IN('O2 concentratie (Set)','O2 concentratie','FiO2 %','SET %O2','A_FiO2','MCA_FiO2') AND value > 1 AND value <= 100 THEN value/100 
                WHEN item IN('O2 concentratie (Set)','O2 concentratie','FiO2 %','SET %O2','A_FiO2','MCA_FiO2') AND value BETWEEN 0.2 AND 1 THEN value ELSE NULL END AS fio2, -- recorded as percentage
            FROM 
            `physionet-data.amsterdamdb.numericitems` 
            WHERE item IN('PO2 (bloed)','PO2','PO2 (bloed) - kPa','O2 concentratie (Set)','O2 concentratie','FiO2 %','SET %O2','A_FiO2','MCA_FiO2')
        ) pf
            ON d.admissionid = pf.admissionid
            AND pf.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) pf_daily
    ON d.admissionid = pf_daily.admissionid
    AND d.day = pf_daily.day
    LEFT JOIN (
        SELECT 
        d.admissionid,
        d.day,
        -- Circulatory
        -- Mean arterial pressure OR administration of vasopressors required	SOFA score
        -- MAP ≥ 70 mmHg	0
        -- MAP < 70 mmHg	+1
        -- dopamine ≤ 5 μg/kg/min or dobutamine (any dose)	+2
        -- dopamine > 5 μg/kg/min OR epinephrine ≤ 0.1 μg/kg/min OR norepinephrine ≤ 0.1 μg/kg/min	+3
        -- dopamine > 15 μg/kg/min OR epinephrine > 0.1 μg/kg/min OR norepinephrine > 0.1 μg/kg/min	+4
        CASE WHEN MIN(map) >= 70.0 THEN 0
            WHEN MIN(map) < 70.0 THEN 1
            WHEN MAX(dopamine_dose) <= 5.0 OR MAX(dobutamine_dose) IS NOT NULL THEN 2
            WHEN MAX(dopamine_dose) BETWEEN 6.0 AND 15.0 OR MAX(epinephrine_dose) <= 0.1 OR MAX(norepinephrine_dose) < 0.1 THEN 3
            WHEN MAX(dopamine_dose) > 15.0 OR MAX(epinephrine_dose) > 0.1 OR MAX(norepinephrine_dose) > 0.1 THEN 4
            ELSE NULL
            END as sofa_circ_score
        FROM daily d
        LEFT JOIN (
            SELECT 
            admissionid,
            measuredat/86400000 as day,
            CASE WHEN item IN('ABP gemiddeld','Niet invasieve bloeddruk gemiddeld','IABP Mean Blood Pressure') AND value > 20 THEN value ELSE NULL END AS map
            FROM `physionet-data.amsterdamdb.numericitems` 
            WHERE item IN('ABP gemiddeld','Niet invasieve bloeddruk gemiddeld','IABP Mean Blood Pressure')
        ) map_daily
            ON d.admissionid = map_daily.admissionid
            AND map_daily.day BETWEEN d.startoffset AND d.endoffset
        LEFT JOIN (
            SELECT 
            drug.admissionid,
            drug.start/86400000 as day,
            CASE WHEN item = 'Dopamine (Inotropin)' AND doseunit = 'mg' AND doserateunit = 'uur' THEN dose * 1000/(adm.weight*60) ELSE NULL END AS dopamine_dose,
            CASE WHEN item = 'Dobutamine (Dobutrex)' AND doseunit = 'mg' AND doserateunit = 'uur' THEN dose * 1000/(adm.weight*60) ELSE NULL END AS dobutamine_dose,
            CASE WHEN item = 'Noradrenaline (Norepinefrine)' AND doseunit = 'mg' AND doserateunit = 'uur' THEN dose * 1000/(adm.weight*60) ELSE NULL END AS epinephrine_dose,
            CASE WHEN item = 'Adrenaline (Epinefrine)' AND doseunit = 'mg' AND doserateunit = 'uur' THEN dose * 1000/(adm.weight*60) ELSE NULL END as norepinephrine_dose
            FROM `physionet-data.amsterdamdb.drugitems` drug 
            LEFT JOIN (
                SELECT
                admissionid,
                weight
                FROM (
                    SELECT 
                    admissionid,
                    CASE WHEN weight IS NOT NULL THEN weight
                            WHEN weightgroup = '59-' THEN 59
                            WHEN weightgroup = '60-69' THEN 65
                            WHEN weightgroup = '70-79' THEN 75
                            WHEN weightgroup = '80-89' THEN 85
                            WHEN weightgroup = '90-99' THEN 95
                            WHEN weightgroup = '100-109' THEN 105
                            WHEN weightgroup = '110+' THEN 110
                            ELSE NULL END AS weight,
                    FROM (
                        SELECT 
                        a.admissionid,
                        ANY_VALUE(a.weightgroup) as weightgroup,
                        MAX(CASE WHEN n.item IN('PatiëntGewicht','Gewicht bij opname') THEN n.value ELSE NULL END) as weight
                        FROM `physionet-data.amsterdamdb.numericitems` n
                        LEFT JOIN `physionet-data.amsterdamdb.admissions` a
                        ON n.admissionid = a.admissionid
                        WHERE item IN('PatiëntGewicht','Gewicht bij opname')
                        GROUP BY a.admissionid
                    )
                ) 
            ) adm
            ON drug.admissionid = adm.admissionid
            WHERE drug.item IN('Dopamine (Inotropin)','Dobutamine (Dobutrex)','Noradrenaline (Norepinefrine)','Adrenaline (Epinefrine)')
        ) vasopressors
            ON d.admissionid = vasopressors.admissionid
            AND vasopressors.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) circ_daily
    ON d.admissionid = circ_daily.admissionid
    AND d.day = circ_daily.day
    LEFT JOIN (
        SELECT
        d.admissionid,
        d.day,
        -- Liver
        -- Bilirubin (mg/dl) [μmol/L]	SOFA score
        -- < 1.2 [< 20]	0
        -- 1.2–1.9 [20-32]	+1
        -- 2.0–5.9 [33-101]	+2
        -- 6.0–11.9 [102-204]	+3
        -- > 12.0 [> 204]	+4
        -- units are in umol/L for amsterdamdb
        CASE WHEN MAX(bilirubin) < 20 THEN 0
            WHEN MAX(bilirubin) BETWEEN 20 AND 32 THEN 1
            WHEN MAX(bilirubin) BETWEEN 33 AND 101 THEN 2
            WHEN MAX(bilirubin) BETWEEN 102 AND 204 THEN 3
            WHEN MAX(bilirubin) > 204 THEN 4
            ELSE 0
            END AS sofa_liver_score
        FROM daily d
        LEFT JOIN (
            SELECT 
            admissionid,
            measuredat/86400000 as day,
            CASE WHEN item IN('Bilirubine (bloed)','Bili Totaal') THEN value END AS bilirubin
            FROM `physionet-data.amsterdamdb.numericitems`
            WHERE item IN('Bilirubine (bloed)','Bili Totaal')
            AND islabresult
        ) bil
            ON d.admissionid = bil.admissionid
            AND bil.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) liver_daily
        ON d.admissionid = liver_daily.admissionid
        AND d.day = liver_daily.day
    LEFT JOIN (
        SELECT
        d.admissionid,
        d.day,
        -- Hematology
        -- Platelets×103/μl	SOFA score
        -- ≥ 150	0
        -- < 150	+1
        -- < 100	+2
        -- < 50	+3
        -- < 20	+4
        CASE WHEN MIN(platelets) >= 150 THEN 0
            WHEN MIN(platelets) BETWEEN 100 AND 149 THEN 1
            WHEN MIN(platelets) BETWEEN 50 AND 99 THEN 2
            WHEN MIN(platelets) BETWEEN 20 AND 49 THEN 3
            WHEN MIN(platelets) BETWEEN 0 AND 20 THEN 4
            ELSE NULL
            END AS sofa_hematology_score 
        FROM daily d
        LEFT JOIN (
            SELECT 
            admissionid,
            measuredat/86400000 as day,
            value as platelets
            FROM `physionet-data.amsterdamdb.numericitems`
            WHERE item = "Thrombo's (bloed)" AND islabresult
        ) plt
            ON d.admissionid=plt.admissionid
            AND plt.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) hematology_daily
        ON d.admissionid = hematology_daily.admissionid
        AND d.day = hematology_daily.day
    LEFT JOIN (
        SELECT
        d.admissionid,
        d.day,
        -- Renal 
        -- Creatinine (mg/dl) [μmol/L] (or urine output)	SOFA score
        -- < 1.2 [< 110]	0
        -- 1.2–1.9 [110-170]	+1
        -- 2.0–3.4 [171-299]	+2
        -- 3.5–4.9 [300-440] (or < 500 ml/d)	+3
        -- > 5.0 [> 440] (or < 200 ml/d)	+4
        -- units in amsterdamdb are umol/L
        CASE WHEN MAX(creatinine) < 110 THEN 0
            WHEN MAX(creatinine) BETWEEN 110 AND 170 THEN 1
            WHEN MAX(creatinine) BETWEEN 171 AND 299 THEN 2
            WHEN MAX(creatinine) BETWEEN 300 AND 440 THEN 3
            WHEN MAX(creatinine) > 440 THEN 4
            ELSE NULL
            END AS sofa_renal_score
        FROM daily d
        LEFT JOIN (
            SELECT
            admissionid,
            measuredat/86400000 as day,
            value as creatinine
            FROM `physionet-data.amsterdamdb.numericitems`
            WHERE item = 'Kreatinine (bloed)' AND islabresult
        ) cr
            ON d.admissionid=cr.admissionid
            AND cr.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) daily_renal
        ON d.admissionid=daily_renal.admissionid
        AND d.day=daily_renal.day
    LEFT JOIN (
        SELECT 
        d.admissionid,
        d.day,
        MIN(hemoglobin) as hgb_min,
        MAX(trop) as trop_max,
        MAX(creatinine) as creatinine_max,
        MIN(ph) as ph_min,
        MAX(lactate) as lactate_max
        FROM daily d
        LEFT JOIN (
            SELECT
            admissionid,
            measuredat/86400000 as day,
            CASE WHEN item IN('Hb (bloed)','Hb(v.Bgs) (bloed)') THEN value * 1.61 ELSE NULL END AS hemoglobin, --hemoglobin is recorded as mmol/dL, multiply by 1.61 to convert to the more commonly used mg/dL 
            CASE WHEN item IN('TroponineT (bloed)','Troponine') THEN value ELSE NULL END AS trop,
            CASE WHEN item IN('Kreatinine (bloed)') THEN value ELSE NULL END AS creatinine,
            CASE WHEN item IN('pH (bloed)') THEN value ELSE NULL END AS ph,
            CASE WHEN item IN('Lactaat (bloed)') THEN value ELSE NULL END AS lactate
            FROM `physionet-data.amsterdamdb.numericitems`
            WHERE islabresult
        ) lab
            ON d.admissionid = lab.admissionid
            AND lab.day BETWEEN d.startoffset AND d.endoffset
        GROUP BY d.admissionid, d.day
    ) daily_labs
        ON d.admissionid=daily_labs.admissionid
        AND d.day=daily_labs.day
    ORDER BY d.admissionid, d.day
), sofa as (
    SELECT
    a.admissionid,
    MIN(admission_hgbmin) as admission_hgbmin,
    MAX(day_min_hb) AS day,
    MIN(CASE WHEN a.day BETWEEN tx.transfusionoffset - 2 AND tx.transfusionoffset 
             AND a.hgbmin BETWEEN 2 AND 25 THEN a.hgbmin 
             WHEN tx.transfusionoffset IS NULL THEN admission_hgbmin ELSE NULL END) as hgbmin,
    MAX(CASE WHEN day = day_min_hb AND sofa_score IS NOT NULL THEN sofa_score END) as sofatotal_dayLowestHmg,
    MAX(CASE WHEN day = day_after AND sofa_score IS NOT NULL THEN sofa_score END) as sofatotal_dayAfterLowestHmg,
    1 AS dummykey
    FROM daily_sofa_labs a
    LEFT JOIN (
        SELECT 
        a.admissionid,
        admission_hgbmin,
        day AS day_min_hb,
        day + 1 AS day_after
        FROM (
            SELECT
            admissionid,
            MIN(hgbmin) as admission_hgbmin
            FROM daily_sofa_labs
            GROUP BY admissionid
        ) a
        LEFT JOIN (
            SELECT 
            admissionid,
            day,
            hgbmin
            FROM daily_sofa_labs
        ) b
        ON a.admissionid=b.admissionid
        AND a.admission_hgbmin=b.hgbmin
    ) b
    ON a.admissionid=b.admissionid
    LEFT JOIN tx
        ON a.admissionid=tx.admissionid
    GROUP BY a.admissionid
), sofa_quantiles AS (
    SELECT 
    --day of lowest Hb
    sofatotal_dayLowestHmg_quantiles[OFFSET(0)] AS sofatotal_dayLowestHmg_q1_min,
    sofatotal_dayLowestHmg_quantiles[OFFSET(1)] AS sofatotal_dayLowestHmg_q1_max,
    sofatotal_dayLowestHmg_quantiles[OFFSET(1)] + 1 AS sofatotal_dayLowestHmg_q2_min,
    sofatotal_dayLowestHmg_quantiles[OFFSET(2)] AS sofatotal_dayLowestHmg_q2_max,
    sofatotal_dayLowestHmg_quantiles[OFFSET(2)] + 1 AS sofatotal_dayLowestHmg_q3_min,
    sofatotal_dayLowestHmg_quantiles[OFFSET(3)] as sofatotal_dayLowestHmg_q3_max,
    --day after lowest Hb
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(0)] AS sofatotal_dayAfterLowestHmg_q1_min,
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(1)] AS sofatotal_dayAfterLowestHmg_q1_max,
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(1)] + 1 AS sofatotal_dayAfterLowestHmg_q2_min,
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(2)] AS sofatotal_dayAfterLowestHmg_q2_max,
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(2)] + 1 AS sofatotal_dayAfterLowestHmg_q3_min,
    sofatotal_dayAfterLowestHmg_quantiles[OFFSET(3)] as sofatotal_dayAfterLowestHmg_q3_max,
    1 AS dummykey
    FROM (
        SELECT 
        APPROX_QUANTILES(sofatotal_dayLowestHmg, 3) AS sofatotal_dayLowestHmg_quantiles,
        APPROX_QUANTILES(sofatotal_dayAfterLowestHmg, 3) AS sofatotal_dayAfterLowestHmg_quantiles
        FROM sofa
    )
), apache AS (
    SELECT
    admissionid,
    measuredat,
    CASE WHEN value BETWEEN 0 AND 71 THEN value ELSE NULL END AS apacheScore,
    RANK() OVER (PARTITION BY admissionid ORDER BY measuredat ASC) AS rank
    FROM
    `physionet-data.amsterdamdb.numericitems`
    WHERE item = 'A_Apache_Score'
)
SELECT 
-- admissions table
adm.admissionid as patientunitstayid, 
--ANY_VALUE(adm.unitType) as unitType, 
ANY_VALUE(agegroup) as age,
ANY_VALUE(adm.gender) as gender,
--MAX(CASE WHEN apache.rank = 1 AND (apache.measuredat/86400000 < sofa.day) THEN apache.apacheScore ELSE NULL END) as apacheScore,
MAX(CASE WHEN sofa.hgbmin = sofa.admission_hgbmin THEN 1 ELSE 0 END) AS subgroup_flag,
MIN(CASE WHEN sofa.hgbmin = dsl.hgbmin then dsl.day END) as hgbmin_offset,
MIN(sofa.hgbmin) as hgbmin,
-- processitems
MAX(CASE WHEN adm.admissionid IN (
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.processitems`
    WHERE item IN('Beademen')
) THEN 1 ELSE 0 END) as ventmarker,
-- listitems
MAX(CASE WHEN adm.admissionid IN (
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE item IN('Opname Sepsis','DMC_Opname Sepsis') AND value = 'Ja'
) THEN 1 ELSE 0 END) AS septicflag,
MAX(CASE WHEN adm.admissionid IN (
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.drugitems`
    WHERE item IN('Dobutamine (Dobutrex)','Dopamine (Inotropin)','Isoprenaline (Isuprel)','Noradrenaline (Norepinefrine)','Adrenaline (Epinefrine)')
) THEN 1 ELSE 0 END) as vasopressor, 
MAX(pvdflag) as pvdflag,
MAX(ihdflag) as ihdflag,
MAX(chfflag) as chfflag,
MAX(esrfflag) as esrfflag,
MAX(surgeryflag) as surgeryflag,
MAX(adm.icu_mortality) as icu_mort,
MAX(adm.hospital_mortality) as expiremarker,
MIN(dateofdeath/86400000) as death_offset, 
MAX(sofa.sofatotal_dayLowestHmg) as sofatotal_dayLowestHmg,
MAX(sofa.sofatotal_dayAfterLowestHmg) as sofatotal_dayAfterLowestHmg, 
MIN(tx.transfusionoffset) as transfusion_offset,
MAX(CASE WHEN transfusedmarker = 1 THEN 1 ELSE 0 END) as transfusedmarker,
MAX(CASE WHEN transfusioncount >= 1 THEN transfusioncount ELSE 0 END) as transfusioncount
FROM adm 
LEFT JOIN sofa
    ON adm.admissionid = sofa.admissionid
LEFT JOIN daily_sofa_labs dsl
    ON adm.admissionid = dsl.admissionid
LEFT JOIN (
    SELECT
    admissionid,
    MAX(CASE WHEN value IN(
                    'Post-operative cardiovascular - Graft, aorto-iliac bypass',
                    'Post-operative cardiovascular - Dilatation (with general anesthesia)',
                    'Vaatchirurgie','Operatief Cardiovasculair - Perifeer vasculair','Perifeer vasculair',
                    'Post-operative cardiovascular - Complications of prev. peripheral vasc. surgery, (i.e.ligation of bleeder, exploration',
                    'Complications of prev. peripheral vasc. surgery, (i.e.ligation of bleeder, exploration',
                    'PTCA (perifere vaten)','Post-operative cardiovascular - Graft, aorto-femoral bypass',
                    'Post-operative cardiovascular - Graft, femoral-femoral bypass',
                    'Post-operative cardiovascular - Graft, femoral-popliteal bypass',
                    'Non-operative cardiovascular - Vascular medical, other','Vascular surgery, other',
                    'Operatief Cardiovasculair - Perifeer vasculair','Endovasculaire stent plaatsing',
                    'Post-operative cardiovascular - Endarterectomy (other vessels)',
                    'Post-operative cardiovascular - Endarterectomy, carotid',
                    'Post-operative cardiovascular - Grafts, all other bypass (except renal)',
                    'Non-operative cardiovascular - Thrombus,arterial','Embolectomie/trombolyse',
                    'Post-operative cardiovascular - Thrombectomy (with general anesthesia)',
                    'Thrombus,arterial')
                    THEN 1 ELSE 0
                    END) as pvdflag,
    MAX(CASE WHEN value IN('Coronair lijden','Chron. cardiovasculaire ziekte',
                           'Operatief Cardiovasculair - Chron. cardiovasculaire ziekte',
                           'Post-operative cardiovascular - CABG redo with other operation',
                           'Non-operative cardiovascular - Angina, stable (asymp or stable pattern of symptoms w/meds)',
                           'Non-operative cardiovascular - Complications of previous open heart surgery (i.e. bleeding, infection etc.)',
                           'Post-operative cardiovascular - Complications of previous open-heart surgery, surgery for (i.e. bleeding, infection',
                           'Post-operative cardiovascular - CABG with other operation',
                           'Post-operative cardiovascular - CABG redo with valve repair/replacement',
                           'Post-operative cardiovascular - CABG alone, coronary artery bypass grafting',
                           'Post-operative cardiovascular - CABG with mitral valve replacement',
                           'Post-operative cardiovascular - CABG alone, redo',
                           'Post-operative cardiovascular - CABG with double valve repair/replacement',
                           'Non-operative cardiovascular - Cardiac arrest (with or without respiratory arrest;',
                           'Non-operative cardiovascular - MI admitted > 24hrs after onset of ischemia',
                           'Non-operative cardiovascular - Contusion, myocardial (include R/O)',
                           'Non-operative cardiovascular - Infarction, acute myocardial (MI), ANTERIOR',
                           'Infarction, acute myocardial (MI), ANTERIOR',
                           'Angina pectoris/myocardinfarct',
                           'Infarction, acute myocardial (MI), INFEROLATERAL',
                           'Non-operatief Cardiovasculair - Na cardiac arrest',
                           'Post-operative cardiovascular - Cardiac arrest (with or without respiratory arrest; for respiratory arrest see',
                           'Infarction, acute myocardial (MI), none of the above',
                           'Non-operative cardiovascular - Infarction, acute myocardial (MI), INFEROLATERAL',
                           'Non-operative cardiovascular - Infarction, acute myocardial (MI), none of the above',
                           'Infarction, acute myocardial (MI), NON Q Wave',
                           'Non-operative cardiovascular - Infarction, acute myocardial (MI), NON Q Wave',
                           'Non-operative cardiovascular - Angina, unstable (angina interferes w/quality of life or meds are tolerated poorly)',
                           'Operatief Cardiovasculair - Na cardiac arrest',
                           'Cardiovasculair - Myocard infarct') 
                    THEN 1 ELSE 0
                    END) as ihdflag, 
    MAX(CASE WHEN value IN('Non-operatief Cardiovasculair - Congestief hart falen',
                        'Non-operative cardiovascular - CHF, congestive heart failure',
                        'Congestief hart falen',
                        'CHF, congestive heart failure')
                        THEN 1 ELSE 0 END) AS chfflag,
    MAX(CASE WHEN value IN('Dialyselijn Subclavia',
                           'DMC_Interne Geneeskunde_Renaal',
                           'Non-operatief Renaal',
                           'Dialyselijn',
                           'Apache II Operatief Renaal',
                           'Graft for dialysis, insertion of',
                           'D_Interne Geneeskunde_Renaal',
                           'Apache II Non-Operatief Renaal',
                           'Renaal',
                           'Post-operative cardiovascular - Graft for dialysis, insertion of')
                        THEN 1 ELSE 0 END) AS esrfflag,
    MAX(CASE WHEN LOWER(value) LIKE '%surgery%' OR LOWER(value) LIKE '%post-operative%' THEN 1 ELSE 0 END) as surgeryflag
    FROM `physionet-data.amsterdamdb.listitems`
    GROUP BY admissionid
) pmhx
    ON adm.admissionid=pmhx.admissionid
LEFT JOIN apache 
    ON adm.admissionid = apache.admissionid
LEFT JOIN tx
    ON adm.admissionid = tx.admissionid
GROUP BY adm.admissionid
ORDER BY adm.admissionid
)

### Amstermdam cohort breakdown code

In [0]:
%%bigquery amsterdam_total_cases

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_total_cases
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`

In [0]:
%%bigquery amsterdam_exclude_agebelow18

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_readmissions
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL

In [0]:
%%bigquery amsterdam_exclude_readmissions

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_readmissions
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)

In [0]:
%%bigquery amsterdam_exclude_bleeding_trauma_dx

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_bleeding_dx
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)

In [0]:
%%bigquery amsterdam_exclude_missing_fluids_data 

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_readmissions
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND transfusedmarker IS NOT NULL

In [0]:
%%bigquery amsterdam_exclude_missing_hemoglobin

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_missing_hemoglobin
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND transfusedmarker IS NOT NULL
AND hgbmin IS NOT NULL

In [0]:
%%bigquery amsterdam_exclude_hb_6

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_missing_hemoglobin
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND transfusedmarker IS NOT NULL
AND hgbmin IS NOT NULL
AND hgbmin >= 6

In [0]:
%%bigquery amsterdam_exclude_missing_sofa_scores

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_missing_sofa_scores
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND transfusedmarker IS NOT NULL
AND hgbmin IS NOT NULL 
AND hgbmin >= 6
AND sofatotal_dayLowestHmg IS NOT NULL
AND sofatotal_dayAfterLowestHmg IS NOT NULL

In [0]:
%%bigquery amsterdam_exclude_invalid_transfusion_offset

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_exclude_missing_sofa_scores
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND transfusedmarker IS NOT NULL
AND hgbmin IS NOT NULL 
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2)
AND subgroup_flag = 1 

In [0]:
%%bigquery amsterdam_final_transfused

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_final_transfused
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND hgbmin IS NOT NULL 
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusion_offset - hgbmin_offset) < 2
AND subgroup_flag = 1 
AND transfusedmarker = 1

In [0]:
%%bigquery amsterdam_final_nontransfused

SELECT 
COUNT(DISTINCT(patientunitstayid)) AS amsterdam_final_nontransfused
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND hgbmin IS NOT NULL 
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND subgroup_flag = 1 
AND transfusedmarker = 0

## Cohort Breakdown

In [0]:
total = mimic_total_cases.values[0] + eicu_total_cases.values[0] + amsterdam_total_cases.values[0]
exclude_agebelow18 = mimic_exclude_agebelow18.values[0] + eicu_exclude_agebelow18.values[0] + amsterdam_exclude_agebelow18.values[0]
exclude_readmissions = mimic_exclude_readmissions.values[0] + eicu_exclude_readmissions.values[0] + amsterdam_exclude_readmissions.values[0]
exclude_bleeding_trauma_dx = mimic_exclude_bleeding_trauma_dx.values[0] + eicu_exclude_bleeding_trauma_dx.values[0] + amsterdam_exclude_bleeding_trauma_dx.values[0]
exclude_missing_fluids_data = mimic_exclude_missing_fluids_data.values[0] + eicu_exclude_missing_fluids_data.values[0] +  amsterdam_exclude_missing_fluids_data.values[0]
exclude_missing_hemoglobin = mimic_exclude_missing_hemoglobin.values[0] + eicu_exclude_missing_hemoglobin.values[0] + amsterdam_exclude_missing_hemoglobin.values[0]
exclude_hb_6 = mimic_exclude_hb_6.values[0] + eicu_exclude_hb_6.values[0] + amsterdam_exclude_hb_6.values[0]
exclude_missing_sofa_scores = mimic_exclude_missing_sofa_scores.values[0] + eicu_exclude_missing_sofa_scores.values[0] + amsterdam_exclude_missing_sofa_scores.values[0]
exclude_invalid_transfusion_offset = mimic_exclude_invalid_transfusion_offset.values[0] + eicu_exclude_invalid_transfusion_offset.values[0] + amsterdam_exclude_invalid_transfusion_offset.values[0]
final_transfused = mimic_final_transfused.values[0] + eicu_final_transfused.values[0] + amsterdam_final_transfused.values[0]
final_nontransfused = mimic_final_nontransfused.values[0] + eicu_final_nontransfused.values[0] + amsterdam_final_nontransfused.values[0]

print(f'1. combined cohort:')
print(f'2. total cases: {total}')
print(f'3. exclude age < 18 or age unknown (n={total-exclude_agebelow18}): {exclude_agebelow18}')
print(f'4. exclude readmissions (n={exclude_agebelow18-exclude_readmissions}): {exclude_readmissions}')
print(f'5. exclude bleeding diagnosis (n={exclude_readmissions-exclude_bleeding_trauma_dx}): {exclude_bleeding_trauma_dx}')
print(f'6. exclude missing fluids data (n={exclude_bleeding_trauma_dx-exclude_missing_fluids_data}): {exclude_missing_fluids_data}')
print(f'7. exclude missing hemoglobin (n={exclude_missing_fluids_data-exclude_missing_hemoglobin}): {exclude_missing_hemoglobin}')
#print(f'8. exclude missing SOFA scores (n={exclude_missing_hemoglobin-exclude_missing_sofa_scores}: {exclude_missing_sofa_scores}')
#print(f'9. exclude invalid transfusion offset (n={exclude_missing_sofa_scores-exclude_invalid_transfusion_offset}: {exclude_invalid_transfusion_offset}')
print(f'8. exclude hb less than 6 (n={exclude_missing_hemoglobin-exclude_hb_6}): {exclude_hb_6}')
print(f'9. exclude invalid transfusion offset (n={exclude_hb_6-exclude_invalid_transfusion_offset}: {exclude_invalid_transfusion_offset}')
print(f'10. final - transfused: {final_transfused}')
print(f'11. final - not transfused: {final_nontransfused}')

1. combined cohort:
2. total cases: [285563]
3. exclude age < 18 or age unknown (n=[8795]): [276768]
4. exclude readmissions (n=[49033]): [227735]
5. exclude bleeding diagnosis (n=[34396]): [193339]
6. exclude missing fluids data (n=[96101]): [97238]
7. exclude missing hemoglobin (n=[4455]): [92783]
8. exclude hb less than 6 (n=[1215]): [91568]
9. exclude invalid transfusion offset (n=[5937]: [85631]
10. final - transfused: [8565]
11. final - not transfused: [77215]


In [0]:
print(f'1. mimic cohort:')
print(f'2. mimic total cases: {mimic_total_cases.values[0]}')
print(f'3. mimic exclude age < 18 or age unknown (n={mimic_total_cases.values[0]-mimic_exclude_agebelow18.values[0]}): {mimic_exclude_agebelow18.values[0]}')
print(f'4. mimic exclude readmissions (n={mimic_exclude_agebelow18.values[0]-mimic_exclude_readmissions.values[0]}): {mimic_exclude_readmissions.values[0]}')
print(f'5. mimic exclude bleeding diagnosis (n={mimic_exclude_readmissions.values[0]-mimic_exclude_bleeding_trauma_dx.values[0]}): {mimic_exclude_bleeding_trauma_dx.values[0]}')
print(f'6. mimic exclude missing fluids data (n={mimic_exclude_bleeding_trauma_dx.values[0]-mimic_exclude_missing_fluids_data.values[0]}): {mimic_exclude_missing_fluids_data.values[0]}')
print(f'7. mimic exclude missing hemoglobin (n={mimic_exclude_missing_fluids_data.values[0]-mimic_exclude_missing_hemoglobin.values[0]}): {mimic_exclude_missing_hemoglobin.values[0]}')
#print(f'8. mimic exclude missing SOFA scores (n={mimic_exclude_missing_hemoglobin.values[0]-mimic_exclude_missing_sofa_scores.values[0]}: {mimic_exclude_missing_sofa_scores.values[0]}')
#print(f'9. mimic exclude invalid transfusion offset (n={mimic_exclude_missing_sofa_scores.values[0]-mimic_exclude_invalid_transfusion_offset.values[0]}: {mimic_exclude_invalid_transfusion_offset.values[0]}')
print(f'8. exclude hb less than 6 (n={mimic_exclude_missing_hemoglobin.values[0]-mimic_exclude_hb_6.values[0]}): {mimic_exclude_hb_6.values[0]}')
print(f'9. mimic exclude invalid transfusion offset (n={mimic_exclude_missing_hemoglobin.values[0]-mimic_exclude_invalid_transfusion_offset.values[0]}: {mimic_exclude_invalid_transfusion_offset.values[0]}')
print(f'10. mimic final - transfused: {mimic_final_transfused.values[0]}')
print(f'11. mimic final - not transfused: {mimic_final_nontransfused.values[0]}')

1. mimic cohort:
2. mimic total cases: [61532]
3. mimic exclude age < 18 or age unknown (n=[8170]): [53362]
4. mimic exclude readmissions (n=[3637]): [49725]
5. mimic exclude bleeding diagnosis (n=[6110]): [43615]
6. mimic exclude missing fluids data (n=[0]): [43615]
7. mimic exclude missing hemoglobin (n=[1229]): [42386]
8. exclude hb less than 6 (n=[514]): [41872]
9. mimic exclude invalid transfusion offset (n=[4051]: [38335]
10. mimic final - transfused: [7302]
11. mimic final - not transfused: [31033]


In [0]:
print(f'1. eICU cohort:')
print(f'2. eICU total cases: {eicu_total_cases.values[0]}')
print(f'3. eICU exclude age < 18 or age unknown (n={eicu_total_cases.values[0]-eicu_exclude_agebelow18.values[0]}): {eicu_exclude_agebelow18.values[0]}')
print(f'4. eICU exclude readmissions (n={eicu_exclude_agebelow18.values[0]-eicu_exclude_readmissions.values[0]}): {eicu_exclude_readmissions.values[0]}')
print(f'5. eICU exclude bleeding diagnosis (n={eicu_exclude_readmissions.values[0]-eicu_exclude_bleeding_trauma_dx.values[0]}): {eicu_exclude_bleeding_trauma_dx.values[0]}')
print(f'6. eICU exclude missing fluids data (n={eicu_exclude_bleeding_trauma_dx.values[0]-eicu_exclude_missing_fluids_data.values[0]}): {eicu_exclude_missing_fluids_data.values[0]}')
print(f'7. eICU exclude missing hemoglobin (n={eicu_exclude_missing_fluids_data.values[0]-eicu_exclude_missing_hemoglobin.values[0]}): {eicu_exclude_missing_hemoglobin.values[0]}')
#print(f'8. eICU exclude missing SOFA scores (n={eicu_exclude_missing_hemoglobin.values[0]-eicu_exclude_missing_sofa_scores.values[0]}: {eicu_exclude_missing_sofa_scores.values[0]}')
#print(f'9. eICU exclude invalid transfusion offset (n={eicu_exclude_missing_sofa_scores.values[0]-eicu_exclude_invalid_transfusion_offset.values[0]}: {eicu_exclude_invalid_transfusion_offset.values[0]}')
print(f'8. exclude hb less than 6 (n={eicu_exclude_missing_hemoglobin.values[0]-eicu_exclude_hb_6.values[0]}): {eicu_exclude_hb_6.values[0]}')
print(f'9. eICU exclude invalid transfusion offset (n={eicu_exclude_missing_hemoglobin.values[0]-eicu_exclude_invalid_transfusion_offset.values[0]}: {eicu_exclude_invalid_transfusion_offset.values[0]}')
print(f'10. eICU final - transfused: {eicu_final_transfused.values[0]}')
print(f'11. eICU final - not transfused: {eicu_final_nontransfused.values[0]}')

1. eICU cohort:
2. eICU total cases: [200859]
3. eICU exclude age < 18 or age unknown (n=[625]): [200234]
4. eICU exclude readmissions (n=[42351]): [157883]
5. eICU exclude bleeding diagnosis (n=[22502]): [135381]
6. eICU exclude missing fluids data (n=[96101]): [39280]
7. eICU exclude missing hemoglobin (n=[1089]): [38191]
8. exclude hb less than 6 (n=[479]): [37712]
9. eICU exclude invalid transfusion offset (n=[856]: [37335]
10. eICU final - transfused: [744]
11. eICU final - not transfused: [36591]


In [0]:
print(f'1. Amsterdam cohort:')
print(f'2. Amsterdam total cases: {amsterdam_total_cases.values[0]}')
print(f'3. Amsterdam exclude age < 18 or age unknown (n={amsterdam_total_cases.values[0]-amsterdam_exclude_agebelow18.values[0]}): {amsterdam_exclude_agebelow18.values[0]}')
print(f'4. Amsterdam exclude readmissions (n={amsterdam_exclude_agebelow18.values[0]-amsterdam_exclude_readmissions.values[0]}): {amsterdam_exclude_readmissions.values[0]}')
print(f'5. Amsterdam exclude bleeding and trauma diagnosis (n={amsterdam_exclude_readmissions.values[0]-amsterdam_exclude_bleeding_trauma_dx.values[0]}): {amsterdam_exclude_bleeding_trauma_dx.values[0]}')
print(f'6. Amsterdam exclude missing fluids data (n={amsterdam_exclude_bleeding_trauma_dx.values[0]-amsterdam_exclude_missing_fluids_data.values[0]}): {amsterdam_exclude_missing_fluids_data.values[0]}')
print(f'7. Amsterdam exclude missing hemoglobin (n={amsterdam_exclude_missing_fluids_data.values[0]-amsterdam_exclude_missing_hemoglobin.values[0]}): {amsterdam_exclude_missing_hemoglobin.values[0]}')
#print(f'8. Amsterdam exclude missing SOFA scores (n={amsterdam_exclude_missing_hemoglobin.values[0]-amsterdam_exclude_missing_sofa_scores.values[0]}: {amsterdam_exclude_missing_sofa_scores.values[0]}')
#print(f'9. Amsterdam exclude invalid transfusion offset (n={amsterdam_exclude_missing_sofa_scores.values[0]-amsterdam_exclude_invalid_transfusion_offset.values[0]}: {amsterdam_exclude_invalid_transfusion_offset.values[0]}')
print(f'8. exclude hb less than 6 (n={amsterdam_exclude_missing_hemoglobin.values[0]-amsterdam_exclude_hb_6.values[0]}): {amsterdam_exclude_hb_6.values[0]}')
print(f'9. Amsterdam exclude invalid transfusion offset (n={amsterdam_exclude_missing_hemoglobin.values[0]-amsterdam_exclude_invalid_transfusion_offset.values[0]}: {amsterdam_exclude_invalid_transfusion_offset.values[0]}')
print(f'10. Amsterdam final - transfused: {amsterdam_final_transfused.values[0]}')
print(f'11. Amsterdam final - not transfused: {amsterdam_final_nontransfused.values[0]}')

1. Amsterdam cohort:
2. Amsterdam total cases: [23172]
3. Amsterdam exclude age < 18 or age unknown (n=[0]): [23172]
4. Amsterdam exclude readmissions (n=[3045]): [20127]
5. Amsterdam exclude bleeding and trauma diagnosis (n=[5784]): [14343]
6. Amsterdam exclude missing fluids data (n=[0]): [14343]
7. Amsterdam exclude missing hemoglobin (n=[2137]): [12206]
8. exclude hb less than 6 (n=[222]): [11984]
9. Amsterdam exclude invalid transfusion offset (n=[2245]: [9961]
10. Amsterdam final - transfused: [519]
11. Amsterdam final - not transfused: [9591]


# Get & Clean Data (Start Here)

In [0]:
client = bigquery.Client(project='amsterdam-translation')

mimic = client.query("""
SELECT
*
FROM `amsterdam-translation.amsterdam_custom.mimic_transfusion`
WHERE age >= 18
AND first_icu_stay = True
AND icustay_id NOT IN (
    SELECT 
    i.icustay_id
    FROM `physionet-data.mimiciii_clinical.diagnoses_icd` d
    INNER JOIN `physionet-data.mimiciii_clinical.icustays` i
        ON d.hadm_id=i.hadm_id
    WHERE icd9_code IN (
        --ref: https://www.bmj.com/content/bmj/suppl/2015/02/03/bmj.h246.DC1/junm020747.ww1_default.pdf
        --intracranial bleeding
        '430', --Subarachnoid
        '431', --intracerebral
        '4320', --other and unspecified intracranial bleeding
        '4321', --subdural
        '4329', --unspecified intracranial bleeding
        --upper gastrointestinal
        '5310', --acute gastric ulcer with bleeding without obstruction
        '5312', --acute gastric ulcer with bleeding and perforation without obstruction
        '5314', --chronic or unspecified gastric ulcer with bleeding without obstruction
        '5316', --chronic or unspecified gastric ulcer with bleeding and perforation without obstruction
        '5320', --acute duodenal ulcer with bleeding without obstruction
        '5322', --acute duodenal ulcer with bleeding and perforation without obstruction
        '5324', --chronic or unspecified duodenal ulcer with bleeding without obstruction
        '5326', --chronic or unspecified duodenal ulcer with bleeding and perforation without obstruction
        '5330', --acute peptic ulcer of unspecified site with bleeding without obstruction
        '5332', --acute peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5334', --chronic or unspecified peptic ulcer of unspecified site with bleeding without obstruction
        '5336', --chronic or unspecified peptic ulcer of unspecified site with bleeding and perforation without obstruction
        '5340', --acute gastrojejunal ulcer with bleeding without obstruction
        '5342', --acute gastrojejunal ulcer with bleeding and perforation without obstruction
        '5344', --chronic or unspecified gastrojejunal ulcer with bleeding without obstruction
        '5346', --chronic or unspecified gastrojejunal ulcer with bleeding and perforation without obstruction
        '5780', --hematemesis
        '5781', --blood in stool
        '5789', --bleeding of gastrointestinal tract unspecified
        --Lower GI
        '5693', --bleeding of rectum and anus
        --Other Bleeding
        '2878', --other unspecified hemorrhagic conditions
        '2879', --unspecified hemorrhagic conditions
        '5967', --bleeding into bladder wall
        '7848', --bleeding from throat 
        '5997', --hematuria, unspecified
        '6271', --postmenopausal bleeding
        '4590', --bleeding unspecified
        '7191', --hemarthrosis site unspecified 
        '7863', --hemoptysis, unspecified    
        '72992', --nontraumatic hematoma soft tissue
        '9582' --secondary or recurrent following trauma
    )
    OR icd9_code LIKE '900%' --vessel injuries
    OR icd9_code LIKE '901%' --vessel injuries
    OR icd9_code LIKE '902%' --vessel injuries
    OR icd9_code LIKE '903%' --vessel injuries
    OR icd9_code LIKE '904%' --vessel injuries
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2)
AND subgroup_flag = 1 
""").to_dataframe()

eicu = client.query("""
WITH diagnosis AS (
    SELECT * FROM `physionet-data.eicu_crd.diagnosis`
), patient AS ( --shortening the table names for use in bigquery--
    SELECT * FROM `physionet-data.eicu_crd.patient`
), Reliable_ICUs as( --pickup reliable ICUs
    SELECT *
    FROM patient
    WHERE
    (wardID IN(259,261,267,273,285,286,307,317,324,337,338,345,347,362,369,376,377,384,391,394,
    408,413,417,425,428,429,430,431,434,445,464,451,487,489,491,495,498,504,506,512,513,594,601,
    602,607,608,609,611,613,619,622,628,829,831,809,814,840,841,991,876,962,953,966,984,1017,1021,
    1020,1030,1035,1026,1027,1029,1037,1032,1039,1041,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2016)
    OR
    (wardID IN(261,286,307,273,290,285,259,267,384,347,394,317,362,369,337,402,345,413,408,335,377,
    417,391,376,427,428,425,431,430,429,445,464,434,451,491,498,489,506,601,607,609,608,602,619,628,
    622,611,613,809,829,772,831,822,814,841,840,876,983,991,962,966,953,968,1020,1017,1021,1030,1032,
    1027,1035,1037,1026,1025,1048,1053,1043,1087,1074)
    AND hospitaldischargeyear =2015)
    OR
    (wardID IN(261,286,307,290,256,285,273,259,267,347,384,317,394,362,369,402,337,413,408,345,335,377,
    386,364,417,376,391,425,428,431,430,445,451,434,464,489,609,607,601,608,602,619,622,822,829,809,804,
    766,814,888,841,876,953,996,1020,1017,1021,1026,1039,1027,1029,1037,1032,1035,1025,1053,1043,1087,1074)
    AND hospitaldischargeyear =2014)
    OR
    (wardID IN(384,347,317,394,362,369,402,345,337,413,408,386,391,376,698,809,814,888,841,876,1087,1074)
    AND hospitaldischargeyear =2013)
    OR
    (wardID IN(809,831,888,841,876)
    AND hospitaldischargeyear =2012)
    ORDER BY hospitalID
)
SELECT 
*
FROM `amsterdam-translation.amsterdam_custom.eicu_transfusion`
WHERE SAFE_CAST(age AS int64) >= 18
AND unitvisitnumber = 1
AND patientunitstayid NOT IN (
    SELECT DISTINCT patientUnitStayID
    FROM diagnosis
    WHERE (LOWER(diagnosisString) like '%hemorrhage%' 
    OR LOWER(diagnosisString) like '%blood loss%' 
    OR (LOWER(diagnosisString) Like '%bleed%')
    AND NOT LOWER(diagnosisString) Like '%bleeding and red blood cell disorders%')
    OR LOWER(diagnosisstring) LIKE '%trauma%' 
)
AND patientunitstayid IN (
    SELECT DISTINCT patientunitstayid FROM Reliable_ICUs
)
AND hgbmin IS NOT NULL
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2)
AND subgroup_flag = 1 
""").to_dataframe()

amsterdam = client.query("""
SELECT 
*
FROM `amsterdam-translation.amsterdam_custom.amsterdam_transfusion`
WHERE age IS NOT NULL
AND patientunitstayid IN (
    SELECT
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.admissions`
    WHERE admissioncount = 1
)
AND patientunitstayid NOT IN ( --exclude: any patient who had bleeding 
    SELECT 
    DISTINCT(admissionid)
    FROM `physionet-data.amsterdamdb.listitems`
    WHERE LOWER(item) LIKE '%bloeding%'
    OR LOWER(item) LIKE '%bleed%'
    OR LOWER(item) LIKE '%hemorr%'
    OR LOWER(value) LIKE '%bloeding%'
    OR LOWER(value) LIKE '%bleed%'
    OR LOWER(value) LIKE '%hemorr%'
    OR LOWER(item) LIKE '%trauma'
    OR LOWER(value) LIKE '%trauma'
)
AND hgbmin IS NOT NULL 
AND hgbmin >= 6
--AND sofatotal_dayLowestHmg IS NOT NULL
--AND sofatotal_dayAfterLowestHmg IS NOT NULL
AND (transfusedmarker = 0 OR (transfusion_offset - hgbmin_offset) < 2)
AND subgroup_flag = 1 
""").to_dataframe()

selected_columns = ['age',
                    'gender',
                    'hgbmin',
                    'transfusioncount',
                    'ventmarker',
                    'septicflag',
                    'pvdflag',
                    'ihdflag',
                    'chfflag',
                    'esrfflag',
                    'surgeryflag',
                    'vasopressor',
                    'icu_mort',
                    'expiremarker',
                    'sofatotal_dayLowestHmg',
                    'sofatotal_dayAfterLowestHmg',
                    'transfusedmarker',
                    'subgroup_flag',
                    'hgbmin_offset',
                    'transfusion_offset',
                    'death_offset']

def delta_sofa_hb_grouper(df):
    df['sofatotal_dayLowestHmg_quantile'] = pd.cut(df['sofatotal_dayLowestHmg'],3,labels=False)
    df['sofatotal_dayAfterLowestHmg_quantile'] = pd.cut(df['sofatotal_dayAfterLowestHmg'],3,labels=False) 

    def sofa_categorizer(a,b):
        if b > a:
            return 1
        elif (a == 1 or a == 2) and b == a:
            return 1
        elif (a == 0 and b == 0) or b < a:
            return 0
        else:
            return np.nan

    df['delta_sofa'] = df[['sofatotal_dayLowestHmg_quantile','sofatotal_dayAfterLowestHmg_quantile']].apply(lambda row: sofa_categorizer(row[0],row[1]),axis=1)

    def hb_grouper(hb):
        if hb <6:
            return 'Less than 6'
        elif hb >=6 and hb <7:
            return '6 to 7'
        elif hb >= 7 and hb <8:
            return '7 to 8'
        elif hb >= 8 and hb <9:
            return '8 to 9'
        elif hb >= 9 and hb <10:
            return '9 to 10'
        elif hb >= 10:
            return '10 or more'
        else:
            return np.nan
    
    df['hb_group'] = df['hgbmin'].apply(lambda row: hb_grouper(row))

    return df

def age_groups(age):
    if age >= 18 and age <= 39:
        return '18-39'
    elif age >= 40 and age <= 49:
        return '40-49'
    elif age >= 50 and age <= 59:
        return '50-59'
    elif age >= 60 and age <= 69:
        return '60-69'
    elif age >= 70 and age <= 79:
        return '70-79'
    elif age >= 80:
        return '80+'
    else:
        return np.nan

mimic = delta_sofa_hb_grouper(mimic[selected_columns])
mimic['gender'] = mimic['gender'].replace({'F':'Female','M':'Male'})
mimic['age'] = mimic['age'].astype('int64')
mimic['age'] = mimic['age'].apply(lambda row: age_groups(row))

eicu = delta_sofa_hb_grouper(eicu[selected_columns])
eicu['age'] = eicu['age'].astype('int64')
eicu['age'] = eicu['age'].apply(lambda row: age_groups(row))

amsterdam = delta_sofa_hb_grouper(amsterdam[selected_columns])

# Descriptive Statistics (Table 1)

## Combined

### Combined - overall

In [0]:
d = pd.concat([mimic,eicu,amsterdam])
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical)
print(f'Combined, overall (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

Combined, overall (n=85631)


Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,85631
age,18-39,0.0,9168 (10.7)
,40-49,,8538 (10.0)
,50-59,,15086 (17.6)
,60-69,,19003 (22.2)
,70-79,,18011 (21.0)
,80+,,15825 (18.5)
gender,,10.0,187 (0.2)
,Female,,37813 (44.2)
,Male,,47621 (55.6)


### Combined - stratified by transfusion

In [0]:
d = pd.concat([mimic,eicu,amsterdam])
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
print(f'Combined, stratified by Transfused (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

Combined, stratified by Transfused (n=85631)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,77066,8565,,
age,18-39,0.0,8675 (11.3),493 (5.8),<0.001,Chi-squared
,40-49,,7860 (10.2),678 (7.9),,
,50-59,,13796 (17.9),1290 (15.1),,
,60-69,,17054 (22.1),1949 (22.8),,
,70-79,,15814 (20.5),2197 (25.7),,
,80+,,13867 (18.0),1958 (22.9),,
gender,,10.0,176 (0.2),11 (0.1),<0.001,Chi-squared
,Female,,33765 (43.8),4048 (47.3),,
,Male,,43115 (56.0),4506 (52.6),,


### Combined - stratified by Hb group

In [0]:
data = pd.concat([mimic,eicu,amsterdam])
for grp in list(reversed(data['hb_group'].value_counts().index.values.tolist())):
    d = data[data.hb_group == grp]
    numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
    categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
    t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
    print(f'Combined {grp}, stratified by Transfused (n={len(d)})')
    display(HTML(t1.tabulate(tablefmt="html")))
    print()
    print()
    print()

Combined 6 to 7, stratified by Transfused (n=3366)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,1893,1473,,
age,18-39,0.0,181 (9.6),145 (9.8),0.655,Chi-squared
,40-49,,166 (8.8),146 (9.9),,
,50-59,,331 (17.5),274 (18.6),,
,60-69,,446 (23.6),327 (22.2),,
,70-79,,448 (23.7),327 (22.2),,
,80+,,321 (17.0),254 (17.2),,
gender,,0.0,2 (0.1),2 (0.1),0.294,Chi-squared (warning: expected count < 5)
,Female,,993 (52.5),733 (49.8),,
,Male,,898 (47.4),738 (50.1),,





Combined 7 to 8, stratified by Transfused (n=8500)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,5732,2768,,
age,18-39,0.0,507 (8.8),176 (6.4),<0.001,Chi-squared
,40-49,,509 (8.9),241 (8.7),,
,50-59,,964 (16.8),435 (15.7),,
,60-69,,1355 (23.6),622 (22.5),,
,70-79,,1367 (23.8),705 (25.5),,
,80+,,1030 (18.0),589 (21.3),,
gender,,0.0,11 (0.2),5 (0.2),0.049,Chi-squared
,Female,,2999 (52.3),1370 (49.5),,
,Male,,2722 (47.5),1393 (50.3),,





Combined 8 to 9, stratified by Transfused (n=13316)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,10544,2772,,
age,18-39,0.0,837 (7.9),114 (4.1),<0.001,Chi-squared
,40-49,,859 (8.1),186 (6.7),,
,50-59,,1788 (17.0),389 (14.0),,
,60-69,,2520 (23.9),632 (22.8),,
,70-79,,2457 (23.3),767 (27.7),,
,80+,,2083 (19.8),684 (24.7),,
gender,,0.0,35 (0.3),3 (0.1),<0.001,Chi-squared
,Female,,5266 (49.9),1252 (45.2),,
,Male,,5243 (49.7),1517 (54.7),,





Combined 9 to 10, stratified by Transfused (n=15422)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,14171,1251,,
age,18-39,0.0,1117 (7.9),44 (3.5),<0.001,Chi-squared
,40-49,,1146 (8.1),80 (6.4),,
,50-59,,2318 (16.4),148 (11.8),,
,60-69,,3356 (23.7),305 (24.4),,
,70-79,,3298 (23.3),331 (26.5),,
,80+,,2936 (20.7),343 (27.4),,
gender,,2.0,58 (0.4),1 (0.1),0.021,Chi-squared (warning: expected count < 5)
,Female,,6736 (47.5),558 (44.6),,
,Male,,7375 (52.1),692 (55.3),,





Combined 10 or more, stratified by Transfused (n=45027)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,44726,301,,
age,18-39,0.0,6033 (13.5),14 (4.7),<0.001,Chi-squared
,40-49,,5180 (11.6),25 (8.3),,
,50-59,,8395 (18.8),44 (14.6),,
,60-69,,9377 (21.0),63 (20.9),,
,70-79,,8244 (18.4),67 (22.3),,
,80+,,7497 (16.8),88 (29.2),,
gender,,8.0,70 (0.2),,0.161,Chi-squared (warning: expected count < 5)
,Female,,17771 (39.7),135 (44.9),,
,Male,,26877 (60.1),166 (55.1),,







## MIMIC

### MIMIC - overall

In [0]:
d = mimic
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical)
print(f'MIMIC, overall (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

MIMIC, overall (n=38335)


Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,38335
age,18-39,0.0,4014 (10.5)
,40-49,,4081 (10.6)
,50-59,,6644 (17.3)
,60-69,,8093 (21.1)
,70-79,,7605 (19.8)
,80+,,7898 (20.6)
gender,Female,0.0,16899 (44.1)
,Male,,21436 (55.9)
vasopressor,0,0.0,26630 (69.5)


### MIMIC - stratified by transfusion

In [0]:
d = mimic.copy()
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
print(f'MIMIC, stratified by Transfused (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

MIMIC, stratified by Transfused (n=38335)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,31033,7302,,
age,18-39,0.0,3616 (11.7),398 (5.5),<0.001,Chi-squared
,40-49,,3485 (11.2),596 (8.2),,
,50-59,,5556 (17.9),1088 (14.9),,
,60-69,,6449 (20.8),1644 (22.5),,
,70-79,,5774 (18.6),1831 (25.1),,
,80+,,6153 (19.8),1745 (23.9),,
gender,Female,0.0,13433 (43.3),3466 (47.5),<0.001,Chi-squared
,Male,,17600 (56.7),3836 (52.5),,
vasopressor,0,0.0,23250 (74.9),3380 (46.3),<0.001,Chi-squared


### MIMIC - stratified by Hb group

In [0]:
data = mimic.copy()
for grp in list(reversed(data['hb_group'].value_counts().index.values.tolist())):
    d = data[data.hb_group == grp]
    numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
    categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
    t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
    print(f'MIMIC {grp}, stratified by Transfused (n={len(d)})')
    display(HTML(t1.tabulate(tablefmt="html")))
    print()
    print()
    print()

MIMIC 6 to 7, stratified by Transfused (n=1341)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,375,966,,
age,18-39,0.0,44 (11.7),89 (9.2),0.427,Chi-squared
,40-49,,51 (13.6),106 (11.0),,
,50-59,,53 (14.1),160 (16.6),,
,60-69,,78 (20.8),206 (21.3),,
,70-79,,76 (20.3),216 (22.4),,
,80+,,73 (19.5),189 (19.6),,
gender,Female,0.0,200 (53.3),470 (48.7),0.140,Chi-squared
,Male,,175 (46.7),496 (51.3),,
vasopressor,0,0.0,235 (62.7),497 (51.4),<0.001,Chi-squared





MIMIC 7 to 8, stratified by Transfused (n=4281)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,1959,2322,,
age,18-39,0.0,206 (10.5),150 (6.5),<0.001,Chi-squared
,40-49,,208 (10.6),215 (9.3),,
,50-59,,325 (16.6),380 (16.4),,
,60-69,,437 (22.3),513 (22.1),,
,70-79,,379 (19.3),559 (24.1),,
,80+,,404 (20.6),505 (21.7),,
gender,Female,0.0,1029 (52.5),1167 (50.3),0.147,Chi-squared
,Male,,930 (47.5),1155 (49.7),,
vasopressor,0,0.0,1218 (62.2),1004 (43.2),<0.001,Chi-squared





MIMIC 8 to 9, stratified by Transfused (n=7329)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,4785,2544,,
age,18-39,0.0,399 (8.3),105 (4.1),<0.001,Chi-squared
,40-49,,448 (9.4),176 (6.9),,
,50-59,,848 (17.7),367 (14.4),,
,60-69,,1064 (22.2),580 (22.8),,
,70-79,,1032 (21.6),684 (26.9),,
,80+,,994 (20.8),632 (24.8),,
gender,Female,0.0,2430 (50.8),1159 (45.6),<0.001,Chi-squared
,Male,,2355 (49.2),1385 (54.4),,
vasopressor,0,0.0,3031 (63.3),1099 (43.2),<0.001,Chi-squared





MIMIC 9 to 10, stratified by Transfused (n=7769)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,6584,1185,,
age,18-39,0.0,513 (7.8),41 (3.5),<0.001,Chi-squared
,40-49,,591 (9.0),75 (6.3),,
,50-59,,1095 (16.6),140 (11.8),,
,60-69,,1448 (22.0),287 (24.2),,
,70-79,,1412 (21.4),309 (26.1),,
,80+,,1525 (23.2),333 (28.1),,
gender,Female,0.0,3118 (47.4),540 (45.6),0.270,Chi-squared
,Male,,3466 (52.6),645 (54.4),,
vasopressor,0,0.0,4471 (67.9),607 (51.2),<0.001,Chi-squared





MIMIC 10 or more, stratified by Transfused (n=17615)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,17330,285,,
age,18-39,0.0,2454 (14.2),13 (4.6),<0.001,Chi-squared
,40-49,,2187 (12.6),24 (8.4),,
,50-59,,3235 (18.7),41 (14.4),,
,60-69,,3422 (19.7),58 (20.4),,
,70-79,,2875 (16.6),63 (22.1),,
,80+,,3157 (18.2),86 (30.2),,
gender,Female,0.0,6656 (38.4),130 (45.6),0.016,Chi-squared
,Male,,10674 (61.6),155 (54.4),,
vasopressor,0,0.0,14295 (82.5),173 (60.7),<0.001,Chi-squared







## eICU

### eICU - overall

In [0]:
d = eicu
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical)
print(f'eICU, overall (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

eICU, overall (n=37335)


Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,37335
age,18-39,0.0,3833 (10.3)
,40-49,,3341 (8.9)
,50-59,,6667 (17.9)
,60-69,,8412 (22.5)
,70-79,,8160 (21.9)
,80+,,6922 (18.5)
gender,Female,10.0,17160 (46.0)
,Male,,20165 (54.0)
vasopressor,0,0.0,30754 (82.4)


### eICU - stratified by transfusion

In [0]:
d = eicu.copy()
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
print(f'eICU, stratified by Transfused (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

eICU, stratified by Transfused (n=37335)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,36591,744,,
age,18-39,0.0,3775 (10.3),58 (7.8),0.011,Chi-squared
,40-49,,3288 (9.0),53 (7.1),,
,50-59,,6528 (17.8),139 (18.7),,
,60-69,,8239 (22.5),173 (23.3),,
,70-79,,7966 (21.8),194 (26.1),,
,80+,,6795 (18.6),127 (17.1),,
gender,Female,10.0,16785 (45.9),375 (50.4),0.016,Chi-squared
,Male,,19796 (54.1),369 (49.6),,
vasopressor,0,0.0,30387 (83.0),367 (49.3),<0.001,Chi-squared


### eICU - stratified by Hb group

In [0]:
data = eicu.copy()
for grp in list(reversed(data['hb_group'].value_counts().index.values.tolist())):
    d = data[data.hb_group == grp]
    numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
    categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
    t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
    print(f'eICU {grp}, stratified by Transfused (n={len(d)})')
    display(HTML(t1.tabulate(tablefmt="html")))
    print()
    print()
    print()

eICU 6 to 7, stratified by Transfused (n=1729)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,1339,390,,
age,18-39,0.0,97 (7.2),41 (10.5),0.011,Chi-squared
,40-49,,102 (7.6),30 (7.7),,
,50-59,,250 (18.7),92 (23.6),,
,60-69,,331 (24.7),97 (24.9),,
,70-79,,327 (24.4),85 (21.8),,
,80+,,232 (17.3),45 (11.5),,
gender,Female,0.0,691 (51.6),205 (52.6),0.783,Chi-squared
,Male,,648 (48.4),185 (47.4),,
vasopressor,0,0.0,850 (63.5),191 (49.0),<0.001,Chi-squared





eICU 7 to 8, stratified by Transfused (n=3423)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,3182,241,,
age,18-39,0.0,203 (6.4),14 (5.8),0.101,Chi-squared
,40-49,,241 (7.6),17 (7.1),,
,50-59,,530 (16.7),31 (12.9),,
,60-69,,773 (24.3),47 (19.5),,
,70-79,,859 (27.0),76 (31.5),,
,80+,,576 (18.1),56 (23.2),,
gender,Female,0.0,1665 (52.3),119 (49.4),0.414,Chi-squared
,Male,,1517 (47.7),122 (50.6),,
vasopressor,0,0.0,2141 (67.3),119 (49.4),<0.001,Chi-squared





eICU 8 to 9, stratified by Transfused (n=4536)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,4455,81,,
age,18-39,0.0,300 (6.7),3 (3.7),0.661,Chi-squared
,40-49,,291 (6.5),5 (6.2),,
,50-59,,727 (16.3),10 (12.3),,
,60-69,,1100 (24.7),19 (23.5),,
,70-79,,1093 (24.5),23 (28.4),,
,80+,,944 (21.2),21 (25.9),,
gender,Female,0.0,2261 (50.8),40 (49.4),0.895,Chi-squared
,Male,,2194 (49.2),41 (50.6),,
vasopressor,0,0.0,3242 (72.8),40 (49.4),<0.001,Chi-squared





eICU 9 to 10, stratified by Transfused (n=5276)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,5258,18,,
age,18-39,0.0,405 (7.7),,0.543,Chi-squared (warning: expected count < 5)
,40-49,,349 (6.6),,,
,50-59,,839 (16.0),3 (16.7),,
,60-69,,1257 (23.9),6 (33.3),,
,70-79,,1278 (24.3),6 (33.3),,
,80+,,1130 (21.5),3 (16.7),,
gender,Female,2.0,2758 (52.5),6 (33.3),0.165,Chi-squared
,Male,,2498 (47.5),12 (66.7),,
vasopressor,0,0.0,4068 (77.4),10 (55.6),0.043,Fisher's exact





eICU 10 or more, stratified by Transfused (n=22371)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,22357,14,,
age,18-39,0.0,2770 (12.4),,0.715,Chi-squared (warning: expected count < 5)
,40-49,,2305 (10.3),1 (7.1),,
,50-59,,4182 (18.7),3 (21.4),,
,60-69,,4778 (21.4),4 (28.6),,
,70-79,,4409 (19.7),4 (28.6),,
,80+,,3913 (17.5),2 (14.3),,
gender,Female,8.0,9410 (42.1),5 (35.7),0.831,Chi-squared
,Male,,12939 (57.9),9 (64.3),,
vasopressor,0,0.0,20086 (89.8),7 (50.0),<0.001,Fisher's exact







## Amsterdam

### Amsterdam - overall

In [0]:
d = amsterdam
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=numeric+categorical, categorical=categorical)
print(f'AUMC database, overall (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

AUMC database, overall (n=9961)


Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,9961
hgbmin,,0.0,10.3 (2.0)
transfusioncount,,0.0,0.1 (0.7)
sofatotal_dayLowestHmg,,7664.0,6.1 (2.3)
sofatotal_dayAfterLowestHmg,,8541.0,6.3 (2.2)
hgbmin_offset,,0.0,1.6 (2.8)
transfusion_offset,,9442.0,2.6 (5.5)
death_offset,,6591.0,628.4 (946.0)
age,18-39,0.0,1321 (13.3)
,40-49,,1116 (11.2)


### Amsterdam - stratified by transfusion

In [0]:
d = amsterdam.copy()
numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
print(f'Amsterdam, stratified by Transfused (n={len(d)})')
display(HTML(t1.tabulate(tablefmt="html")))

Amsterdam, stratified by Transfused (n=9961)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,9442,519,,
age,18-39,0.0,1284 (13.6),37 (7.1),<0.001,Chi-squared
,40-49,,1087 (11.5),29 (5.6),,
,50-59,,1712 (18.1),63 (12.1),,
,60-69,,2366 (25.1),132 (25.4),,
,70-79,,2074 (22.0),172 (33.1),,
,80+,,919 (9.7),86 (16.6),,
gender,,0.0,176 (1.9),11 (2.1),0.494,Chi-squared
,Female,,3547 (37.6),207 (39.9),,
,Male,,5719 (60.6),301 (58.0),,


### Amsterdam by Hb groups

In [0]:
data = amsterdam.copy()
for grp in list(reversed(data['hb_group'].value_counts().index.values.tolist())):
    d = data[data.hb_group == grp]
    numeric = ['hgbmin','transfusioncount','sofatotal_dayLowestHmg','sofatotal_dayAfterLowestHmg','hgbmin_offset','transfusion_offset','death_offset']
    categorical = ['age','gender','vasopressor','ventmarker','septicflag','pvdflag','ihdflag','chfflag','esrfflag','surgeryflag','icu_mort','expiremarker','hb_group','delta_sofa']
    t1 = TableOne(data=d, columns=categorical+numeric, categorical=categorical, groupby='transfusedmarker',pval=True)
    print(f'Amsterdam {grp}, stratified by Transfused (n={len(d)})')
    display(HTML(t1.tabulate(tablefmt="html")))
    print()
    print()
    print()

Amsterdam 6 to 7, stratified by Transfused (n=296)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,179,117,,
age,18-39,0.0,40 (22.3),15 (12.8),0.148,Chi-squared
,40-49,,13 (7.3),10 (8.5),,
,50-59,,28 (15.6),22 (18.8),,
,60-69,,37 (20.7),24 (20.5),,
,70-79,,45 (25.1),26 (22.2),,
,80+,,16 (8.9),20 (17.1),,
gender,,0.0,2 (1.1),2 (1.7),0.440,Chi-squared (warning: expected count < 5)
,Female,,102 (57.0),58 (49.6),,
,Male,,75 (41.9),57 (48.7),,





Amsterdam 7 to 8, stratified by Transfused (n=796)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,591,205,,
age,18-39,0.0,98 (16.6),12 (5.9),<0.001,Chi-squared
,40-49,,60 (10.2),9 (4.4),,
,50-59,,109 (18.4),24 (11.7),,
,60-69,,145 (24.5),62 (30.2),,
,70-79,,129 (21.8),70 (34.1),,
,80+,,50 (8.5),28 (13.7),,
gender,,0.0,11 (1.9),5 (2.4),0.032,Chi-squared (warning: expected count < 5)
,Female,,305 (51.6),84 (41.0),,
,Male,,275 (46.5),116 (56.6),,





Amsterdam 8 to 9, stratified by Transfused (n=1451)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,1304,147,,
age,18-39,0.0,138 (10.6),6 (4.1),<0.001,Chi-squared
,40-49,,120 (9.2),5 (3.4),,
,50-59,,213 (16.3),12 (8.2),,
,60-69,,356 (27.3),33 (22.4),,
,70-79,,332 (25.5),60 (40.8),,
,80+,,145 (11.1),31 (21.1),,
gender,,0.0,35 (2.7),3 (2.0),0.134,Chi-squared (warning: expected count < 5)
,Female,,575 (44.1),53 (36.1),,
,Male,,694 (53.2),91 (61.9),,





Amsterdam 9 to 10, stratified by Transfused (n=2377)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,2329,48,,
age,18-39,0.0,199 (8.5),3 (6.2),0.729,Chi-squared (warning: expected count < 5)
,40-49,,206 (8.8),5 (10.4),,
,50-59,,384 (16.5),5 (10.4),,
,60-69,,651 (28.0),12 (25.0),,
,70-79,,608 (26.1),16 (33.3),,
,80+,,281 (12.1),7 (14.6),,
gender,,0.0,58 (2.5),1 (2.1),0.220,Chi-squared (warning: expected count < 5)
,Female,,860 (36.9),12 (25.0),,
,Male,,1411 (60.6),35 (72.9),,





Amsterdam 10 or more, stratified by Transfused (n=5041)


Unnamed: 0,Unnamed: 1,Missing,0,1,P-Value,Test
n,,,5039,2,,
age,18-39,0.0,809 (16.1),1 (50.0),0.661,Chi-squared (warning: expected count < 5)
,40-49,,688 (13.7),,,
,50-59,,978 (19.4),,,
,60-69,,1177 (23.4),1 (50.0),,
,70-79,,960 (19.1),,,
,80+,,427 (8.5),,,
gender,,0.0,70 (1.4),,0.581,Chi-squared (warning: expected count < 5)
,Female,,1705 (33.8),,,
,Male,,3264 (64.8),2 (100.0),,







# Encoding and Imputation

In [0]:
combined_results = {'combined':{'primary':{},'secondary':{}},'eicu':{'primary':{},'secondary':{}},'aumc':{'primary':{},'secondary':{}}}

# Common Functions
# To maintain consistency, we use age groups for both database analyses instead of numeric age
def onehot_categories(df,column,prefix,prefix_sep,drop_first=True):
    onehot = pd.get_dummies(df[[column]],prefix=prefix,prefix_sep=prefix_sep,drop_first=drop_first)
    df = pd.merge(df,onehot,left_index=True,right_index=True)
    return df

covariates = ['age_40-49','age_50-59','age_60-69','age_70-79','age_80+','gender', 'sofatotal_dayLowestHmg', 'hgbmin','ihdflag', 'septicflag', 'surgeryflag', 'ventmarker', 'vasopressor']
psm_outcome = ['transfusedmarker']
primary_outcome = ['expiremarker']
secondary_outcome = ['delta_sofa']

### MIMIC

In [0]:
# Label encode: 1 = Male, 0 = Female
mimic['gender'] = mimic['gender'].replace({'Female':0,'Male':1})

# Encode age 
mimic = onehot_categories(mimic,column='age',prefix='age',prefix_sep='_',drop_first=False)

# Reset the index
mimic = mimic.reset_index(drop=True)

#Save the data
mimic.to_csv('mimic.csv',index=False)

In [0]:
mimic[covariates]

Unnamed: 0,age_40-49,age_50-59,age_60-69,age_70-79,age_80+,gender,sofatotal_dayLowestHmg,hgbmin,ihdflag,septicflag,surgeryflag,ventmarker,vasopressor
0,0,0,0,0,1,0,,6.0,0,1,0,0,0
1,0,0,0,1,0,0,6.0,6.0,0,1,0,0,1
2,1,0,0,0,0,0,,6.0,0,1,0,0,0
3,0,0,0,1,0,0,4.0,6.0,0,0,1,1,1
4,1,0,0,0,0,0,,6.0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
38330,1,0,0,0,0,0,4.0,6.7,0,1,1,1,1
38331,1,0,0,0,0,1,0.0,7.4,0,1,0,0,1
38332,1,0,0,0,0,0,2.0,6.5,0,1,0,0,1
38333,1,0,0,0,0,1,1.0,6.8,0,1,1,0,1


### eICU

In [0]:
# There are cases missing gender in eICU - impute stratified by Hb group
group = eicu['hb_group'][eicu['gender'].isnull()].values[0]
value = eicu['gender'][eicu['hb_group'] == group].mode().values[0]
eicu['gender'][eicu['gender'].isnull()] = value

# Label encode: 1 = Male, 0 = Female
eicu['gender'] = eicu['gender'].replace({'Female':0,'Male':1})

# Encode age 
eicu = onehot_categories(eicu,column='age',prefix='age',prefix_sep='_',drop_first=False)

# Reset the index
eicu = eicu.reset_index(drop=True)

# Save the file
eicu.to_csv('eicu.csv',index=False)

In [0]:
eicu[covariates]

Unnamed: 0,age_40-49,age_50-59,age_60-69,age_70-79,age_80+,gender,sofatotal_dayLowestHmg,hgbmin,ihdflag,septicflag,surgeryflag,ventmarker,vasopressor
0,0,0,0,0,0,1,0,6.0,0,0,0,0,0
1,0,0,1,0,0,0,0,6.5,0,0,0,0,0
2,0,0,1,0,0,1,0,7.0,0,0,1,1,0
3,1,0,0,0,0,0,0,7.0,0,0,0,0,0
4,0,0,1,0,0,1,0,7.0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37330,0,0,0,0,1,1,3,8.8,0,1,0,1,1
37331,0,0,0,1,0,1,4,12.3,0,0,0,1,1
37332,0,0,1,0,0,1,6,16.2,1,1,0,1,1
37333,0,0,1,0,0,0,3,8.3,0,0,0,1,1


### Amsterdam

In [0]:
# There are cases with missing gender in Amsterdam coded as '' - impute stratified by Hb group
group = amsterdam['hb_group'][amsterdam['gender'].isin(['Female','Male'])==False].values[0]
value = amsterdam['gender'][amsterdam['hb_group'] == group].mode().values[0]
amsterdam['gender'][amsterdam['gender'].isin(['Female','Male'])==False] = value

# Label encode: 1 = Male, 0 = Female
amsterdam['gender'] = amsterdam['gender'].replace({'Female':0,'Male':1})

# Encode age 
amsterdam = onehot_categories(amsterdam ,column='age',prefix='age',prefix_sep='_',drop_first=False)

# Reset the index
amsterdam = amsterdam.reset_index(drop=True)

# Save the file
amsterdam.to_csv('aumc.csv',index=False)

### Combined

In [0]:
combined = pd.concat([mimic,eicu,amsterdam])
combined = combined.reset_index(drop=True)
combined.to_csv('combined.csv',index=False)

In [0]:
combined[covariates]

Unnamed: 0,age_40-49,age_50-59,age_60-69,age_70-79,age_80+,gender,sofatotal_dayLowestHmg,hgbmin,ihdflag,septicflag,surgeryflag,ventmarker,vasopressor
0,0,0,0,0,1,0,,6.000,0,1,0,0,0
1,0,0,0,1,0,0,6.0,6.000,0,1,0,0,1
2,1,0,0,0,0,0,,6.000,0,1,0,0,0
3,0,0,0,1,0,0,4.0,6.000,0,0,1,1,1
4,1,0,0,0,0,0,,6.000,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
85626,1,0,0,0,0,0,4.0,11.753,0,0,0,1,1
85627,1,0,0,0,0,0,,9.177,0,0,1,0,1
85628,0,0,0,0,1,1,,8.211,0,0,0,0,0
85629,1,0,0,0,0,0,7.0,6.923,0,0,0,1,1
